Print this page
Update from fsd_sep3 webrev to fsd_sep9


  18 #include <sys/fsh.h>
  19 #include <sys/fsh_impl.h>
  20 #include <sys/id_space.h>
  21 #include <sys/kmem.h>
  22 #include <sys/ksynch.h>
  23 #include <sys/list.h>
  24 #include <sys/sunddi.h>
  25 #include <sys/sysmacros.h>
  26 #include <sys/types.h>
  27 #include <sys/vfs.h>
  28 #include <sys/vnode.h>
  29 
  30 /*
  31  * Filesystem hook framework (fsh)
  32  *
  33  * 1. Abstract.
  34  * The main goal of the filesystem hook framework is to provide an easy way to
  35  * inject client-defined behaviour into vfs/vnode calls. fsh works on
  36  * vfs_t granularity.
  37  *


  38  *

  39  * 2. Overview.
  40  * fsh_t is the main object in the fsh. An fsh_t is a structure containing:
  41  *      - pointers to hooking functions (named after corresponding
  42  *      vnodeops/vfsops)
  43  *      - a pointer to an argument to pass (this is shared for all the
  44  *      hooks in a given fsh_t)
  45  *      - a pointer to the *hook remove callback* - it's being fired after a
  46  *      hook is removed and the hook has stopped executing. It's safe to destroy
  47  *      any data associated with this hook.
  48  *
  49  * The information from fsh_t is copied by the fsh and an fsh_handle_t
  50  * is returned. It should be used for further removing.
  51  *
  52  *
  53  * 3. Usage.
  54  * It is expected that vfs_t/vnode_t that are passed to fsh_foo() functions
  55  * are held by the caller when needed. fsh does no vfs_t/vnode_t locking.
  56  *
  57  * fsh_t is a structure filled out by the client. If a client does not want
  58  * to add/remove a hook for function foo(), he should fill the foo field of
  59  * fsh_t with NULL. Every hook has a type of corresponding vfsop/vnodeop with
  60  * two additional arguments:
  61  *      - fsh_int_t *fsh_int - this argument MUST be passed to
  62  *      fsh_next_foo(). fsh wouldn't know which hook to execute next
  63  *      without it
  64  *      - void *arg - this is the argument passed with fsh_t during
  65  *      installation
  66  *      - void (*remove_cb)(void *, fsh_handle_t) - hook remove callback
  67  *      (mentioned earlier); its first argument is arg, the second is the
  68  *      handle
  69  *

































  70  * After installation, an fsh_handle_t is returned to the caller.
  71  *
  72  * Every hook function is responsible for passing the control to the next
  73  * hook associated with a particular call. In order to provide an easy way to
  74  * modify the behaviour of a function call both before and after the
  75  * underlying vfsop/vnodeop (or next hook) execution, a hook has to call
  76  * fsh_next_foo() at some point. This function does necessary internal
  77  * operations and calls the next hook, until there's no hook left, then it
  78  * calls the underlying vfsop/vnodeop.
  79  * Example:
  80  * my_freefs(fsh_int_t *fsh_int, void *arg, vfs_t *vfsp) {
  81  *      cmn_err(CE_NOTE, "freefs called!\n");
  82  *      return (fsh_next_freefs(fsh_int, vfsp));
  83  * }
  84  *


  85  *
  86  * A client might want to fire callbacks when vfs_t's are being mounted




  87  * or freed. There's an fsh_callback_t structure provided to install such
  88  * callbacks along with the API.
  89  * It is legal to call fsh_hook_{install,remove}() inside a mount callback
  90  * WITHOUT holding the vfs_t.
  91  *
  92  * After vfs_t's free callback returns, all the handles associated with the
  93  * hooks installed on this vfs_t are invalid and must not be used.
  94  *
  95  *
  96  * 4. API
  97  * None of the APIs should be called during interrupt context above lock
  98  * level. The only exceptions are fsh_next_foo() functions, which do not use
  99  * locks.
 100  *
 101  * a) fsh.h
 102  * Any of these functions could be called inside a hook or a hook remove
 103  * callback.
 104  * fsh_callback_{install,remove}() must not be called inside a {mount,free}
 105  * callback. Doing so will cause a deadlock. Other functions can be called
 106  * inside {mount,free} callbacks.
 107  *

 108  * fsh_fs_enable(vfs_t *vfsp)
 109  * fsh_fs_disable(vfs_t *vfsp)
 110  *      Enables/disables fsh for a given vfs_t.
 111  *
 112  * fsh_hook_install(vfs_t *vfsp, fsh_t *hooks)
 113  *      Installs hooks on vfsp filesystem.
 114  *      It's important that hooks are executed in LIFO installation order,
 115  *      which means that if there are hooks A and B installed in this order, B
 116  *      is going to be executed before A.
 117  *      It returns a correct handle, or (-1) if hook/callback limit exceeded.
 118  *      The handle is valid until a free callback returns or an explicit call
 119  *      to fsh_hook_remove().
 120  *
 121  * fsh_hook_remove(fsh_handle_t handle)
 122  *      Removes a hook and invalidates the handle.
 123  *      It is guaranteed that after this function returns, calls to
 124  *      vnodeops/vfsops won't go through this hook, although there might be
 125  *      some threads still executing this hook. When hook remove callback is
 126  *      fired, it is guaranteed that the hook won't be executed anymore. It is
 127  *      safe to remove all the internal data associated with this hook inside
 128  *      the hook remove callback. The hook remove callback could be called
 129  *      inside fsh_hook_remove().
 130  *
 131  * fsh_next_foo(fsh_int_t *fsh_int, void *arg, ARGUMENTS)
 132  *      This is the function which should be called once in every hook. It
 133  *      does the necessary internal operations and passes control to the
 134  *      next hook or, if there's no hook left, to the underlying
 135  *      vfsop/vnodeop.
 136  *
 137  * fsh_callback_install(fsh_callback_t *callback)
 138  * fsh_callback_remove(fsh_callback_handle_t handle)
 139  *      Installs/removes callbacks for vfs_t mount/free. The mount callback
 140  *      is executed right before domount() returns. The free callback is
 141  *      called right before VFS_FREEVFS() is called.
 142  *      The fsh_callback_install() returns a correct handle, or (-1) if
 143  *      hook/callback limit exceeded.
 144  *

 145  * b) fsh_impl.h (for vfs.c and vnode.c only)
 146  * fsh_init()
 147  *      This call has to be done in vfsinit(). It initialises the fsh. It
 148  *      is absolutely necessary that this call is made before any other fsh
 149  *      operation.
 150  *
 151  * fsh_exec_mount_callbacks(vfs_t *vfsp)
 152  * fsh_exec_free_callbacks(vfs_t *vfsp)
 153  *      Used to execute all fsh callbacks for {mount,free} of a vfs_t.
 154  *
 155  * fsh_fsrec_destroy(struct fsh_fsrecord *fsrecp)
 156  *      Destroys an fsh_fsrecord structure. All the hooks installed on this
 157  *      vfs_t are then destroyed. free callback is called before this function.
 158  *
 159  * fsh_foo(ARGUMENTS)
 160  *      Function used to start executing the hook chain for a given call.
 161  *
 162  *
 163  * 5. Internals.
 164  * fsh_int_t is an internal hook structure. It is reference counted.
 165  * fshi_hold() and fshi_rele() should be used whenever needed.
 166  * fsh_int_t entries are elements of both fsh_map (global) and fshfsr_list
 167  * (local to vfs_t). All entries are unique and are identified by fshi_handle.
 168  *
 169  * fsh_int_t properties:
 170  *      - fsh_hook_install() sets the ref. counter to 1 and adds it to both
 171  *      fsh_map and fshfsr_list
 172  *      - fsh_hook_remove() decreases the ref. counter by 1, removes the hook
 173  *      from fsh_map and marks the hook as *doomed*
 174  *      - if fsh_int_t is on the fshfsr_list, it's alive and there is a thread
 175  *      executing it
 176  *      - if fsh_int_t is marked as *doomed*, the reference counter cannot
 177  *      be increased and thus no thread can acquire this fsh_int_t
 178  *      - ref. counter can drop to 0 only after an fsh_hook_remove() call; this
 179  *      also means that the fsh_int_t is *doomed* and isn't a part of fsh_map
 180  *      - fsh_int_t could be also destroyed without fsh_hook_remove() call,
 181  *      that happens only inside fsh_fsrec_destroy() where it is guaranteed
 182  *      that there is no thread executing the hook
 183  *
 184  *
 185  * fsh_fsrecord_t is a structure which lives inside a vfs_t.
 186  * fsh_fsrecord_t contains:
 187  *      - an rw-lock that protects the structure
 188  *      - a list of hooks installed on this vfs_t
 189  *      - a flag which tells whether fsh is enabled on this vfs_t
 190  *
 191  *
 192  * fsh_prepare_fsrec rule:
 193  * Every function that needs vfsp->vfs_fshrecord has to call
 194  * fsh_prepare_fsrec() first. If and only if the call is made, it is safe to
 195  * use vfsp->vfs_fshrecord.
 196  *
 197  * Unfortunately, because of unexpected behaviour of some filesystems (no use
 198  * of vfs_alloc()/vfs_init()) there's no good place to initialise the
 199  * fsh_fshrecord_t structure. The approach being used here is to check if it's
 200  * initialised in every call. Because of the fact that no lock could be used
 201  * here (the same problem with initialisation), a spinlock is used.  This is
 202  * explained in more detail in a comment before fsh_prepare_fsrec(). After
 203  * calling fsh_prepare_fsrec() it's completely safe to keep the vfs_fshrecord
 204  * pointer locally, because it won't be changed until vfs_free() is called.
 205  *
 206  * The only exception from the fsh_prepare_fsrec() rule is vfs_free(),
 207  * where it is expected that no other fsh calls would be made for the
 208  * vfs_t that's being freed. That's why vfs_fshrecord could be only NULL or a
 209  * valid pointer and could not be concurrently accessed.


 210  *

 211  * When there are no fsh functions (that use a particular fsh_fsrecord_t)
 212  * executing, the vfs_fshrecord pointer won't be equal to fsh_res_ptr. It
 213  * would be NULL or a pointer to an initialised fsh_fsrecord_t.
 214  *








 215  *

 216  * Callbacks:
 217  * Mount callbacks are executed by a call to fsh_exec_mount_callbacks() right
 218  * before returning from domount()@vfs.c.
 219  *
 220  * Free callbacks are executed by a call to fsh_exec_free_callbacks() right
 221  * before calling VFS_FREEVFS(), after vfs_t's reference count drops to 0.
 222  *
 223  *
 224  * fsh_next_foo(fsh_int_t *fshi, ARGUMENTS)
 225  *      This function is quite simple. It takes the fsh_int_t and passes control
 226  *      to the next hook or to the underlying vnodeop/vfsop.
 227  *
 228  *
 229  * 6. Locking
 230  * a) public
 231  * fsh does no vfs_t nor vnode_t locking. It is expected that whenever it is
 232  * needed, the client does that.
 233  *
 234  * fsh_callback_{install,remove} must not be called inside a callback, because
 235  * it will cause a deadlock.
 236  *
 237  * b) internal



 238  * Locking diagram:
 239  *
 240  *     fsh_hook_install()    fsh_hook_remove()   fsh_fsrec_destroy()
 241  *           |                     |                |
 242  *           |                     |                |
 243  *           +------------------+  |   +------------+





 244  *                              |  |   |
 245  *                              V  V   V
 246  *                              fsh_lock
 247  *                                 |   |
 248  *                                 |   +----- fshfsr_lock, RW_WRITER ---+
 249  *                                 |                                    |
 250  *                                 V                                    |
 251  *               +---------------------------------------+              |
 252  *               |               fsh_map                 |              |
 253  *               |                                       |              |
 254  *          +----|-> vfsp->vfs_fshrecord->fshfsr_list <--|--------------+
 255  *          |    +------------------------------^--------+
 256  *          |                                   |
 257  *          |                                   |
 258  * fshfsr_lock, RW_READER              fshfsr_lock, RW_WRITER
 259  *          |                                   |
 260  *          |                                   |
 261  *   fsh_read(),                            fshi_rele()
 262  *   fsh_write(),
 263  *   ...,                               Might be called from:
 264  *   fsh_next_read(),                    fsh_hook_remove()
 265  *   fsh_next_write(),                   fsh_read(), fsh_write(), ...
 266  *   ...                                 fsh_next_read(), fsh_next_write(), ...
 267  *

 268  * fsh_lock is a global lock for administrative path (fsh_hook_install,
 269  * fsh_hook_remove) and fsh_fsrec_destroy() (which is semi-administrative, since
 270  * it destroys the unremoved hooks). It is used only when fsh_map needs to be
 271  * locked. The usage of this lock guarantees that the data in fsh_map and
 272  * fshfsr_lists is consistent.






 273  */
 274 
 275 
 276 /* Internals */
 277 struct fsh_int {
 278         fsh_handle_t    fshi_handle;
 279         fsh_t           fshi_hooks;
 280         vfs_t           *fshi_vfsp;
 281 
 282         kmutex_t        fshi_lock;
 283         uint64_t        fshi_ref;
 284         uint64_t        fshi_doomed;    /* changed inside fsh_lock */
 285 
 286         /* next node in fshfsr_list */
 287         list_node_t     fshi_next;
 288 
 289         /* next node in fsh_map */
 290         list_node_t     fshi_global;
 291 };
 292 
 293 typedef struct fsh_callback_int {
 294         fsh_callback_t  fshci_cb;
 295         fsh_callback_handle_t fshci_handle;
 296         list_node_t     fshci_next;
 297 } fsh_callback_int_t;
 298 
 299 







 300 static kmutex_t fsh_lock;
 301 
 302 /*
 303  * fsh_fsrecord_t is the main internal structure. It's content is protected
 304  * by fshfsr_lock. The fshfsr_list is a list of fsh_int_t hook entries for
 305  * the vfs_t that contains the fsh_fsrecord_t.
 306  */
 307 struct fsh_fsrecord {
 308         krwlock_t       fshfsr_lock;
 309         int             fshfsr_enabled;
 310         list_t          fshfsr_list;
 311 };
 312 
 313 /*
 314  * Global list of fsh_int_t. Protected by fsh_lock.
 315  */
 316 static list_t fsh_map;
 317 
 318 /*
 319  * Global list of fsh_callback_int_t.
 320  */
 321 static krwlock_t fsh_cblist_lock;


 322 static list_t fsh_cblist;
 323 
 324 /*
 325  * A reserved pointer for fsh purposes. It is used because of the method
 326  * chosen for solving concurrency issues with vfs_fshrecord. The full
 327  * explanation is in the big theory statement at the beginning of this
 328  * file and above fsh_fsrec_prepare(). It is initialised in fsh_init().
 329  */
 330 static void *fsh_res_ptr;
 331 
 332 static fsh_fsrecord_t *fsh_fsrec_create();
 333 
 334 int fsh_limit = INT_MAX;
 335 static id_space_t *fsh_idspace;
 336 
 337 /*
 338  * fsh_prepare_fsrec()
 339  *
 340  * Important note:
 341  * Before using this function, fsh_init() MUST be called. We do that in
 342  * vfsinit()@vfs.c.
 343  *
 344  * One would ask, why isn't the vfsp->vfs_fshrecord initialised when the
 345  * vfs_t is created. Unfortunately, some filesystems (e.g. fifofs) do not
 346  * call vfs_init() or even vfs_alloc(), It's possible that some unbundled
 347  * filesystems could do the same thing. That's why this solution is
 348  * introduced. It should be called before any code that needs access to
 349  * vfs_fshrecord.
 350  *
 351  * Locking:
 352  * There are no locks here, because there's no good place to initialise
 353  * the lock. Concurrency issues are solved by using atomic instructions
 354  * and a spinlock, which is spinning only once for a given vfs_t. Because
 355  * of that, the usage of the spinlock isn't bad at all.
 356  *
 357  * How it works:
 358  * a) if vfsp->vfs_fshrecord equals NULL, atomic_cas_ptr() changes it to
 359  *      fsh_res_ptr. That's a signal for other threads, that the structure
 360  *      is being initialised.
 361  * b) if vfsp->vfs_fshrecord equals fsh_res_ptr, that means we have to wait,
 362  *      because vfs_fshrecord is being initialised by another call.
 363  * c) other cases:
 364  *      vfs_fshrecord is already initialised, so we can use it. It won't change
 365  *      until vfs_free() is called. It can't happen when someone is holding
 366  *      the vfs_t, which is expected from the caller of fsh API.
 367  */
 368 static void
 369 fsh_prepare_fsrec(vfs_t *vfsp)
 370 {
 371         fsh_fsrecord_t *fsrec;
 372 
 373         while ((fsrec = atomic_cas_ptr(&vfsp->vfs_fshrecord, NULL,
 374             fsh_res_ptr)) == fsh_res_ptr)
 375                 ;
 376 
 377         if (fsrec == NULL)
 378                 atomic_swap_ptr(&vfsp->vfs_fshrecord, fsh_fsrec_create());
 379 }
 380 
 381 /*
 382  * API for enabling/disabling fsh per vfs_t.
 383  *
 384  * A newly created vfs_t has fsh enabled by default. If one would want to change
 385  * this behaviour, mount callbacks could be used.
 386  *
 387  * The caller is expected to hold the vfs_t.
 388  *
 389  * These functions must NOT be called in a hook.
 390  */
 391 void
 392 fsh_fs_enable(vfs_t *vfsp)
 393 {
 394         fsh_prepare_fsrec(vfsp);
 395 
 396         rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
 397         vfsp->vfs_fshrecord->fshfsr_enabled = 1;
 398         rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
 399 }
 400 
 401 void
 402 fsh_fs_disable(vfs_t *vfsp)
 403 {
 404         fsh_prepare_fsrec(vfsp);
 405 
 406         rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
 407         vfsp->vfs_fshrecord->fshfsr_enabled = 0;
 408         rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
 409 }
 410 
 411 /*
 412  * API used for installing hooks. fsh_handle_t is returned for further
 413  * actions (currently just removing) on this set of hooks.
 414  *
 415  * fsh_t fields:
 416  * - arg - argument passed to every hook
 417  * - remove_cb - remove callback, called after a hook is removed and all the
 418  *      threads stop executing it
 419  * - read, write, ... - pointers to hooks for corresponding vnodeops/vfsops;
 420  *      if there is no hook desired for an operation, it should be set to
 421  *      NULL
 422  *
 423  * It's important that the hooks are executed in LIFO installation order (they
 424  * are added to the head of the hook list).
 425  *
 426  * The caller is expected to hold the vfs_t.
 427  *
 428  * Returns (-1) if hook/callback limit exceeded, handle otherwise.
 429  */
 430 fsh_handle_t
 431 fsh_hook_install(vfs_t *vfsp, fsh_t *hooks)
 432 {
 433         fsh_handle_t    handle;
 434         fsh_int_t       *fshi;
 435 
 436         fsh_prepare_fsrec(vfsp);
 437 
 438         if ((handle = id_alloc(fsh_idspace)) == -1)
 439                 return (-1);
 440 
 441         fshi = kmem_alloc(sizeof (*fshi), KM_SLEEP);
 442         mutex_init(&fshi->fshi_lock, NULL, MUTEX_DRIVER, NULL);
 443         (void) memcpy(&fshi->fshi_hooks, hooks, sizeof (fshi->fshi_hooks));
 444         fshi->fshi_handle = handle;
 445         fshi->fshi_doomed = 0;
 446         fshi->fshi_ref = 1;
 447         fshi->fshi_vfsp = vfsp;
 448 
 449         mutex_enter(&fsh_lock);
 450         rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
 451         list_insert_head(&vfsp->vfs_fshrecord->fshfsr_list, fshi);
 452         rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
 453 
 454         list_insert_head(&fsh_map, fshi);
 455         mutex_exit(&fsh_lock);
 456 


 482 fshi_rele(fsh_int_t *fshi)
 483 {
 484         int destroy;
 485 
 486         mutex_enter(&fshi->fshi_lock);
 487         ASSERT(fshi->fshi_ref > 0);
 488         fshi->fshi_ref--;
 489         if (fshi->fshi_ref == 0) {
 490                 ASSERT(fshi->fshi_doomed == 1);
 491                 destroy = 1;
 492         } else {
 493                 destroy = 0;
 494         }
 495         mutex_exit(&fshi->fshi_lock);
 496 
 497         if (destroy) {
 498                 /*
 499                  * At this point, we are sure that fsh_hook_remove() has been
 500                  * called, that's why we don't remove the fshi from fsh_map.
 501                  * fsh_hook_remove() did that already.

 502                  */
 503                 fsh_fsrecord_t *fsrecp;
 504 
 505                 if (fshi->fshi_hooks.remove_cb != NULL)
 506                         (*fshi->fshi_hooks.remove_cb)(
 507                             fshi->fshi_hooks.arg, fshi->fshi_handle);
 508                 /*
 509                  * We don't have to call fsh_prepare_fsrec() here.
 510                  * fsh_fsrecord_t is already initialised, because we've found a
 511                  * mapping for the given handle.
 512                  */
 513                 fsrecp = fshi->fshi_vfsp->vfs_fshrecord;
 514                 ASSERT(fsrecp != NULL);
 515                 ASSERT(fsrecp != fsh_res_ptr);
 516 
 517                 rw_enter(&fsrecp->fshfsr_lock, RW_WRITER);
 518                 list_remove(&fsrecp->fshfsr_list, fshi);
 519                 rw_exit(&fsrecp->fshfsr_lock);
 520 




 521                 id_free(fsh_idspace, fshi->fshi_handle);
 522                 mutex_destroy(&fshi->fshi_lock);
 523                 kmem_free(fshi, sizeof (*fshi));
 524         }
 525 }
 526 
 527 /*
 528  * Used for removing a hook set.
 529  *
 530  * fsh_hook_remove() invalidates the given handle.
 531  *
 532  * It is guaranteed, that after successful return from fsh_hook_remove(),
 533  * calls to vnodeops/vfsops, on the vfs_t on which the hook is installed, won't
 534  * go through this hook.
 535  *
 536  * There is no guarantee that after fsh_hook_remove() returns, the hook
 537  * associated with the handle won't be executing. Instead, it is guaranteed that
 538  * when remove_cb() is called, the hook finished its execution in all threads.
 539  * It is safe to destroy all internal data associated with this hook inside
 540  * remove_cb().


 568         mutex_exit(&fsh_lock);
 569 
 570         fshi_rele(fshi);
 571 
 572         return (0);
 573 }
 574 
 575 /*
 576  * API for installing global mount/free callbacks.
 577  *
 578  * fsh_callback_t fields:
 579  * fshc_arg - argument passed to the callbacks
 580  * fshc_free - callback fired before VFS_FREEVFS() is called, after vfs_count
 581  *      drops to 0
 582  * fshc_mount - callback fired right before returning from domount()
 583  * The first argument of these callbacks is the vfs_t that is mounted/freed.
 584  * The second one is the fshc_arg.
 585  *
 586  * fsh_callback_handle_t is filled out by this function.
 587  *
 588  * This function must NOT be called in a callback, because it will cause
 589  * a deadlock.
 590  *
 591  * Returns (-1) if hook/callback limit exceeded.


 592  */
 593 fsh_callback_handle_t
 594 fsh_callback_install(fsh_callback_t *callback)
 595 {
 596         fsh_callback_int_t *fshci;
 597         fsh_callback_handle_t handle;
 598 
 599         if ((handle = id_alloc(fsh_idspace)) == -1)
 600                 return (-1);
 601 
 602         fshci = (fsh_callback_int_t *)kmem_alloc(sizeof (*fshci), KM_SLEEP);
 603         (void) memcpy(&fshci->fshci_cb, callback, sizeof (fshci->fshci_cb));
 604         fshci->fshci_handle = handle;
 605 
 606         /* If it is called in a {mount,free} callback, causes deadlock. */
 607         rw_enter(&fsh_cblist_lock, RW_WRITER);
 608         list_insert_head(&fsh_cblist, fshci);
 609         rw_exit(&fsh_cblist_lock);
 610 
 611         return (handle);
 612 }
 613 
 614 /*
 615  * API for removing global mount/free callbacks.
 616  *
 617  * This function must NOT be called in a callback, because it will cause
 618  * a deadlock.
 619  *
 620  * Returns (-1) if callback wasn't found, 0 otherwise.


 621  */
 622 int
 623 fsh_callback_remove(fsh_callback_handle_t handle)
 624 {
 625         fsh_callback_int_t *fshci;
 626 
 627         /* If it is called in a {mount,free} callback, causes deadlock. */
 628         rw_enter(&fsh_cblist_lock, RW_WRITER);
 629         for (fshci = list_head(&fsh_cblist); fshci != NULL;
 630             fshci = list_next(&fsh_cblist, fshci)) {
 631                 if (fshci->fshci_handle == handle) {
 632                         list_remove(&fsh_cblist, fshci);
 633                         break;
 634                 }
 635         }
 636         rw_exit(&fsh_cblist_lock);
 637 


 638         if (fshci == NULL)
 639                 return (-1);
 640 
 641         kmem_free(fshci, sizeof (*fshci));
 642         id_free(fsh_idspace, handle);
 643 
 644         return (0);
 645 }
 646 
 647 /*
 648  * This function is executed right before returning from domount()@vfs.c.
 649  * We are sure that it's called only after fsh_init().
 650  * It executes all the mount callbacks installed in the fsh.
 651  *
 652  * Since fsh_exec_mount_callbacks() is called only inside domount(), it is legal
 653  * to call fsh_hook_{install,remove}() inside a mount callback WITHOUT holding
 654  * this vfs_t. This guarantee should be preserved, because it's in the "Usage"
 655  * section in the big theory statement at the top of this file.
 656  */
 657 void
 658 fsh_exec_mount_callbacks(vfs_t *vfsp)
 659 {
 660         fsh_callback_int_t *fshci;
 661         fsh_callback_t *cb;

 662 
 663         rw_enter(&fsh_cblist_lock, RW_READER);












 664         for (fshci = list_head(&fsh_cblist); fshci != NULL;
 665             fshci = list_next(&fsh_cblist, fshci)) {
 666                 cb = &fshci->fshci_cb;
 667                 if (cb->fshc_mount != NULL)
 668                         (*(cb->fshc_mount))(vfsp, cb->fshc_arg);
 669         }
 670         rw_exit(&fsh_cblist_lock);






 671 }
 672 
 673 /*
 674  * This function is executed right before VFS_FREEVFS() is called in
 675  * vfs_rele()@vfs.c. We are sure that it's called only after fsh_init().
 676  * It executes all the free callbacks installed in the fsh.
 677  *
 678  * free() callback is the point after the handles associated with the hooks
 679  * installed on this vfs_t become invalid
 680  */
 681 void
 682 fsh_exec_free_callbacks(vfs_t *vfsp)
 683 {
 684         fsh_callback_int_t *fshci;
 685         fsh_callback_t *cb;

 686 
 687         rw_enter(&fsh_cblist_lock, RW_READER);












 688         for (fshci = list_head(&fsh_cblist); fshci != NULL;
 689             fshci = list_next(&fsh_cblist, fshci)) {
 690                 cb = &fshci->fshci_cb;
 691                 if (cb->fshc_free != NULL)
 692                         (*(cb->fshc_free))(vfsp, cb->fshc_arg);
 693         }
 694         rw_exit(&fsh_cblist_lock);






 695 }
 696 
 697 /*
 698  * API for vnode.c/vfs.c to start executing the fsh for a given operation.
 699  *
 700  * fsh_xxx() tries to find the first non-NULL xxx hook on the fshfsr_list. If it
 701  * does, it executes it. If not, underlying vnodeop/vfsop is called.
 702  *
 703  * These interfaces are using fsh_res_ptr (in fsh_prepare_fsrec()), so it's
 704  * absolutely necessary to call fsh_init() before using them. That's done in
 705  * vfsinit().
 706  *
 707  * While these functions are executing, it's expected that necessary vfs_t's
 708  * are held so that vfs_free() isn't called. vfs_free() expects that no one
 709  * accesses vfs_fshrecord of a given vfs_t.
 710  * It's also the caller's responsibility to keep vnode_t passed to fsh_foo()
 711  * alive and valid.
 712  * All these expectations are met because these functions are used only in
 713  * corresponding {fop,fsop}_foo() functions.
 714  */
 715 int
 716 fsh_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
 717         caller_context_t *ct)
 718 {
 719         int ret;
 720         fsh_fsrecord_t *fsrecp;
 721         fsh_int_t *fshi;


 722 
 723         fsh_prepare_fsrec(vp->v_vfsp);
 724         fsrecp = vp->v_vfsp->vfs_fshrecord;
 725 
 726         rw_enter(&fsrecp->fshfsr_lock, RW_READER);
 727         if (!(fsrecp->fshfsr_enabled)) {
 728                 rw_exit(&fsrecp->fshfsr_lock);
 729                 return ((*(vp->v_op->vop_read))(vp, uiop, ioflag, cr, ct));
 730         }
 731 



 732         for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL;
 733             fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
 734                 if (fshi->fshi_hooks.read != NULL)
 735                         if (fshi_hold(fshi))
 736                                 break;



 737         }


 738         rw_exit(&fsrecp->fshfsr_lock);
 739 
 740         if (fshi == NULL)
 741                 return ((*(vp->v_op->vop_read))(vp, uiop, ioflag, cr, ct));







 742 
 743         ret = (*fshi->fshi_hooks.read)(fshi, fshi->fshi_hooks.arg,







 744             vp, uiop, ioflag, cr, ct);
 745         fshi_rele(fshi);




 746         return (ret);
 747 }
 748 
 749 int
 750 fsh_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
 751         caller_context_t *ct)
 752 {
 753         fsh_int_t *fshi;
 754         int ret;
 755         fsh_fsrecord_t *fsrecp;



 756 
 757         fsh_prepare_fsrec(vp->v_vfsp);
 758         fsrecp = vp->v_vfsp->vfs_fshrecord;
 759 
 760         rw_enter(&fsrecp->fshfsr_lock, RW_READER);
 761         if (!(vp->v_vfsp->vfs_fshrecord->fshfsr_enabled)) {
 762                 rw_exit(&fsrecp->fshfsr_lock);
 763                 return ((*(vp->v_op->vop_write))(vp, uiop, ioflag, cr, ct));
 764         }
 765 



 766         for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL;
 767             fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
 768                 if (fshi->fshi_hooks.write != NULL)
 769                         if (fshi_hold(fshi))
 770                                 break;



 771         }


 772         rw_exit(&fsrecp->fshfsr_lock);
 773 
 774         if (fshi == NULL)
 775                 return ((*(vp->v_op->vop_write))(vp, uiop, ioflag, cr, ct));







 776 
 777         ret = (*fshi->fshi_hooks.write)(fshi, fshi->fshi_hooks.arg,







 778             vp, uiop, ioflag, cr, ct);
 779         fshi_rele(fshi);




 780         return (ret);
 781 }
 782 
 783 int
 784 fsh_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
 785 {

 786         fsh_fsrecord_t *fsrecp;
 787         fsh_int_t *fshi;
 788         int ret;

 789 
 790         fsh_prepare_fsrec(vfsp);
 791         fsrecp = vfsp->vfs_fshrecord;
 792 
 793         rw_enter(&fsrecp->fshfsr_lock, RW_READER);
 794         if (!(fsrecp->fshfsr_enabled)) {
 795                 rw_exit(&fsrecp->fshfsr_lock);
 796                 return ((*(vfsp->vfs_op->vfs_mount))(vfsp, mvp, uap, cr));
 797         }
 798 



 799         for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL;
 800             fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
 801                 if (fshi->fshi_hooks.mount != NULL)
 802                         if (fshi_hold(fshi))
 803                                 break;



 804         }


 805         rw_exit(&fsrecp->fshfsr_lock);
 806 
 807         if (fshi == NULL)
 808                 return ((*(vfsp->vfs_op->vfs_mount))(vfsp, mvp, uap, cr));







 809 
 810         ret = (*fshi->fshi_hooks.mount)(fshi, fshi->fshi_hooks.arg,







 811             vfsp, mvp, uap, cr);
 812         fshi_rele(fshi);




 813         return (ret);
 814 }
 815 
 816 int
 817 fsh_unmount(vfs_t *vfsp, int flag, cred_t *cr)
 818 {
             /*
              * Dispatch an unmount request for vfsp. If fsh is disabled on
              * vfsp, or no unmount hook on its list can be held, the
              * filesystem's own vfs_unmount is called directly. Otherwise the
              * first such hook is invoked with the fsh_int_t and the client
              * argument prepended to the original arguments, the hold is
              * released once the hook returns, and the hook's return value is
              * returned.
              */
 819         fsh_fsrecord_t *fsrecp;
 820         fsh_int_t *fshi;
 821         int ret;

 822 
 823         fsh_prepare_fsrec(vfsp);
 824         fsrecp = vfsp->vfs_fshrecord;
 825 
 826         rw_enter(&fsrecp->fshfsr_lock, RW_READER);
 827         if (!(fsrecp->fshfsr_enabled)) {
 828                 rw_exit(&fsrecp->fshfsr_lock);
 829                 return ((*(vfsp->vfs_op->vfs_unmount))(vfsp, flag, cr));
 830         }
 831 
             /*
              * Walk the list from its head and stop at the first entry that
              * has an unmount hook and for which fshi_hold() returns non-zero
              * (presumably a successful hold; fshi_hold() is not visible
              * here - confirm).
              */
 832         for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL;
 833             fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
 834                 if (fshi->fshi_hooks.unmount != NULL)
 835                         if (fshi_hold(fshi))
 836                                 break;



 837         }

             /* The list lock is dropped before the hook (or vfsop) runs. */
 838         rw_exit(&fsrecp->fshfsr_lock);
 839 
 840         if (fshi == NULL)
 841                 return ((*(vfsp->vfs_op->vfs_unmount))(vfsp, flag, cr));







 842 
 843         ret = (*fshi->fshi_hooks.unmount)(fshi, fshi->fshi_hooks.arg,







 844             vfsp, flag, cr);
 845         fshi_rele(fshi);




 846         return (ret);
 847 }
 848 
 849 /*
 850  * This is the function used by fsh_prepare_fsrec() to allocate a new
 851  * fsh_fsrecord. This function is called by the first function which
 852  * accesses the vfs_fshrecord and finds out it's NULL.
      *
      * Returns a zero-filled record (allocated with KM_SLEEP) whose hook list
      * and rw-lock are initialised and whose fshfsr_enabled flag is set to 1.
 853  */
 854 static fsh_fsrecord_t *
 855 fsh_fsrec_create()
 856 {
 857         fsh_fsrecord_t *fsrecp;
 858 
 859         fsrecp = (fsh_fsrecord_t *)kmem_zalloc(sizeof (*fsrecp), KM_SLEEP);
             /* Entries are fsh_int_t, linked through their fshi_next member. */
 860         list_create(&fsrecp->fshfsr_list, sizeof (fsh_int_t),
 861             offsetof(fsh_int_t, fshi_next));
 862         rw_init(&fsrecp->fshfsr_lock, NULL, RW_DRIVER, NULL);
             /* A freshly created record starts with fsh enabled. */
 863         fsrecp->fshfsr_enabled = 1;
 864         return (fsrecp);
 865 }
 866 
 867 
 868 /*
 869  * This call can be used ONLY in vfs_free(). It's assumed that no other
 870  * fsh calls using the vfs_t that owns the fsh_fsrecord to be destroyed
 871  * are executing while a call to fsh_fsrec_destroy() is made. With these
 872  * assumptions, no concurrency issues occur.
 873  *
 874  * Before calling this function outside the fsh, it's sufficient and
 875  * required to check if the passed fsh_fsrecord * is not NULL. We don't
 876  * have to check if it is not equal to fsh_res_ptr, because all the fsh API
 877  * calls involving this vfs_t should end before vfs_free() is called
 878  * (outside the fsh, fsh_fsrecord is never equal to fsh_res_ptr). That is
 879  * guaranteed by the explicit requirement that the caller of fsh API holds
 880  * the vfs_t when needed.
 881  *
 882  * All the remaining hooks are being removed.
      *
      * For every hook still on the record's list: the hook is unlinked from
      * both the record's list and fsh_map under fsh_lock, its remove callback
      * (if any) is called with fsh_lock dropped, and its handle, mutex and
      * memory are released. Finally the record itself is torn down and freed.
 883  */
 884 void
 885 fsh_fsrec_destroy(struct fsh_fsrecord *volatile fsrecp)
 886 {
 887         fsh_int_t *fshi;
 888 
 889         VERIFY(fsrecp != NULL);
 890 
 891         _NOTE(CONSTCOND)
 892         while (1) {
 893                 mutex_enter(&fsh_lock);
 894                 /* No need here to hold fshfsr_lock */
 895                 fshi = list_remove_head(&fsrecp->fshfsr_list);

 896                 if (fshi == NULL) {
                             /* List is empty: every hook has been destroyed. */
 897                         mutex_exit(&fsh_lock);
 898                         break;
 899                 }
                     /* A hook found here is expected not to be doomed. */
 900                 ASSERT(fshi->fshi_doomed == 0);
 901                 list_remove(&fsh_map, fshi);
 902                 mutex_exit(&fsh_lock);
 903 
                     /* The remove callback runs without fsh_lock held. */
 904                 if (fshi->fshi_hooks.remove_cb != NULL)
 905                         (*fshi->fshi_hooks.remove_cb)(fshi->fshi_hooks.arg,
 906                             fshi->fshi_handle);

 907                 id_free(fsh_idspace, fshi->fshi_handle);
 908                 mutex_destroy(&fshi->fshi_lock);
 909                 kmem_free(fshi, sizeof (*fshi));
 910 
 911         }
 912 
 913         list_destroy(&fsrecp->fshfsr_list);
 914         rw_destroy(&fsrecp->fshfsr_lock);
 915         kmem_free(fsrecp, sizeof (*fsrecp));
 916 }
 917 
 918 /*
 919  * fsh_init() is called in vfsinit()@vfs.c. This function MUST be called
 920  * before every other fsh call.
      *
      * It initialises the global fsh state: the callback list and its rw-lock,
      * fsh_lock and the global hook list fsh_map, the reserved pointer
      * fsh_res_ptr, and the id space that handles are allocated from.
 921  */
 922 void
 923 fsh_init(void)
 924 {
 925         rw_init(&fsh_cblist_lock, NULL, RW_DRIVER, NULL);

             /* fsh_callback_int_t entries are linked through fshci_next. */
 926         list_create(&fsh_cblist, sizeof (fsh_callback_int_t),
 927             offsetof(fsh_callback_int_t, fshci_next));
 928 
 929         mutex_init(&fsh_lock, NULL, MUTEX_DRIVER, NULL);
 930 
             /* fsh_int_t entries are linked into fsh_map through fshi_global. */
 931         list_create(&fsh_map, sizeof (fsh_int_t), offsetof(fsh_int_t,
 932             fshi_global));
 933 
 934         /* See comment above fsh_prepare_fsrec() */
 935         fsh_res_ptr = (void *)-1;
 936 
             /* Id space named "fsh", created with bounds 0 and fsh_limit. */
 937         fsh_idspace = id_space_create("fsh", 0, fsh_limit);
 938 }
 939 
 940 /*
 941  * These functions are used to pass control to the next hook or underlying
 942  * vop or vfsop. The client doesn't have to worry about any locking.
      *
      * fsh_next_read() continues a read: it runs the next read hook that
      * follows fshi on the list of the vfs_t owning vp, or the vnode's own
      * vop_read when there is none, and returns that call's result.
 943  */
 944 int
 945 fsh_next_read(fsh_int_t *fshi, vnode_t *vp, uio_t *uiop, int ioflag,
 946         cred_t *cr, caller_context_t *ct)
 947 {
 948         int ret;
 949         fsh_fsrecord_t *fsrecp = vp->v_vfsp->vfs_fshrecord;
 950 
 951         /*
 952          * The passed fshi is the previous hook (the one from which we've been
 953          * called). We need to find the next one.
              * The search stops at the first later entry that has a read hook
              * and for which fshi_hold() returns non-zero.
 954          */
 955         rw_enter(&fsrecp->fshfsr_lock, RW_READER);
 956         for (fshi = list_next(&fsrecp->fshfsr_list, fshi); fshi != NULL;
 957             fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
 958                 if (fshi->fshi_hooks.read != NULL)
 959                         if (fshi_hold(fshi))
 960                                 break;
 961         }
 962         rw_exit(&fsrecp->fshfsr_lock);
 963 
             /* End of the chain: hand the request to the underlying vnodeop. */
 964         if (fshi == NULL)
 965                 return ((*vp->v_op->vop_read)(vp, uiop, ioflag, cr, ct));
 966 
 967         ret = (*fshi->fshi_hooks.read)(fshi, fshi->fshi_hooks.arg,
 968             vp, uiop, ioflag, cr, ct);
             /* Release the hold taken in the loop above. */
 969         fshi_rele(fshi);
 970         return (ret);
 971 }
 972 
 973 int
 974 fsh_next_write(fsh_int_t *fshi, vnode_t *vp, uio_t *uiop, int ioflag,
 975         cred_t *cr, caller_context_t *ct)
 976 {
             /*
              * Continue a write: run the next write hook that follows fshi on
              * the list of the vfs_t owning vp, or the vnode's own vop_write
              * when there is none. Returns that call's result.
              */
 977         fsh_fsrecord_t *fsrecp = vp->v_vfsp->vfs_fshrecord;
 978         int ret;
 979 
 980         /*
 981          * The passed fshi is the previous hook (the one from which we've been
 982          * called). We need to find the next one.
              * The search stops at the first later entry that has a write hook
              * and for which fshi_hold() returns non-zero.
 983          */
 984         rw_enter(&fsrecp->fshfsr_lock, RW_READER);
 985         for (fshi = list_next(&fsrecp->fshfsr_list, fshi); fshi != NULL;
 986             fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
 987                 if (fshi->fshi_hooks.write != NULL)
 988                         if (fshi_hold(fshi))
 989                                 break;
 990         }
 991         rw_exit(&fsrecp->fshfsr_lock);
 992 
             /* End of the chain: hand the request to the underlying vnodeop. */
 993         if (fshi == NULL)
 994                 return ((*vp->v_op->vop_write)(vp, uiop, ioflag, cr, ct));
 995 
 996         ret = (*fshi->fshi_hooks.write)(fshi, fshi->fshi_hooks.arg,
 997             vp, uiop, ioflag, cr, ct);
             /* Release the hold taken in the loop above. */
 998         fshi_rele(fshi);
 999         return (ret);
1000 }
1001 
1002 int
1003 fsh_next_mount(fsh_int_t *fshi, vfs_t *vfsp, vnode_t *mvp, struct mounta *uap,
1004         cred_t *cr)
1005 {
             /*
              * Continue a mount: run the next mount hook that follows fshi on
              * vfsp's list, or the filesystem's own vfs_mount when there is
              * none. Returns that call's result.
              */
1006         fsh_fsrecord_t *fsrecp = vfsp->vfs_fshrecord;
1007         int ret;
1008 
1009         /*
1010          * The passed fshi is the previous hook (the one from which we've been
1011          * called). We need to find the next one.
              * The search stops at the first later entry that has a mount hook
              * and for which fshi_hold() returns non-zero.
1012          */
1013         rw_enter(&fsrecp->fshfsr_lock, RW_READER);
1014         for (fshi = list_next(&fsrecp->fshfsr_list, fshi); fshi != NULL;
1015             fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
1016                 if (fshi->fshi_hooks.mount != NULL)
1017                         if (fshi_hold(fshi))
1018                                 break;
1019         }
1020         rw_exit(&fsrecp->fshfsr_lock);
1021 
             /* End of the chain: hand the request to the underlying vfsop. */
1022         if (fshi == NULL)
1023                 return ((*(vfsp->vfs_op->vfs_mount))(vfsp, mvp, uap, cr));
1024 
1025         ret = (*fshi->fshi_hooks.mount)(fshi, fshi->fshi_hooks.arg,
1026             vfsp, mvp, uap, cr);
             /* Release the hold taken in the loop above. */
1027         fshi_rele(fshi);
1028         return (ret);
1029 }
1030 
1031 int
1032 fsh_next_unmount(fsh_int_t *fshi, vfs_t *vfsp, int flag, cred_t *cr)
1033 {
             /*
              * Continue an unmount: run the next unmount hook that follows
              * fshi on vfsp's list, or the filesystem's own vfs_unmount when
              * there is none. Returns that call's result.
              */
1034         fsh_fsrecord_t *fsrecp = vfsp->vfs_fshrecord;
1035         int ret;
1036 
1037         /*
1038          * The passed fshi is the previous hook (the one from which we've been
1039          * called). We need to find the next one.
              * The search stops at the first later entry that has an unmount
              * hook and for which fshi_hold() returns non-zero.
1040          */
1041         rw_enter(&fsrecp->fshfsr_lock, RW_READER);
1042         for (fshi = list_next(&fsrecp->fshfsr_list, fshi); fshi != NULL;
1043             fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
1044                 if (fshi->fshi_hooks.unmount != NULL)
1045                         if (fshi_hold(fshi))
1046                                 break;
1047         }
1048         rw_exit(&fsrecp->fshfsr_lock);
1049 
             /* End of the chain: hand the request to the underlying vfsop. */
1050         if (fshi == NULL)
1051                 return ((*vfsp->vfs_op->vfs_unmount)(vfsp, flag, cr));
1052 
1053         ret = (*fshi->fshi_hooks.unmount)(fshi, fshi->fshi_hooks.arg,
1054             vfsp, flag, cr);
             /* Release the hold taken in the loop above. */
1055         fshi_rele(fshi);
1056         return (ret);
1057 }


  18 #include <sys/fsh.h>
  19 #include <sys/fsh_impl.h>
  20 #include <sys/id_space.h>
  21 #include <sys/kmem.h>
  22 #include <sys/ksynch.h>
  23 #include <sys/list.h>
  24 #include <sys/sunddi.h>
  25 #include <sys/sysmacros.h>
  26 #include <sys/types.h>
  27 #include <sys/vfs.h>
  28 #include <sys/vnode.h>
  29 
  30 /*
  31  * Filesystem hook framework (fsh)
  32  *
  33  * 1. Abstract.
  34  * The main goal of the filesystem hook framework is to provide an easy way to
  35  * inject client-defined behaviour into vfs/vnode calls. fsh works on
  36  * vfs_t granularity.
  37  *
  38  * Note: In this document, both an fsh_t structure and hooking function for a
  39  * vnodeop/vfsop are referred to as a *hook*.
  40  *
  41  *
  42  * 2. Overview.
  43  * fsh_t is the main object in the fsh. An fsh_t is a structure containing:
  44  *      - pointers to hooking functions
  45  *      - an argument to pass (this is shared for all the hooks in a given
  46  *      fsh_t)
  47  *      - a pointer to the *hook remove callback*



  48  *
  49  * The information from fsh_t is copied by the fsh and an fsh_handle_t
  50  * is returned. It should be used for further removing.
  51  *
  52  *
  53  * 3. Usage.
  54  * It is expected that vfs_t/vnode_t passed to fsh_foo() functions are held by
  55  * the caller when needed. fsh does no vfs_t/vnode_t locking.
  56  *
  57  * fsh_t is a structure filled out by the client. It contains:
  58  *      - pointers to hooking functions
  59  *      - the argument passed to the hooks
  60  *      - the *hook remove callback*








  61  *
  62  * If a client does not want to add a hook for function foo(), he should fill
  63  * corresponding fields with NULLs. For every vfsop/vnodeop there are two
  64  * fields: pre_foo() and post_foo(). These are the functions called before and
  65  * after the next hook or underlying vfsop/vnodeop.
  66  *
  67  * Pre hooks take:
  68  *      - arg
  69  *      - pointer to a field containing void* - it should be filled whenever
  70  *      the client wants to have some data shared by the pre and post hooks in
  71  *      the same syscall execution. This is called the *instance data*.
  72  *      - pointers to the arguments passed to the underlying vfsop/vnodeop
  73  * Pre hooks return void.
  74  *
  75  * Post hooks take:
  76  *      - value returned by the previous post hook or underlying vfsop/vnodeop
  77  *      - arg
  78  *      - pointer to the *instance data*
  79  *      - arguments passed to the underlying vfsop/vnodeop
  80  * Post hooks return an int, which should be treated as the vfsop/vnodeop
  81  * return value.
  82  * Memory allocated by pre hook must be deallocated by the post hook.
  83  *
  84  * Execution path of hooks A, B, C is as follows:
  85  * foo()
  86  *      preA(argA, &instancepA, ...);
  87  *      preB(argB, &instancepB, ...);
  88  *      preC(argC, &instancepC, ...);
  89  *      ret = VOP_FOO();
  90  *      ret = postC(ret, argC, instancepC, ...);
  91  *      ret = postB(ret, argB, instancepB, ...);
  92  *      ret = postA(ret, argA, instancepA, ...);
  93  *      return (ret);
  94  *
  95  * After installation, an fsh_handle_t is returned to the caller.
  96  *
  97  * Hook remove callback - it's a function being fired after a hook is removed
  98  * and no thread is going to execute it anymore. It's safe to destroy all the
  99  * data associated with this hook inside it.









 100  *
 101  * It is guaranteed, that whenever a pre_hook() is called, there will be also
 102  * post_hook() called within the same syscall.
 103  *
 104  * If a hook (HNew) is installed/removed on/from a vfs_t within execution of
 105  * another hook (HExec) installed on this vfs_t, the syscall that executes
 106  * HExec won't fire HNew.
 107  *
 108  * A client might want to fire callbacks when vfs_ts are being mounted
 109  * or freed. There's an fsh_callback_t structure provided to install such
 110  * callbacks along with the API.
 111  * It is legal to call fsh_hook_{install,remove}() inside a mount callback
 112  * WITHOUT holding the vfs_t.
 113  *
 114  * After vfs_t's free callback returns, all the handles associated with the
 115  * hooks installed on this vfs_t are invalid and must not be used.
 116  *

 117  * 4. API
 118  * None of the APIs should be called during interrupt context above lock
 119  * level.

 120  *
 121  * a) fsh.h
 122  * Any of these functions could be called in a hook or a hook remove callback.
 123  * The only functions that must not be called inside a {mount,free} callback are
 124  * fsh_callback_{install,remove}. Using them will cause a deadlock.


 125  *
 126  *
 127  * fsh_fs_enable(vfs_t *vfsp)
 128  * fsh_fs_disable(vfs_t *vfsp)
 129  *      Enables/disables fsh for a given vfs_t.
 130  *
 131  * fsh_hook_install(vfs_t *vfsp, fsh_t *hooks)
 132  *      Installs hooks on vfsp filesystem.
 133  *      It's important that hooks are executed in LIFO installation order,
 134  *      which means that if there are hooks A and B installed in this order, B
 135  *      is going to be executed before A.
 136  *      It returns a correct handle, or (-1) if hook/callback limit exceeded.
 137  *      The handle is valid until a free callback returns or an explicit call
 138  *      to fsh_hook_remove().
 139  *
 140  * fsh_hook_remove(fsh_handle_t handle)
 141  *      Removes a hook and invalidates the handle.
 142  *      It is guaranteed that after this function returns, calls to
 143  *      vnodeops/vfsops won't go through this hook, although there might be
 144  *      some threads still executing this hook. When hook remove callback is
 145  *      fired, it is guaranteed that the hook won't be executed anymore. It is
 146  *      safe to remove all the internal data associated with this hook inside
 147  *      the hook remove callback. The hook remove callback could be called
 148  *      inside fsh_hook_remove().
 149  *





 150  *
 151  * fsh_callback_install(fsh_callback_t *callback)
 152  * fsh_callback_remove(fsh_callback_handle_t handle)
 153  *      Installs/removes callbacks for vfs_t mount/free. The mount callback
 154  *      is executed right before domount() returns. The free callback is
 155  *      called right before VFS_FREEVFS() is called.
 156  *      The fsh_callback_install() returns a correct handle, or (-1) if
 157  *      hook/callback limit exceeded.
 158  *
 159  *
 160  * b) fsh_impl.h (for vfs.c and vnode.c only)
 161  * fsh_init()
 162  *      This call has to be done in vfsinit(). It initialises the fsh. It
 163  *      is absolutely necessary that this call is made before any other fsh
 164  *      operation.
 165  *
 166  * fsh_exec_mount_callbacks(vfs_t *vfsp)
 167  * fsh_exec_free_callbacks(vfs_t *vfsp)
 168  *      Used to execute all fsh callbacks for {mount,free} of a vfs_t.
 169  *
 170  * fsh_fsrec_destroy(struct fsh_fsrecord *fsrecp)
 171  *      Destroys an fsh_fsrecord structure. All the hooks installed on this
 172  *      vfs_t are then destroyed. free callback is called before this function.
 173  *
 174  * fsh_foo(ARGUMENTS)
 175  *      Function used to execute the hook chain for a given syscall.
 176  *
 177  *
 178  * 5. Internals.
 179  * fsh_int_t is an internal hook structure. It is reference counted.
 180  * fshi_hold() and fshi_rele() should be used whenever needed.
 181  * fsh_int_t entries are elements of both fsh_map (global) and fshfsr_list
 182  * (local to vfs_t). All entries are unique and are identified by fshi_handle.
 183  *
 184  * fsh_int_t properties:
 185  *      - fsh_hook_install() sets the ref. counter to 1 and adds it to both
 186  *      fsh_map and fshfsr_list
 187  *      - fsh_hook_remove() decreases the ref. counter by 1, removes the hook
 188  *      from fsh_map and marks the hook as *doomed*
 189  *      - if fsh_int_t is on the fshfsr_list, it's alive and there is a thread
 190  *      executing it
 191  *      - if fsh_int_t is marked as *doomed*, the reference counter cannot
 192  *      be increased and thus no thread can acquire this fsh_int_t
 193  *      - ref. counter can drop to 0 only after an fsh_hook_remove() call; this
 194  *      also means that the fsh_int_t is *doomed* and isn't a part of fsh_map
 195  *      - fsh_int_t could be also destroyed without fsh_hook_remove() call,
 196  *      that happens only inside fsh_fsrec_destroy() where it is guaranteed
 197  *      that there is no thread executing the hook
 198  *
 199  *
 200  * fsh_fsrecord_t is a structure which lives inside a vfs_t.
 201  * fsh_fsrecord_t contains:
 202  *      - an rw-lock that protects the structure
 203  *      - a list of hooks installed on this vfs_t
 204  *      - a flag which tells whether fsh is enabled on this vfs_t
 205  *
 206  *
 207  * fsh_fsrec_prepare rule:
 208  * Every function that needs vfsp->vfs_fshrecord has to call
 209  * fsh_fsrec_prepare() first. If and only if the call is made, it is safe to
 210  * use vfsp->vfs_fshrecord.
 211  *
 212  * Unfortunately, because of unexpected behaviour of some filesystems (no use
 213  * of vfs_alloc()/vfs_init()) there's no good place to initialise the
 214  * fsh_fsrecord_t structure. The approach being used here is to check if it's
 215  * initialised in every call. Because of the fact that no lock could be used
 216  * here (the same problem with initialisation), a spinlock is used.  This is
 217  * explained in more detail in a comment before fsh_fsrec_prepare(). After
 218  * calling fsh_fsrec_prepare() it's completely safe to keep the vfs_fshrecord
 219  * pointer locally, because it won't be changed until vfs_free() is called.
 220  *
 221  * Exceptions from this rule:
 222  * - vfs_free() - it is expected that no other fsh calls would be made for the
 223  * vfs_t that's being freed. That's why vfs_fshrecord could be only NULL or a
 224  * valid pointer and could not be concurrently accessed.
 225  * - fshi_rele() - fsh_hook_install() comes before first fshi_rele() call;
 226  * the fsh_fsrecord_t has been initialised there
 227  *
 228  *
 229  * When there are no fsh functions (that use a particular fsh_fsrecord_t)
 230  * executing, the vfs_fshrecord pointer won't be equal to fsh_res_ptr. It
 231  * would be NULL or a pointer to an initialised fsh_fsrecord_t.
 232  *
 233  * It is required and sufficient to check if fsh_fsrecord_t is not NULL before
 234  * passing it to fsh_fsrec_destroy. We don't have to check if it is not equal
 235  * to fsh_res_ptr, because all the fsh API calls involving this vfs_t should
 236  * end before vfs_free() is called (outside the fsh, fsh_fsrecord is never
 237  * equal to fsh_res_ptr). That is guaranteed by the explicit requirement that
 238  * the caller of fsh API holds the vfs_t when needed. fsh_hook_remove() must not
 239  * be called either, because the handles are invalidated after free callback has
 240  * fired.
 241  *
 242  *
 243  * Callbacks:
 244  * Mount callbacks are executed by a call to fsh_exec_mount_callbacks() right
 245  * before returning from domount()@vfs.c.
 246  *
 247  * Free callbacks are executed by a call to fsh_exec_free_callbacks() right
 248  * before calling VFS_FREEVFS(), after vfs_t's reference count drops to 0.
 249  *
 250  *





 251  * 6. Locking
 252  * a) public
 253  * fsh does no vfs_t nor vnode_t locking. It is expected that whenever it is
 254  * needed, the client does that.
 255  *
 256  * No locks are held across hooks or hook remove callbacks execution. It is
 257  * safe to use fsh API inside hooks and hook remove callbacks.
 258  *
 259  * fsh_cb_lock is held across {mount,free} callbacks. Calling
 260  * fsh_callback_{install,remove} inside of a callback will cause a deadlock.
 261  *
 262  * b) internals
 263  * Locking diagram:
 264  *
 265  *     fsh_hook_remove()          fsh_hook_install()   fsh_fsrec_destroy()
 266  *           |                            |                |
 267  *           |                            |                |
 268  *           +------------------+         |   +------------+
 269  *           |                  |         |   |
 270  *           |                  V         |   |
 271  *           V               +------------|---|-+
 272  *      fshi_rele()          |  fsh_lock  |   | |
 273  *      (sometimes)          +------------|---|-+
 274  *                                 |      |   |
 275  *                                 |      +---+-- fshfsr_lock, RW_WRITER -+

 276  *                                 |                                      |


 277  *                                 V                                      |
 278  *               +---------------------------------------+                |
 279  *               |               fsh_map                 |                |
 280  *               |                                       |                |
 281  *          +----|-> vfsp->vfs_fshrecord->fshfsr_list <--|----------------+
 282  *          |    +------------------------------^--------+
 283  *          |                                   |
 284  *          |                                   |
 285  * fshfsr_lock, RW_READER              fshfsr_lock, RW_WRITER
 286  *          |                                   |
 287  *          |                                   |
 288  *   fsh_read(),                            fshi_rele()
 289  *   fsh_write(),
 290  *   ...                                Might be called from:
 291  *                                        fsh_hook_remove()
 292  *                                        fsh_read(), fsh_write(), ...

 293  *
 294  *
 295  * fsh_lock is a global lock for administrative path (fsh_hook_install,
 296  * fsh_hook_remove) and fsh_fsrec_destroy() (which is semi-administrative, since
 297  * it destroys the unremoved hooks). It is used only when fsh_map needs to be
 298  * locked. The usage of this lock guarantees that the data in fsh_map and
 299  * fshfsr_lists is consistent.
 300  *
 301  * In order to make calling callbacks inside callbacks possible, fsh_cb_owner is
 302  * set by fsh_exec_{mount,free} callbacks to the thread that owns the
 303  * fsh_cb_lock.  It's always checked if we are owners of the mutex before
 304  * entering it.
 305  *
 306  */
 307 
 308 
 309 /* Internals */
     /*
      * fsh_int_t is the internal, reference-counted representation of one
      * installed hook set.
      */
 310 typedef struct fsh_int {
 311         fsh_handle_t    fshi_handle;    /* handle identifying this entry */
 312         fsh_t           fshi_hooks;     /* copy of the client's fsh_t */
 313         vfs_t           *fshi_vfsp;     /* vfs_t the hooks are installed on */
 314 
 315         kmutex_t        fshi_lock;      /* held while fshi_ref is changed */
 316         uint64_t        fshi_ref;       /* reference count */
 317         uint64_t        fshi_doomed;    /* changed inside fsh_lock */
 318 
 319         /* next node in fshfsr_list */
 320         list_node_t     fshi_node;
 321 
 322         /* next node in fsh_map */
 323         list_node_t     fshi_global;
 324 } fsh_int_t;
 325 
 326 typedef struct fsh_callback_int {
 327         fsh_callback_t  fshci_cb;       /* copy of the client's callbacks */
 328         fsh_callback_handle_t fshci_handle;     /* handle of this entry */
 329         list_node_t     fshci_node;     /* linkage node (fsh_cblist) */
 330 } fsh_callback_int_t;
 331 
 332 
     /*
      * NOTE(review): no user of fsh_exec_t is visible in this part of the file.
      * Presumably one entry describes one hook taking part in a single
      * vfsop/vnodeop call - confirm against the code that uses it.
      */
 333 typedef struct fsh_exec {
 334         fsh_int_t       *fshe_fshi;     /* the hook this entry refers to */
 335         void            *fshe_instance; /* presumably the *instance data* */
 336         list_node_t     fshe_node;      /* list linkage */
 337 } fsh_exec_t;
 338 
 339 
 340 static kmutex_t fsh_lock;       /* protects fsh_map */
 341 
 342 /*
 343  * fsh_fsrecord_t is the main internal structure. Its content is protected
 344  * by fshfsr_lock. The fshfsr_list is a list of fsh_int_t hook entries for
 345  * the vfs_t that contains the fsh_fsrecord_t.
 346  */
 347 struct fsh_fsrecord {
 348         krwlock_t       fshfsr_lock;    /* protects the fields below */
 349         int             fshfsr_enabled; /* non-zero when fsh is enabled */
 350         list_t          fshfsr_list;    /* hooks installed on the vfs_t */
 351 };
 352 
 353 /*
 354  * Global list of fsh_int_t. Protected by fsh_lock.
 355  */
 356 static list_t fsh_map;
 357 
 358 /*
 359  * Global list of fsh_callback_int_t.
      * fsh_cb_lock is held while fsh_cblist is modified. fsh_cb_owner records
      * the thread that owns fsh_cb_lock while {mount,free} callbacks execute.
      * NOTE(review): fsh_cb_owner_lock presumably protects fsh_cb_owner; its
      * use is not visible in this part of the file - confirm.
 360  */
 361 static kmutex_t fsh_cb_lock;
 362 static kmutex_t fsh_cb_owner_lock;
 363 static kthread_t *fsh_cb_owner;
 364 static list_t fsh_cblist;
 365 
 366 /*
 367  * A reserved pointer for fsh purposes. It is used because of the method
 368  * chosen for solving concurrency issues with vfs_fshrecord. The full
 369  * explanation is in the big theory statement at the beginning of this
 370  * file and above fsh_fsrec_prepare(). It is initialised in fsh_init().
 371  */
 372 static void *fsh_res_ptr;
 373 
 374 static fsh_fsrecord_t *fsh_fsrec_create();
 375 
     /*
      * fsh_idspace is the id space from which both hook handles and callback
      * handles are allocated. NOTE(review): fsh_limit is presumably the bound
      * of that id space (the hook/callback limit); the id space creation is
      * not visible in this part of the file - confirm.
      */
 376 int fsh_limit = INT_MAX;
 377 static id_space_t *fsh_idspace;
 378 
 379 /*
 380  * fsh_fsrec_prepare()
 381  *
 382  * Important note:
 383  * Before using this function, fsh_init() MUST be called. We do that in
 384  * vfsinit()@vfs.c.
 385  *
 386  * One would ask, why isn't the vfsp->vfs_fshrecord initialised when the
 387  * vfs_t is created. Unfortunately, some filesystems (e.g. fifofs) do not
 388  * call vfs_init() or even vfs_alloc(). It's possible that some unbundled
 389  * filesystems could do the same thing. That's why this solution is
 390  * introduced. It should be called before any code that needs access to
 391  * vfs_fshrecord.
 392  *
 393  * Locking:
 394  * There are no locks here, because there's no good place to initialise
 395  * the lock. Concurrency issues are solved by using atomic instructions
 396  * and a spinlock, which is spinning only once for a given vfs_t. Because
 397  * of that, the usage of the spinlock isn't bad at all.
 398  *
 399  * How it works:
 400  * a) if vfsp->vfs_fshrecord equals NULL, atomic_cas_ptr() changes it to
 401  *      fsh_res_ptr. That's a signal for other threads, that the structure
 402  *      is being initialised.
 403  * b) if vfsp->vfs_fshrecord equals fsh_res_ptr, that means we have to wait,
 404  *      because vfs_fshrecord is being initialised by another call.
 405  * c) other cases:
 406  *      vfs_fshrecord is already initialised, so we can use it. It won't change
 407  *      until vfs_free() is called. It can't happen when someone is holding
 408  *      the vfs_t, which is expected from the caller of fsh API.
 409  */
 410 static void
 411 fsh_fsrec_prepare(vfs_t *vfsp)
 412 {
 413         fsh_fsrecord_t *fsrec;
 414 
             /*
              * Retry the compare-and-swap for as long as it reports
              * fsh_res_ptr, i.e. while another thread is initialising the
              * record (case b above).
              */
 415         while ((fsrec = atomic_cas_ptr(&vfsp->vfs_fshrecord, NULL,
 416             fsh_res_ptr)) == fsh_res_ptr)
 417                 ;
 418 
             /*
              * The old value was NULL, so our swap installed fsh_res_ptr and
              * it is up to us to create the record and publish it (case a).
              */
 419         if (fsrec == NULL)
 420                 atomic_swap_ptr(&vfsp->vfs_fshrecord, fsh_fsrec_create());
 421 }
 422 
 423 /*
 424  * API for enabling/disabling fsh per vfs_t.
 425  *
 426  * A newly created vfs_t has fsh enabled by default. If one would want to change
 427  * this behaviour, mount callbacks could be used.
 428  *
 429  * The caller is expected to hold the vfs_t.
 430  *
 431  * These functions must NOT be called in a hook.
      *
      * fsh_fs_enable() sets fshfsr_enabled to 1 on vfsp's fsh_fsrecord_t while
      * holding fshfsr_lock as a writer.
 432  */
 433 void
 434 fsh_fs_enable(vfs_t *vfsp)
 435 {
             /* Must come first: vfs_fshrecord is only safe to use after it. */
 436         fsh_fsrec_prepare(vfsp);
 437 
 438         rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
 439         vfsp->vfs_fshrecord->fshfsr_enabled = 1;
 440         rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
 441 }
 442 
 443 void
 444 fsh_fs_disable(vfs_t *vfsp)
 445 {
             /*
              * Disable fsh on vfsp: set fshfsr_enabled to 0 on its
              * fsh_fsrecord_t while holding fshfsr_lock as a writer.
              * fsh_fsrec_prepare() must come first: vfs_fshrecord is only
              * safe to use after it.
              */
 446         fsh_fsrec_prepare(vfsp);
 447 
 448         rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
 449         vfsp->vfs_fshrecord->fshfsr_enabled = 0;
 450         rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
 451 }
 452 
 453 /*
 454  * API used for installing hooks. fsh_handle_t is returned for further
 455  * actions (currently just removing) on this set of hooks.
 456  *








 457  * It's important that the hooks are executed in LIFO installation order (they
 458  * are added to the head of the hook list).
 459  *
 460  * The caller is expected to hold the vfs_t.
 461  *
 462  * Returns (-1) if hook/callback limit exceeded, handle otherwise.
 463  */
 464 fsh_handle_t
 465 fsh_hook_install(vfs_t *vfsp, fsh_t *hooks)
 466 {
 467         fsh_handle_t    handle;
 468         fsh_int_t       *fshi;
 469 
 470         fsh_fsrec_prepare(vfsp);
 471 
 472         if ((handle = id_alloc(fsh_idspace)) == -1)
 473                 return (-1);
 474 
 475         fshi = kmem_alloc(sizeof (*fshi), KM_SLEEP);
 476         mutex_init(&fshi->fshi_lock, NULL, MUTEX_DRIVER, NULL);
 477         (void) memcpy(&fshi->fshi_hooks, hooks, sizeof (fshi->fshi_hooks));
 478         fshi->fshi_handle = handle;
 479         fshi->fshi_doomed = 0;
 480         fshi->fshi_ref = 1;
 481         fshi->fshi_vfsp = vfsp;
 482 
 483         mutex_enter(&fsh_lock);
 484         rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
 485         list_insert_head(&vfsp->vfs_fshrecord->fshfsr_list, fshi);
 486         rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
 487 
 488         list_insert_head(&fsh_map, fshi);
 489         mutex_exit(&fsh_lock);
 490 


 516 fshi_rele(fsh_int_t *fshi)
 517 {
 518         int destroy;
 519 
 520         mutex_enter(&fshi->fshi_lock);
 521         ASSERT(fshi->fshi_ref > 0);
 522         fshi->fshi_ref--;
 523         if (fshi->fshi_ref == 0) {
 524                 ASSERT(fshi->fshi_doomed == 1);
 525                 destroy = 1;
 526         } else {
 527                 destroy = 0;
 528         }
 529         mutex_exit(&fshi->fshi_lock);
 530 
 531         if (destroy) {
 532                 /*
 533                  * At this point, we are sure that fsh_hook_remove() has been
 534                  * called, that's why we don't remove the fshi from fsh_map.
 535                  * fsh_hook_remove() did that already.
 536                  * There is also no need to call fsh_fsrec_prepare() here.
 537                  */
 538                 fsh_fsrecord_t *fsrecp;
 539 



 540                 /*
 541                  * We don't have to call fsh_fsrec_prepare() here.
 542                  * fsh_fsrecord_t is already initialised, because we've found a
 543                  * mapping for the given handle.
 544                  */
 545                 fsrecp = fshi->fshi_vfsp->vfs_fshrecord;
 546                 ASSERT(fsrecp != NULL);
 547                 ASSERT(fsrecp != fsh_res_ptr);
 548 
 549                 rw_enter(&fsrecp->fshfsr_lock, RW_WRITER);
 550                 list_remove(&fsrecp->fshfsr_list, fshi);
 551                 rw_exit(&fsrecp->fshfsr_lock);
 552 
 553                 if (fshi->fshi_hooks.remove_cb != NULL)
 554                         (*fshi->fshi_hooks.remove_cb)(
 555                             fshi->fshi_hooks.arg, fshi->fshi_handle);
 556 
 557                 id_free(fsh_idspace, fshi->fshi_handle);
 558                 mutex_destroy(&fshi->fshi_lock);
 559                 kmem_free(fshi, sizeof (*fshi));
 560         }
 561 }
 562 
 563 /*
 564  * Used for removing a hook set.
 565  *
 566  * fsh_hook_remove() invalidates the given handle.
 567  *
 568  * It is guaranteed, that after successful return from fsh_hook_remove(),
 569  * calls to vnodeops/vfsops, on the vfs_t on which the hook is installed, won't
 570  * go through this hook.
 571  *
 572  * There is no guarantee that after fsh_hook_remove() returns, the hook
 573  * associated with the handle won't be executing. Instead, it is guaranteed that
 574  * when remove_cb() is called, the hook finished its execution in all threads.
 575  * It is safe to destroy all internal data associated with this hook inside
 576  * remove_cb().


 604         mutex_exit(&fsh_lock);
 605 
 606         fshi_rele(fshi);
 607 
 608         return (0);
 609 }
 610 
 611 /*
 612  * API for installing global mount/free callbacks.
 613  *
 614  * fsh_callback_t fields:
 615  * fshc_arg - argument passed to the callbacks
 616  * fshc_free - callback fired before VFS_FREEVFS() is called, after vfs_count
 617  *      drops to 0
 618  * fshc_mount - callback fired right before returning from domount()
 619  * The first argument of these callbacks is the vfs_t that is mounted/freed.
 620  * The second one is the fshc_arg.
 621  *
 622  * fsh_callback_handle_t is filled out by this function.
 623  *



 624  * Returns (-1) if hook/callback limit exceeded.
 625  *
 626  * Calling this function in a {mount,free} callback will cause a deadlock.
 627  */
 628 fsh_callback_handle_t
 629 fsh_callback_install(fsh_callback_t *callback)
 630 {
 631         fsh_callback_int_t *fshci;
 632         fsh_callback_handle_t handle;
 633 
 634         if ((handle = id_alloc(fsh_idspace)) == -1)
 635                 return (-1);
 636 
 637         fshci = (fsh_callback_int_t *)kmem_alloc(sizeof (*fshci), KM_SLEEP);
 638         (void) memcpy(&fshci->fshci_cb, callback, sizeof (fshci->fshci_cb));
 639         fshci->fshci_handle = handle;
 640 
 641         mutex_enter(&fsh_cb_lock);

 642         list_insert_head(&fsh_cblist, fshci);
 643         mutex_exit(&fsh_cb_lock);
 644 
 645         return (handle);
 646 }
 647 
 648 /*
 649  * API for removing global mount/free callbacks.
 650  *



 651  * Returns (-1) if callback wasn't found, 0 otherwise.
 652  *
 653  * Calling this function in a {mount,free} callback will cause a deadlock.
 654  */
 655 int
 656 fsh_callback_remove(fsh_callback_handle_t handle)
 657 {
 658         fsh_callback_int_t *fshci;
 659 
 660         mutex_enter(&fsh_cb_lock);
 661 
 662         for (fshci = list_head(&fsh_cblist); fshci != NULL;
 663             fshci = list_next(&fsh_cblist, fshci)) {
 664                 if (fshci->fshci_handle == handle) {
 665                         list_remove(&fsh_cblist, fshci);
 666                         break;
 667                 }
 668         }

 669 
 670         mutex_exit(&fsh_cb_lock);
 671 
 672         if (fshci == NULL)
 673                 return (-1);
 674 
 675         kmem_free(fshci, sizeof (*fshci));
 676         id_free(fsh_idspace, handle);
 677 
 678         return (0);
 679 }
 680 
 681 /*
 682  * This function is executed right before returning from domount()@vfs.c.
 683  * We are sure that it's called only after fsh_init().
 684  * It executes all the mount callbacks installed in the fsh.
 685  *
 686  * Since fsh_exec_mount_callbacks() is called only inside domount(), it is legal
 687  * to call fsh_hook_{install,remove}() inside a mount callback WITHOUT holding
 688  * this vfs_t. This guarantee should be preserved, because it's in the "Usage"
 689  * section in the big theory statement at the top of this file.
 690  */
 691 void
 692 fsh_exec_mount_callbacks(vfs_t *vfsp)
 693 {
 694         fsh_callback_int_t *fshci;
 695         fsh_callback_t *cb;
 696         int fsh_context;
 697 
 698         mutex_enter(&fsh_cb_owner_lock);
 699         fsh_context = fsh_cb_owner == curthread;
 700         mutex_exit(&fsh_cb_owner_lock);
 701 
 702         if (!fsh_context) {
 703                 mutex_enter(&fsh_cb_lock);
 704                 mutex_enter(&fsh_cb_owner_lock);
 705                 fsh_cb_owner = curthread;
 706                 mutex_exit(&fsh_cb_owner_lock);
 707         }
 708 
 709         ASSERT(MUTEX_HELD(&fsh_cb_lock));
 710 
 711         for (fshci = list_head(&fsh_cblist); fshci != NULL;
 712             fshci = list_next(&fsh_cblist, fshci)) {
 713                 cb = &fshci->fshci_cb;
 714                 if (cb->fshc_mount != NULL)
 715                         (*(cb->fshc_mount))(vfsp, cb->fshc_arg);
 716         }
 717 
 718         if (!fsh_context) {
 719                 mutex_enter(&fsh_cb_owner_lock);
 720                 fsh_cb_owner = NULL;
 721                 mutex_exit(&fsh_cb_owner_lock);
 722                 mutex_exit(&fsh_cb_lock);
 723         }
 724 }
 725 
 726 /*
 727  * This function is executed right before VFS_FREEVFS() is called in
 728  * vfs_rele()@vfs.c. We are sure that it's called only after fsh_init().
 729  * It executes all the free callbacks installed in the fsh.
 730  *
 731  * free() callback is the point after the handles associated with the hooks
 732  * installed on this vfs_t become invalid
 733  */
 734 void
 735 fsh_exec_free_callbacks(vfs_t *vfsp)
 736 {
 737         fsh_callback_int_t *fshci;
 738         fsh_callback_t *cb;
 739         int fsh_context;
 740 
 741         mutex_enter(&fsh_cb_owner_lock);
 742         fsh_context = fsh_cb_owner == curthread;
 743         mutex_exit(&fsh_cb_owner_lock);
 744 
 745         if (!fsh_context) {
 746                 mutex_enter(&fsh_cb_lock);
 747                 mutex_enter(&fsh_cb_owner_lock);
 748                 fsh_cb_owner = curthread;
 749                 mutex_exit(&fsh_cb_owner_lock);
 750         }
 751 
 752         ASSERT(MUTEX_HELD(&fsh_cb_lock));
 753 
 754         for (fshci = list_head(&fsh_cblist); fshci != NULL;
 755             fshci = list_next(&fsh_cblist, fshci)) {
 756                 cb = &fshci->fshci_cb;
 757                 if (cb->fshc_free != NULL)
 758                         (*(cb->fshc_free))(vfsp, cb->fshc_arg);
 759         }
 760 
 761         if (!fsh_context) {
 762                 mutex_enter(&fsh_cb_owner_lock);
 763                 fsh_cb_owner = NULL;
 764                 mutex_exit(&fsh_cb_owner_lock);
 765                 mutex_exit(&fsh_cb_lock);
 766         }
 767 }
 768 
 769 /*
 770  * API for vnode.c/vfs.c to start executing the fsh for a given operation.
 771  *
 772  * fsh_xxx() tries to find the first non-NULL xxx hook on the fshfsr_list. If it
 773  * does, it executes it. If not, underlying vnodeop/vfsop is called.
 774  *
 775  * These interfaces are using fsh_res_ptr (in fsh_fsrec_prepare()), so it's
 776  * absolutely necessary to call fsh_init() before using them. That's done in
 777  * vfsinit().
 778  *
 779  * While these functions are executing, it's expected that necessary vfs_t's
 780  * are held so that vfs_free() isn't called. vfs_free() expects that noone
 781  * accesses vfs_fshrecord of a given vfs_t.
 782  * It's also the caller's responsibility to keep vnode_t passed to fsh_foo()
 783  * alive and valid.
 784  * All these expectations are met because these functions are used only in
 785  * correspondng {fop,fsop}_foo() functions.
 786  */
 787 int
 788 fsh_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
 789         caller_context_t *ct)
 790 {
 791         int ret;
 792         fsh_fsrecord_t *fsrecp;
 793         fsh_int_t *fshi;
 794         fsh_exec_t *fshe;
 795         list_t exec_list;
 796 
 797         fsh_fsrec_prepare(vp->v_vfsp);
 798         fsrecp = vp->v_vfsp->vfs_fshrecord;
 799 
 800         rw_enter(&fsrecp->fshfsr_lock, RW_READER);
 801         if (!(fsrecp->fshfsr_enabled)) {
 802                 rw_exit(&fsrecp->fshfsr_lock);
 803                 return ((*vp->v_op->vop_read)(vp, uiop, ioflag, cr, ct));
 804         }
 805 
 806         list_create(&exec_list, sizeof (fsh_exec_t),
 807             offsetof(fsh_exec_t, fshe_node));
 808 
 809         for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL;
 810             fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
 811                 if (fshi->fshi_hooks.pre_read != NULL ||
 812                     fshi->fshi_hooks.post_read != NULL) {
 813                         if (fshi_hold(fshi)) {
 814                                 fshe = kmem_alloc(sizeof (*fshe), KM_SLEEP);
 815                                 fshe->fshe_fshi = fshi;
 816                                 list_insert_tail(&exec_list, fshe);
 817                         }
 818                 }
 819         }
 820         rw_exit(&fsrecp->fshfsr_lock);
 821 
 822         /* Execute pre hooks */
 823         for (fshe = list_head(&exec_list); fshe != NULL;
 824             fshe = list_next(&exec_list, fshe)) {
 825                 if (fshe->fshe_fshi->fshi_hooks.pre_read != NULL)
 826                         (*fshe->fshe_fshi->fshi_hooks.pre_read)(
 827                             fshe->fshe_fshi->fshi_hooks.arg,
 828                             &fshe->fshe_instance,
 829                             &vp, &uiop, &ioflag, &cr, &ct);
 830         }
 831 
 832         ret = (*vp->v_op->vop_read)(vp, uiop, ioflag, cr, ct);
 833 
 834         /* Execute post hooks */
 835         while ((fshe = list_remove_tail(&exec_list)) != NULL) {
 836                 if (fshe->fshe_fshi->fshi_hooks.post_read != NULL)
 837                         ret = (*fshe->fshe_fshi->fshi_hooks.post_read)(
 838                             ret, fshe->fshe_fshi->fshi_hooks.arg,
 839                             fshe->fshe_instance,
 840                             vp, uiop, ioflag, cr, ct);
 841                 fshi_rele(fshe->fshe_fshi);
 842                 kmem_free(fshe, sizeof (*fshe));
 843         }
 844         list_destroy(&exec_list);
 845 
 846         return (ret);
 847 }
 848 
 849 int
 850 fsh_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
 851         caller_context_t *ct)
 852 {

 853         int ret;
 854         fsh_fsrecord_t *fsrecp;
 855         fsh_int_t *fshi;
 856         fsh_exec_t *fshe;
 857         list_t exec_list;
 858 
 859         fsh_fsrec_prepare(vp->v_vfsp);
 860         fsrecp = vp->v_vfsp->vfs_fshrecord;
 861 
 862         rw_enter(&fsrecp->fshfsr_lock, RW_READER);
 863         if (!(fsrecp->fshfsr_enabled)) {
 864                 rw_exit(&fsrecp->fshfsr_lock);
 865                 return ((*vp->v_op->vop_write)(vp, uiop, ioflag, cr, ct));
 866         }
 867 
 868         list_create(&exec_list, sizeof (fsh_exec_t),
 869             offsetof(fsh_exec_t, fshe_node));
 870 
 871         for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL;
 872             fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
 873                 if (fshi->fshi_hooks.pre_write != NULL ||
 874                     fshi->fshi_hooks.post_write != NULL) {
 875                         if (fshi_hold(fshi)) {
 876                                 fshe = kmem_alloc(sizeof (*fshe), KM_SLEEP);
 877                                 fshe->fshe_fshi = fshi;
 878                                 list_insert_tail(&exec_list, fshe);
 879                         }
 880                 }
 881         }
 882         rw_exit(&fsrecp->fshfsr_lock);
 883 
 884         /* Execute pre hooks */
 885         for (fshe = list_head(&exec_list); fshe != NULL;
 886             fshe = list_next(&exec_list, fshe)) {
 887                 if (fshe->fshe_fshi->fshi_hooks.pre_write != NULL)
 888                         (*fshe->fshe_fshi->fshi_hooks.pre_write)(
 889                             fshe->fshe_fshi->fshi_hooks.arg,
 890                             &fshe->fshe_instance,
 891                             &vp, &uiop, &ioflag, &cr, &ct);
 892         }
 893 
 894         ret = (*vp->v_op->vop_write)(vp, uiop, ioflag, cr, ct);
 895 
 896         /* Execute post hooks */
 897         while ((fshe = list_remove_tail(&exec_list)) != NULL) {
 898                 if (fshe->fshe_fshi->fshi_hooks.post_write != NULL)
 899                         ret = (*fshe->fshe_fshi->fshi_hooks.post_write)(
 900                             ret, fshe->fshe_fshi->fshi_hooks.arg,
 901                             fshe->fshe_instance,
 902                             vp, uiop, ioflag, cr, ct);
 903                 fshi_rele(fshe->fshe_fshi);
 904                 kmem_free(fshe, sizeof (*fshe));
 905         }
 906         list_destroy(&exec_list);
 907 
 908         return (ret);
 909 }
 910 
 911 int
 912 fsh_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
 913 {
 914         int ret;
 915         fsh_fsrecord_t *fsrecp;
 916         fsh_int_t *fshi;
 917         fsh_exec_t *fshe;
 918         list_t exec_list;
 919 
 920         fsh_fsrec_prepare(vfsp);
 921         fsrecp = vfsp->vfs_fshrecord;
 922 
 923         rw_enter(&fsrecp->fshfsr_lock, RW_READER);
 924         if (!(fsrecp->fshfsr_enabled)) {
 925                 rw_exit(&fsrecp->fshfsr_lock);
 926                 return ((*vfsp->vfs_op->vfs_mount)(vfsp, mvp, uap, cr));
 927         }
 928 
 929         list_create(&exec_list, sizeof (fsh_exec_t),
 930             offsetof(fsh_exec_t, fshe_node));
 931 
 932         for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL;
 933             fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
 934                 if (fshi->fshi_hooks.pre_mount != NULL ||
 935                     fshi->fshi_hooks.post_mount != NULL) {
 936                         if (fshi_hold(fshi)) {
 937                                 fshe = kmem_alloc(sizeof (*fshe), KM_SLEEP);
 938                                 fshe->fshe_fshi = fshi;
 939                                 list_insert_tail(&exec_list, fshe);
 940                         }
 941                 }
 942         }
 943         rw_exit(&fsrecp->fshfsr_lock);
 944 
 945         /* Execute pre hooks */
 946         for (fshe = list_head(&exec_list); fshe != NULL;
 947             fshe = list_next(&exec_list, fshe)) {
 948                 if (fshe->fshe_fshi->fshi_hooks.pre_mount != NULL)
 949                         (*fshe->fshe_fshi->fshi_hooks.pre_mount)(
 950                             &fshe->fshe_fshi->fshi_hooks.arg,
 951                             &fshe->fshe_instance,
 952                             &vfsp, &mvp, &uap, &cr);
 953         }
 954 
 955         ret = (*vfsp->vfs_op->vfs_mount)(vfsp, mvp, uap, cr);
 956 
 957         /* Execute post hooks */
 958         while ((fshe = list_remove_tail(&exec_list)) != NULL) {
 959                 if (fshe->fshe_fshi->fshi_hooks.post_mount != NULL)
 960                         ret = (*fshe->fshe_fshi->fshi_hooks.post_mount)(
 961                             ret, fshe->fshe_fshi->fshi_hooks.arg,
 962                             fshe->fshe_instance,
 963                             vfsp, mvp, uap, cr);
 964                 fshi_rele(fshe->fshe_fshi);
 965                 kmem_free(fshe, sizeof (*fshe));
 966         }
 967         list_destroy(&exec_list);
 968 
 969         return (ret);
 970 }
 971 
 972 int
 973 fsh_unmount(vfs_t *vfsp, int flag, cred_t *cr)
 974 {
 975         int ret;
 976         fsh_fsrecord_t *fsrecp;
 977         fsh_int_t *fshi;
 978         fsh_exec_t *fshe;
 979         list_t exec_list;
 980 
 981         fsh_fsrec_prepare(vfsp);
 982         fsrecp = vfsp->vfs_fshrecord;
 983 
 984         rw_enter(&fsrecp->fshfsr_lock, RW_READER);
 985         if (!(fsrecp->fshfsr_enabled)) {
 986                 rw_exit(&fsrecp->fshfsr_lock);
 987                 return ((*vfsp->vfs_op->vfs_unmount)(vfsp, flag, cr));
 988         }
 989 
 990         list_create(&exec_list, sizeof (fsh_exec_t),
 991             offsetof(fsh_exec_t, fshe_node));
 992 
 993         for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL;
 994             fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
 995                 if (fshi->fshi_hooks.pre_unmount != NULL ||
 996                     fshi->fshi_hooks.post_unmount != NULL) {
 997                         if (fshi_hold(fshi)) {
 998                                 fshe = kmem_alloc(sizeof (*fshe), KM_SLEEP);
 999                                 fshe->fshe_fshi = fshi;
1000                                 list_insert_tail(&exec_list, fshe);
1001                         }
1002                 }
1003         }
1004         rw_exit(&fsrecp->fshfsr_lock);
1005 
1006         /* Execute pre hooks */
1007         for (fshe = list_head(&exec_list); fshe != NULL;
1008             fshe = list_next(&exec_list, fshe)) {
1009                 if (fshe->fshe_fshi->fshi_hooks.pre_unmount != NULL)
1010                         (*fshe->fshe_fshi->fshi_hooks.pre_unmount)(
1011                             fshe->fshe_fshi->fshi_hooks.arg,
1012                             &fshe->fshe_instance,
1013                             &vfsp, &flag, &cr);
1014         }
1015 
1016         ret = (*vfsp->vfs_op->vfs_unmount)(vfsp, flag, cr);
1017 
1018         /* Execute post hooks */
1019         while ((fshe = list_remove_tail(&exec_list)) != NULL) {
1020                 if (fshe->fshe_fshi->fshi_hooks.post_unmount != NULL)
1021                         ret = (*fshe->fshe_fshi->fshi_hooks.post_unmount)(
1022                             ret, fshe->fshe_fshi->fshi_hooks.arg,
1023                             fshe->fshe_instance,
1024                             vfsp, flag, cr);
1025                 fshi_rele(fshe->fshe_fshi);
1026                 kmem_free(fshe, sizeof (*fshe));
1027         }
1028         list_destroy(&exec_list);
1029 
1030         return (ret);
1031 }
1032 
1033 /*
1034  * This is the funtion used by fsh_fsrec_prepare() to allocate a new
1035  * fsh_fsrecord. This function is called by the first function which
1036  * access the vfs_fshrecord and finds out it's NULL.
1037  */
1038 static fsh_fsrecord_t *
1039 fsh_fsrec_create()
1040 {
1041         fsh_fsrecord_t *fsrecp;
1042 
1043         fsrecp = (fsh_fsrecord_t *)kmem_zalloc(sizeof (*fsrecp), KM_SLEEP);
1044         list_create(&fsrecp->fshfsr_list, sizeof (fsh_int_t),
1045             offsetof(fsh_int_t, fshi_node));
1046         rw_init(&fsrecp->fshfsr_lock, NULL, RW_DRIVER, NULL);
1047         fsrecp->fshfsr_enabled = 1;
1048         return (fsrecp);
1049 }
1050 
1051 
1052 /*
1053  * This call must be used ONLY in vfs_free().



1054  *
1055  * It is required and sufficient to check if fsh_fsrecord_t is not NULL before
1056  * passing it to fsh_fsrec_destroy.





1057  *
1058  * All the remaining hooks are being removed here.
1059  */
1060 void
1061 fsh_fsrec_destroy(struct fsh_fsrecord *volatile fsrecp)
1062 {
1063         fsh_int_t *fshi;
1064 
1065         VERIFY(fsrecp != NULL);
1066 
1067         _NOTE(CONSTCOND)
1068         while (1) {
1069                 mutex_enter(&fsh_lock);
1070                 rw_enter(&fsrecp->fshfsr_lock, RW_WRITER);
1071                 fshi = list_remove_head(&fsrecp->fshfsr_list);
1072                 rw_exit(&fsrecp->fshfsr_lock);
1073                 if (fshi == NULL) {
1074                         mutex_exit(&fsh_lock);
1075                         break;
1076                 }
1077                 ASSERT(fshi->fshi_doomed == 0);
1078                 list_remove(&fsh_map, fshi);
1079                 mutex_exit(&fsh_lock);
1080 
1081                 if (fshi->fshi_hooks.remove_cb != NULL)
1082                         (*fshi->fshi_hooks.remove_cb)(fshi->fshi_hooks.arg,
1083                             fshi->fshi_handle);
1084 
1085                 id_free(fsh_idspace, fshi->fshi_handle);
1086                 mutex_destroy(&fshi->fshi_lock);
1087                 kmem_free(fshi, sizeof (*fshi));
1088 
1089         }
1090 
1091         list_destroy(&fsrecp->fshfsr_list);
1092         rw_destroy(&fsrecp->fshfsr_lock);
1093         kmem_free(fsrecp, sizeof (*fsrecp));
1094 }
1095 
1096 /*
1097  * fsh_init() is called in vfsinit()@vfs.c. This function MUST be called
1098  * before every other fsh call.
1099  */
1100 void
1101 fsh_init(void)
1102 {
1103         mutex_init(&fsh_cb_lock, NULL, MUTEX_DRIVER, NULL);
1104         mutex_init(&fsh_cb_owner_lock, NULL, MUTEX_DRIVER, NULL);
1105         list_create(&fsh_cblist, sizeof (fsh_callback_int_t),
1106             offsetof(fsh_callback_int_t, fshci_node));
1107 
1108         mutex_init(&fsh_lock, NULL, MUTEX_DRIVER, NULL);
1109 
1110         list_create(&fsh_map, sizeof (fsh_int_t), offsetof(fsh_int_t,
1111             fshi_global));
1112 
1113         /* See comment above fsh_fsrec_prepare() */
1114         fsh_res_ptr = (void *)-1;
1115 
1116         fsh_idspace = id_space_create("fsh", 0, fsh_limit);























































































































1117 }