1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2013 Damian Bogel.  All rights reserved.
  14  */
  15 
  16 #include <sys/debug.h>
  17 #include <sys/errno.h>
  18 #include <sys/fsh.h>
  19 #include <sys/fsh_impl.h>
  20 #include <sys/id_space.h>
  21 #include <sys/kmem.h>
  22 #include <sys/ksynch.h>
  23 #include <sys/list.h>
  24 #include <sys/sunddi.h>
  25 #include <sys/sysmacros.h>
  26 #include <sys/types.h>
  27 #include <sys/vfs.h>
  28 #include <sys/vnode.h>
  29 
  30 /*
  31  * Filesystem hook framework (fsh)
  32  *
  33  * 1. Abstract.
  34  * The main goal of the filesystem hook framework is to provide an easy way to
  35  * inject client-defined behaviour into vfs/vnode calls. fsh works on
  36  * vfs_t granularity.
  37  *
  38  *
  39  * 2. Overview.
  40  * fsh_t is the main object in the fsh. An fsh_t is a structure containing:
  41  *      - pointers to hooking functions (named after corresponding
  42  *      vnodeops/vfsops)
  43  *      - a pointer to an argument to pass (this is shared for all the
  44  *      hooks in a given fsh_t)
  45  *
  46  * The information from fsh_t is copied by the fsh and an fsh_handle_t
  47  * is returned. It should be used for further removing.
  48  *
  49  *
  50  * 3. Usage.
  51  * It is expected that vfs_t/vnode_t that are passed to fsh_foo() functions
  52  * are held by the caller when needed. fsh does no vfs_t/vnode_t locking.
  53  *
  54  * fsh_t is a structure filled out by the client. If a client does not want
  55  * to add/remove a hook for function foo(), he should fill the foo field of
  56  * fsh_t with NULL. Every hook has a type of corresponding vfsop/vnodeop with
  57  * two additional arguments:
  58  *      - fsh_int_t *fsh_int - this argument MUST be passed to
 *      fsh_next_foo(). fsh wouldn't know which hook to execute next
  60  *      without it
  61  *      - void *arg - this is the argument passed with fsh_t during
  62  *      installation
  63  * After installation, an fsh_handle_t is returned to the caller.
  64  *
  65  * A client might want to fire callbacks when vfs_t's are being mounted
  66  * or freed. There's an fsh_callback_t structure provided to install such
  67  * callbacks along with the API.
  68  * It is legal to call fsh_hook_{install,remove}() inside a mount callback
  69  * WITHOUT holding the vfs_t.
  70  *
  71  * After vfs_t's free callback returns, all the handles associated with the
  72  * hooks installed on this vfs_t are invalid and must not be used.
  73  *
  74  *
  75  * Every hook function is responsible for passing the control to the next
  76  * hook associated with a particular call. In order to provide an easy way to
  77  * modify the behaviour of a function call both before and after the
  78  * underlying vfsop/vnodeop (or next hook) execution, a hook has to call
  79  * fsh_next_foo() at some point. This function does necessary internal
  80  * operations and calls the next hook, until there's no hook left, then it
  81  * calls the underlying vfsop/vnodeop.
  82  * Example:
  83  * my_freefs(fsh_int_t *fsh_int, void *arg, vfs_t *vfsp) {
  84  *      cmn_err(CE_NOTE, "freefs called!\n");
  85  *      return (fsh_next_freefs(fsh_int, vfsp));
  86  * }
  87  *
  88  *
  89  * 4. API
  90  * None of the APIs should be called during interrupt context above lock
  91  * level. The only exceptions are fsh_next_foo() functions, which do not use
  92  * locks.
  93  *
  94  * a) fsh.h
 * None of the functions listed below should be called inside of a hook.
  96  * Doing so will cause a deadlock. The only exceptions are fsh_next_foo() and
  97  * fsh_callback_{install,remove}().
  98  *
  99  * fsh_callback_{install,remove}() should not be called inside of a {mount,free}
 100  * callback. Doing so will cause a deadlock.
 101  *
 102  * fsh_fs_enable(vfs_t *vfsp)
 103  * fsh_fs_disable(vfs_t *vfsp)
 104  *      Enables/disables fsh for a given vfs_t.
 105  *
 106  * fsh_hook_install(vfs_t *vfsp, fsh_t *hooks)
 107  *      Installs hooks on vfsp filesystem. It's important that hooks are
 108  *      executed in LIFO installation order, which means that if there are
 109  *      hooks A and B installed in this order, B is going to be executed
 110  *      before A. It returns a correct handle, or (-1) if hook/callback
 111  *      limit exceeded. The handle is valid until a free callback returns
 112  *      or an explicit call to fsh_hook_remove().
 113  *
 114  * fsh_hook_remove(fsh_handle_t handle)
 115  *      Removes a hook and invalidates the handle. It is guaranteed that after
 *      this function returns, hook associated with this handle won't be
 117  *      executing.
 118  *
 * fsh_next_foo(fsh_int_t *fsh_int, ARGUMENTS)
 120  *      This is the function which should be called once in every hook. It
 121  *      does the necessary internal operations and passes control to the
 122  *      next hook or, if there's no hook left, to the underlying
 123  *      vfsop/vnodeop.
 124  *
 125  * fsh_callback_install(fsh_callback_t *callback)
 126  * fsh_callback_remove(fsh_callback_handle_t handle)
 127  *      Installs/removes callbacks for vfs_t mount/free. The mount callback
 128  *      is executed right before domount() returns. The free callback is
 129  *      called right before VFS_FREEVFS() is called. The
 130  *      fsh_callback_install() returns a correct handle, or (-1) if
 131  *      hook/callback limit exceeded.
 132  *
 133  * b) fsh_impl.h (for vfs.c and vnode.c only)
 134  * fsh_init()
 135  *      This call has to be done in vfsinit(). It initialises the fsh. It
 136  *      is absolutely necessary that this call is made before any other fsh
 137  *      operation.
 138  *
 139  * fsh_exec_mount_callbacks(vfs_t *vfsp)
 140  * fsh_exec_free_callbacks(vfs_t *vfsp)
 141  *      Used to execute all fsh callbacks for {mount,free} of a vfs_t.
 142  *
 143  * fsh_fsrec_destroy(struct fsh_fsrecord *fsrecp)
 144  *      Destroys an fsh_fsrecord structure. All the hooks installed on this
 145  *      vfs_t are then destroyed. free callback is called before this function.
 146  *
 147  * fsh_foo(ARGUMENTS)
 148  *      Function used to start executing the hook chain for a given call.
 149  *
 150  * 5. Internals.
 151  * fsh_fsrecord_t is a structure which lives inside a vfs_t.
 152  * fsh_fsrecord_t contains:
 153  *      - an rw-lock that protects the structure
 154  *      - a list of hooks installed on this vfs_t
 155  *      - a flag which tells whether fsh is enabled on this vfs_t
 156  *
 157  * Unfortunately, because of unexpected behaviour of some filesystems (no use of
 158  * vfs_alloc()/vfs_init()) there's no good place to initialise the
 * fsh_fsrecord_t structure. The approach being used here is to check if it's
 160  * initialised in every call. Because of the fact that no lock could be used
 161  * here (the same problem with initialisation), a spinlock is used.  This is
 162  * explained in more detail in a comment before fsh_prepare_fsrec(), a function
 163  * that MUST be used whenever a vfsp->vfs_fshrecord needs to be accessed.  After
 164  * doing that, it's completely safe to keep this pointer locally, because it
 165  * won't be changed until vfs_free() is called.
 166  *
 167  * The only exception from the fsh_prepare_fsrec() rule is vfs_free(),
 * where it is expected that no other fsh calls would be made for the
 169  * vfs_t that's being freed. That's why vfs_fshrecord could be only NULL or a
 170  * valid pointer and could not be concurrently accessed.
 171  *
 172  * When there are no fsh functions (that use a particular fsh_fsrecord_t)
 173  * executing, the vfs_fshrecord pointer won't be equal to fsh_res_ptr. It
 174  * would be NULL or a pointer to an initialised fsh_fsrecord_t.
 175  *
 176  *
 177  * Mount callbacks are executed by a call to fsh_exec_mount_callbacks() right
 178  * before returning from domount()@vfs.c.
 179  *
 180  * Free callbacks are executed by a call to fsh_exec_free_callbacks() right
 181  * before calling VFS_FREEVFS(), after vfs_t's reference count drops to 0.
 182  *
 183  *
 184  * fsh_next_foo(fsh_int_t *fshi, ARGUMENTS)
 185  *      This function is quite simple. It takes the fsh_int_t and passes control
 186  *      to the next hook or to the underlying vnodeop/vfsop. fshi is always the
 187  *      next hook to be executed.
 188  *
 189  *
 190  * 6. Concurrency
 191  * fsh does no vfs_t nor vnode_t locking. It is expected that whenever it is
 192  * needed, the client does that.
 193  *
 194  * An fsh_fsrecord_t of a vfs_t is read-locked (fshfsr_lock) by every
 195  * fsh_foo() function (with the mentioned vfs_t as a parameter, of course).
 196  * This means that fsh_hook_{install,remove}() must NOT be called inside of
 197  * a hook, because it will cause a deadlock.
 198  *
 * The same thing applies to callbacks. fsh_cblist is read-locked by
 * fsh_exec_{mount,free}_callbacks(). This means that
 * fsh_callback_{install,remove}() must not be called inside a callback,
 * because it will cause a deadlock.
 202  *
 * Solutions to concurrency issues involving vfs_fshrecord are explained
 * both in section 5 (Internals) and before fsh_prepare_fsrec().
 205  *
 206  * Concurrency issues between fsh_hook_remove() and fsh_fsrec_destroy() are
 207  * solved by fsh_remove_lock. For more info see: fsh_remove_lock,
 208  * fsh_hook_remove(), fsh_fsrec_destroy().
 209  */
 210 
 211 /* Internals */
 212 /* Used for mapping an fsh_handle_t to fsh_int_t. */
typedef struct fsh_mapping {
        fsh_handle_t    fshm_handle;    /* handle given to the client */
        fsh_int_t       *fshm_fshi;     /* hook set the handle refers to */
        vfs_t           *fshm_vfsp;     /* vfs_t the hook set lives on */
        list_node_t     fshm_next;      /* linkage on the fsh_map list */
} fsh_mapping_t;
 219 
/* One installed hook set; lives on the fshfsr_list of its vfs_t. */
struct fsh_int {
        fsh_handle_t    fshi_handle;    /* handle of this hook set */
        fsh_t           fshi_hooks;     /* copy of the client's fsh_t */
        fsh_mapping_t   *fshi_mapping;  /* back pointer to fsh_map entry */
        list_node_t     fshi_next;      /* linkage on fshfsr_list */
};
 226 
/* One installed set of mount/free callbacks; lives on fsh_cblist. */
typedef struct fsh_callback_int {
        fsh_callback_t  fshci_cb;       /* copy of the client's callbacks */
        fsh_callback_handle_t fshci_handle; /* handle given to the client */
        list_node_t     fshci_next;     /* linkage on fsh_cblist */
} fsh_callback_int_t;
 232 
 233 
 234 /*
 * fsh_fsrecord_t is the main internal structure. Its content is protected
 236  * by fshfsr_lock. The fshfsr_list is a list of fsh_int_t hook entries for
 237  * the vfs_t that contains the fsh_fsrecord_t.
 238  *
 239  * It is guaranteed by the fsh_prepare_fsrec() that outside the fsh,
 240  * a pointer to fsh_fsrecord inside a vfs_t is never equal to fsh_res_ptr.
 241  */
struct fsh_fsrecord {
        krwlock_t       fshfsr_lock;    /* protects the fields below */
        int             fshfsr_enabled; /* non-zero if fsh is enabled */
        list_t          fshfsr_list;    /* list of fsh_int_t */
};
 247 
 248 /*
 249  * It's a list of fsh_mapping_t's used to map fsh_handle_t's to
 250  * fsh_int_t's. This is needed because of the fact that we'd like an opaque
 251  * handle returned to the fsh API client after a hook is successfully
 252  * installed. We'd like to make the handle the only thing that is needed
 * after the hooks are installed, for further actions on them. This means,
 254  * that there is no easy way to search for the hooks matching a handle,
 255  * without having the vfs_t on which they are installed.
 256  * The same problem doesn't apply to callbacks, that's why fsh_map handles
 257  * only fsh_handle_t to fsh_int_t translation.
 258  */
 259 static kmutex_t fsh_map_lock;
 260 static list_t fsh_map;
 261 
 262 /*
 263  * The lock is used when there is a need to lock both the fsh_fsrecord_t and
 264  * fsh_map.
 265  * fsh_hook_remove() and fsh_fsrec_destroy() use this lock to protect both the
 266  * fsh_map and the fsh_fsrecord_t associated with a vfs_t on which the removing
 267  * is performed. Because fsh_hook_remove() starts with handle, through fsh_map
 268  * to vfs_fshrecord and fsh_fsrec_destroy() from vfs_fshrecord to fsh_map, such
 269  * lock is necessary.
 270  * For more info see: fsh_fsrec_destroy() and fsh_hook_remove()
 271  */
 272 static kmutex_t fsh_remove_lock;
 273 
 274 /*
 275  * It's a list of fsh_callback_int_t's. Unlike hooks, there is no need to
 276  * keep a separate list for translating handles to fsh_callback_int_t's,
 277  * because a callback list is global for all the vfs_t's.
 278  */
 279 static krwlock_t fsh_cblist_lock;
 280 static list_t fsh_cblist;
 281 
 282 /*
 283  * A reserved pointer for fsh purposes. It is used because of the method
 284  * chosen for solving concurrency issues with vfs_fshrecord. The full
 285  * explanation is in the big theory statement at the beginning of this
 286  * file. It is initialised in fsh_init().
 287  */
 288 static void *fsh_res_ptr;
 289 
 290 static fsh_fsrecord_t *fsh_fsrec_create();
 291 
 292 int fsh_limit = INT_MAX;
 293 static id_space_t *fsh_idspace;
 294 
 295 /*
 296  * Important note:
 297  * Before using this function, fsh_init() MUST be called. We do that in
 298  * vfsinit()@vfs.c.
 299  *
 300  * One would ask, why isn't the vfsp->vfs_fshrecord initialised when the
 301  * vfs_t is created. Unfortunately, some filesystems (e.g. fifofs) do not
 302  * call vfs_init() or even vfs_alloc(), It's possible that some unbundled
 303  * filesystems could do the same thing. That's why this solution is
 304  * introduced. It should be called before any code that needs access to
 305  * vfs_fshrecord.
 306  *
 307  * Locking:
 308  * There are no locks here, because there's no good place to initialise
 309  * the lock. Concurrency issues are solved by using atomic instructions
 310  * and a spinlock, which is spinning only once for a given vfs_t. Because
 311  * of that, the usage of the spinlock isn't bad at all.
 312  *
 313  * How it works:
 314  * a) if vfsp->vfs_fshrecord equals NULL, atomic_cas_ptr() changes it to
 315  *      fsh_res_ptr. That's a signal for other threads, that the structure
 316  *      is being initialised.
 317  * b) if vfsp->vfs_fshrecord equals fsh_res_ptr, that means we have to wait,
 318  *      because vfs_fshrecord is being initialised by another call.
 319  * c) other cases:
 320  *      vfs_fshrecord is already initialised, so we can use it. It won't change
 321  *      until vfs_free() is called. It can't happen when someone is holding
 322  *      the vfs_t, which is expected from the caller of fsh API.
 323  */
 324 static void
 325 fsh_prepare_fsrec(vfs_t *vfsp)
 326 {
 327         fsh_fsrecord_t *fsrec;
 328 
 329         while ((fsrec = atomic_cas_ptr(&vfsp->vfs_fshrecord, NULL,
 330             fsh_res_ptr)) == fsh_res_ptr)
 331                 ;
 332 
 333         if (fsrec == NULL)
 334                 atomic_swap_ptr(&vfsp->vfs_fshrecord, fsh_fsrec_create());
 335 }
 336 
 337 /*
 338  * API for enabling/disabling fsh per vfs_t.
 339  *
 340  * A newly created vfs_t has fsh enabled by default. If one would want to change
 341  * this behaviour, mount callbacks could be used.
 342  *
 343  * The caller is expected to hold the vfs_t.
 344  *
 345  * These functions must NOT be called in a hook.
 346  */
 347 void
 348 fsh_fs_enable(vfs_t *vfsp)
 349 {
 350         fsh_prepare_fsrec(vfsp);
 351 
 352         rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
 353         vfsp->vfs_fshrecord->fshfsr_enabled = 1;
 354         rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
 355 }
 356 
 357 void
 358 fsh_fs_disable(vfs_t *vfsp)
 359 {
 360         fsh_prepare_fsrec(vfsp);
 361 
 362         rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
 363         vfsp->vfs_fshrecord->fshfsr_enabled = 0;
 364         rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
 365 }
 366 
 367 /*
 368  * API used for installing hooks. fsh_handle_t is returned for further
 369  * actions (currently just removing) on this set of hooks.
 370  *
 371  * fsh_t fields:
 372  * - arg - argument passed to every hook
 373  * - read, write, ... - pointers to hooks for corresponding vnodeops/vfsops;
 374  *      if there is no hook desired for an operation, it should be set to
 375  *      NULL
 376  *
 377  * It's important that the hooks are executed in LIFO installation order (they
 378  * are added to the head of the hook list).
 379  *
 380  * The caller is expected to hold the vfs_t.
 381  *
 382  * This function must NOT be called in a hook.
 383  *
 384  * Returns (-1) if hook/callback limit exceeded, handle otherwise.
 385  */
 386 fsh_handle_t
 387 fsh_hook_install(vfs_t *vfsp, fsh_t *hooks)
 388 {
 389         fsh_handle_t    handle;
 390         fsh_int_t       *fshi;
 391         fsh_mapping_t   *mapping;
 392 
 393         fsh_prepare_fsrec(vfsp);
 394 
 395         if ((handle = id_alloc(fsh_idspace)) == -1)
 396                 return (-1);
 397 
 398         fshi = kmem_alloc(sizeof (*fshi), KM_SLEEP);
 399         (void) memcpy(&fshi->fshi_hooks, hooks, sizeof (fshi->fshi_hooks));
 400         fshi->fshi_handle = handle;
 401 
 402         /*
 403          * Although we lock vfs_fshrecord and fsh_map in this function,
 404          * there is no need to use fsh_remove_lock. Since it is expected that a
 405          * vfs_t is held across this call, fsh_fsrec_destroy() cannot be
 406          * executing concurrently. fsh_hook_remove() cannot be called for this
 407          * hook set, because a handle passed to that function doesn't yet exist.
 408          */
 409 
 410         /* If it is called inside of a hook, causes deadlock. */
 411         rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
 412         list_insert_head(&vfsp->vfs_fshrecord->fshfsr_list, fshi);
 413         rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
 414 
 415         mapping = kmem_alloc(sizeof (*mapping), KM_SLEEP);
 416         mapping->fshm_handle = handle;
 417         mapping->fshm_vfsp = vfsp;
 418         mapping->fshm_fshi = fshi;
 419         fshi->fshi_mapping = mapping;
 420 
 421         mutex_enter(&fsh_map_lock);
 422         list_insert_head(&fsh_map, mapping);
 423         mutex_exit(&fsh_map_lock);
 424 
 425         return (handle);
 426 }
 427 
 428 /*
 429  * Used for removing a hook set.
 430  *
 431  * This function must NOT be called in a hook.
 432  *
 433  * Returns (-1) if hook wasn't found, 0 otherwise.
 434  */
 435 int
 436 fsh_hook_remove(fsh_handle_t handle)
 437 {
 438         fsh_fsrecord_t  *fsrecp;
 439         fsh_mapping_t   *mapping;
 440 
 441         /* For more info about the fsh_remove_lock, see fsh_fsrec_destroy() */
 442         mutex_enter(&fsh_remove_lock);
 443         mutex_enter(&fsh_map_lock);
 444         for (mapping = list_head(&fsh_map); mapping != NULL;
 445             mapping = list_next(&fsh_map, mapping)) {
 446                 if (mapping->fshm_handle == handle) {
 447                         list_remove(&fsh_map, mapping);
 448                         break;
 449                 }
 450         }
 451         mutex_exit(&fsh_map_lock);
 452         if (mapping == NULL) {
 453                 mutex_exit(&fsh_remove_lock);
 454                 return (-1);
 455         }
 456 
 457         ASSERT(mapping->fshm_fshi->fshi_handle == handle);
 458 
 459         /*
 460          * We don't have to call fsh_prepare_fsrec() here. fsh_fsrecord_t
 461          * is already initialised, because we've found a mapping for the given
 462          * handle. We instead make two ASSERTs.
 463          */
 464         fsrecp = mapping->fshm_vfsp->vfs_fshrecord;
 465         ASSERT(fsrecp != NULL);
 466         ASSERT(fsrecp != fsh_res_ptr);
 467 
 468         /* If it is called inside of a hook, causes deadlock. */
 469         rw_enter(&fsrecp->fshfsr_lock, RW_WRITER);
 470         list_remove(&fsrecp->fshfsr_list, mapping->fshm_fshi);
 471         rw_exit(&fsrecp->fshfsr_lock);
 472 
 473         mutex_exit(&fsh_remove_lock);
 474 
 475         id_free(fsh_idspace, handle);
 476 
 477         kmem_free(mapping->fshm_fshi, sizeof (*mapping->fshm_fshi));
 478         kmem_free(mapping, sizeof (*mapping));
 479 
 480         return (0);
 481 }
 482 
 483 /*
 484  * API for installing global mount/free callbacks.
 485  *
 486  * fsh_callback_t fields:
 487  * fshc_arg - argument passed to the callbacks
 488  * fshc_free - callback fired before VFS_FREEVFS() is called, after vfs_count
 489  *      drops to 0
 490  * fshc_mount - callback fired right before returning from domount()
 491  * The first argument of these callbacks is the vfs_t that is mounted/freed.
 492  * The second one is the fshc_arg.
 493  *
 494  * fsh_callback_handle_t is filled out by this function.
 495  *
 496  * This function must NOT be called in a callback, because it will cause
 497  * a deadlock.
 498  *
 499  * Returns (-1) if hook/callback limit exceeded.
 500  */
 501 fsh_callback_handle_t
 502 fsh_callback_install(fsh_callback_t *callback)
 503 {
 504         fsh_callback_int_t *fshci;
 505         fsh_callback_handle_t handle;
 506 
 507         if ((handle = id_alloc(fsh_idspace)) == -1)
 508                 return (-1);
 509 
 510         fshci = (fsh_callback_int_t *)kmem_alloc(sizeof (*fshci), KM_SLEEP);
 511         (void) memcpy(&fshci->fshci_cb, callback, sizeof (fshci->fshci_cb));
 512         fshci->fshci_handle = handle;
 513 
 514         /* If it is called in a {mount,free} callback, causes deadlock. */
 515         rw_enter(&fsh_cblist_lock, RW_WRITER);
 516         list_insert_head(&fsh_cblist, fshci);
 517         rw_exit(&fsh_cblist_lock);
 518 
 519         return (handle);
 520 }
 521 
 522 /*
 523  * API for removing global mount/free callbacks.
 524  *
 525  * This function must NOT be called in a callback, because it will cause
 526  * a deadlock.
 527  *
 528  * Returns (-1) if callback wasn't found, 0 otherwise.
 529  */
 530 int
 531 fsh_callback_remove(fsh_callback_handle_t handle)
 532 {
 533         fsh_callback_int_t *fshci;
 534 
 535         /* If it is called in a {mount,free} callback, causes deadlock. */
 536         rw_enter(&fsh_cblist_lock, RW_WRITER);
 537         for (fshci = list_head(&fsh_cblist); fshci != NULL;
 538             fshci = list_next(&fsh_cblist, fshci)) {
 539                 if (fshci->fshci_handle == handle) {
 540                         list_remove(&fsh_cblist, fshci);
 541                         break;
 542                 }
 543         }
 544         rw_exit(&fsh_cblist_lock);
 545 
 546         if (fshci == NULL)
 547                 return (-1);
 548 
 549         kmem_free(fshci, sizeof (*fshci));
 550         id_free(fsh_idspace, handle);
 551 
 552         return (0);
 553 }
 554 
 555 /*
 556  * This function is executed right before returning from domount()@vfs.c.
 557  * We are sure that it's called only after fsh_init().
 558  * It executes all the mount callbacks installed in the fsh.
 559  *
 560  * Since fsh_exec_mount_callbacks() is called only inside domount(), it is legal
 561  * to call fsh_hook_{install,remove}() inside a mount callback WITHOUT holding
 562  * this vfs_t. This guarantee should be preserved, because it's in the "Usage"
 563  * section in the big theory statement at the top of this file.
 564  */
 565 void
 566 fsh_exec_mount_callbacks(vfs_t *vfsp)
 567 {
 568         fsh_callback_int_t *fshci;
 569         fsh_callback_t *cb;
 570 
 571         rw_enter(&fsh_cblist_lock, RW_READER);
 572         for (fshci = list_head(&fsh_cblist); fshci != NULL;
 573             fshci = list_next(&fsh_cblist, fshci)) {
 574                 cb = &fshci->fshci_cb;
 575                 if (cb->fshc_mount != NULL)
 576                         (*(cb->fshc_mount))(vfsp, cb->fshc_arg);
 577         }
 578         rw_exit(&fsh_cblist_lock);
 579 }
 580 
 581 /*
 582  * This function is executed right before VFS_FREEVFS() is called in
 583  * vfs_rele()@vfs.c. We are sure that it's called only after fsh_init().
 584  * It executes all the free callbacks installed in the fsh.
 585  *
 586  * free() callback is the point where the handles associated with the hooks
 587  * installed on this vfs_t become invalid
 588  */
 589 void
 590 fsh_exec_free_callbacks(vfs_t *vfsp)
 591 {
 592         fsh_callback_int_t *fshci;
 593         fsh_callback_t *cb;
 594 
 595         rw_enter(&fsh_cblist_lock, RW_READER);
 596         for (fshci = list_head(&fsh_cblist); fshci != NULL;
 597             fshci = list_next(&fsh_cblist, fshci)) {
 598                 cb = &fshci->fshci_cb;
 599                 if (cb->fshc_free != NULL)
 600                         (*(cb->fshc_free))(vfsp, cb->fshc_arg);
 601         }
 602         rw_exit(&fsh_cblist_lock);
 603 }
 604 
 605 /*
 606  * API for vnode.c/vfs.c to start executing the fsh for a given operation.
 607  *
 608  * These interfaces are using fsh_res_ptr (in fsh_prepare_fsrec()), so it's
 609  * absolutely necessary to call fsh_init() before using them. That's done in
 610  * vfsinit().
 611  *
 * While these functions are executing, it's expected that necessary vfs_t's
 * are held so that vfs_free() isn't called. vfs_free() expects that no one
 * else accesses vfs_fshrecord of a given vfs_t.
 * It's also the caller's responsibility to keep vnode_t passed to fsh_foo()
 * alive and valid.
 * All these expectations are met because these functions are used only in
 * corresponding {fop,fsop}_foo() functions.
 619  */
 620 int
 621 fsh_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
 622         caller_context_t *ct)
 623 {
 624         int ret;
 625         fsh_fsrecord_t *fsrecp;
 626 
 627         fsh_prepare_fsrec(vp->v_vfsp);
 628         fsrecp = vp->v_vfsp->vfs_fshrecord;
 629 
 630         rw_enter(&fsrecp->fshfsr_lock, RW_READER);
 631         if (!(fsrecp->fshfsr_enabled)) {
 632                 rw_exit(&fsrecp->fshfsr_lock);
 633                 return ((*(vp->v_op->vop_read))(vp, uiop, ioflag, cr, ct));
 634         }
 635 
 636         ret = fsh_next_read(list_head(&fsrecp->fshfsr_list), vp, uiop, ioflag,
 637             cr, ct);
 638         rw_exit(&fsrecp->fshfsr_lock);
 639 
 640         return (ret);
 641 }
 642 
 643 int
 644 fsh_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
 645         caller_context_t *ct)
 646 {
 647         int ret;
 648         fsh_fsrecord_t *fsrecp;
 649 
 650         fsh_prepare_fsrec(vp->v_vfsp);
 651         fsrecp = vp->v_vfsp->vfs_fshrecord;
 652 
 653         rw_enter(&fsrecp->fshfsr_lock, RW_READER);
 654         if (!(vp->v_vfsp->vfs_fshrecord->fshfsr_enabled)) {
 655                 rw_exit(&fsrecp->fshfsr_lock);
 656                 return ((*(vp->v_op->vop_write))(vp, uiop, ioflag, cr, ct));
 657         }
 658 
 659         ret = fsh_next_write(list_head(&fsrecp->fshfsr_list), vp, uiop, ioflag,
 660             cr, ct);
 661         rw_exit(&fsrecp->fshfsr_lock);
 662 
 663         return (ret);
 664 }
 665 
 666 int
 667 fsh_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
 668 {
 669         fsh_fsrecord_t *fsrecp;
 670         int ret;
 671 
 672         fsh_prepare_fsrec(vfsp);
 673         fsrecp = vfsp->vfs_fshrecord;
 674 
 675         rw_enter(&fsrecp->fshfsr_lock, RW_READER);
 676         if (!(fsrecp->fshfsr_enabled)) {
 677                 rw_exit(&fsrecp->fshfsr_lock);
 678                 return ((*(vfsp->vfs_op->vfs_mount))(vfsp, mvp, uap, cr));
 679         }
 680 
 681         ret = fsh_next_mount(list_head(&fsrecp->fshfsr_list), vfsp, mvp, uap,
 682             cr);
 683         rw_exit(&fsrecp->fshfsr_lock);
 684 
 685         return (ret);
 686 }
 687 
 688 int
 689 fsh_unmount(vfs_t *vfsp, int flag, cred_t *cr)
 690 {
 691         fsh_fsrecord_t *fsrecp;
 692         int ret;
 693 
 694         fsh_prepare_fsrec(vfsp);
 695         fsrecp = vfsp->vfs_fshrecord;
 696 
 697         rw_enter(&fsrecp->fshfsr_lock, RW_READER);
 698         if (!(fsrecp->fshfsr_enabled)) {
 699                 rw_exit(&fsrecp->fshfsr_lock);
 700                 return ((*(vfsp->vfs_op->vfs_unmount))(vfsp, flag, cr));
 701         }
 702 
 703         ret = fsh_next_unmount(list_head(&fsrecp->fshfsr_list), vfsp, flag, cr);
 704         rw_exit(&fsrecp->fshfsr_lock);
 705 
 706         return (ret);
 707 }
 708 
 709 /*
 710  * This is the funtion used by fsh_prepare_fsrec() to allocate a new
 711  * fsh_fsrecord. This function is called by the first function which
 712  * access the vfs_fshrecord and finds out it's NULL.
 713  */
 714 static fsh_fsrecord_t *
 715 fsh_fsrec_create()
 716 {
 717         fsh_fsrecord_t *fsrecp;
 718 
 719         fsrecp = (fsh_fsrecord_t *)kmem_zalloc(sizeof (*fsrecp), KM_SLEEP);
 720         list_create(&fsrecp->fshfsr_list, sizeof (fsh_int_t),
 721             offsetof(fsh_int_t, fshi_next));
 722         rw_init(&fsrecp->fshfsr_lock, NULL, RW_DRIVER, NULL);
 723         fsrecp->fshfsr_enabled = 1;
 724         return (fsrecp);
 725 }
 726 
 727 
 728 /*
 729  * This call can be used ONLY in vfs_free(). It's assumed that no other
 730  * fsh calls using the vfs_t that owns the fsh_fsrecord to be destroyed
 731  * are executing while a call to fsh_fsrec_destroy() is made. With this
 732  * assumptions, no concurrency issues occur.
 733  *
 734  * Before calling this function outside the fsh, it's sufficient and
 735  * required to check if the passed fsh_fsrecord * is not NULL. We don't
 736  * have to check if it is not equal to fsh_res_ptr, because all the fsh API
 737  * calls involving this vfs_t should end before vfs_free() is called
 738  * (outside the fsh, fsh_fsrecord is never equal to fsh_res_ptr). That is
 739  * guaranteed by the explicit requirement that the caller of fsh API holds
 740  * the vfs_t when needed.
 741  *
 742  * All the remaining hooks are being removed.
 743  */
 744 void
 745 fsh_fsrec_destroy(struct fsh_fsrecord *volatile fsrecp)
 746 {
 747         fsh_int_t *fshi;
 748 
 749         VERIFY(fsrecp != NULL);
 750 
 751         /*
 752          * Although it is expected that no fsh calls using this vfs_t
 753          * would be executing, we would not like to panic if that happens.
 754          * That's a client's bug obviously, but we'd like to minimise the
 755          * possibility of a system crash. That's why fsh_remove_lock is used
 756          * here. Even if fsh_hook_remove() would be called with a handle that is
 757          * invalid, it will just return (-1), because it won't find the handle
 758          * in fsh_map.  here is of course a possibility, that an invalid handle
 759          * (id_t internally) would become valid (it would be assigned again),
 760          * but this is very rare, because of the fact that id_alloc() uses next
 761          * fit strategy to alloc the id_t's.
 762          * For more info see fsh_hook_remove()
 763          */
 764         mutex_enter(&fsh_remove_lock);
 765         while ((fshi = list_remove_head(&fsrecp->fshfsr_list)) != NULL) {
 766                 fsh_mapping_t *mapping = fshi->fshi_mapping;
 767 
 768                 ASSERT(mapping->fshm_handle == fshi->fshi_handle);
 769                 ASSERT(mapping->fshm_fshi == fshi);
 770 
 771                 mutex_enter(&fsh_map_lock);
 772                 list_remove(&fsh_map, mapping);
 773                 mutex_exit(&fsh_map_lock);
 774 
 775                 id_free(fsh_idspace, fshi->fshi_handle);
 776 
 777                 kmem_free(fshi, sizeof (*fshi));
 778                 kmem_free(mapping, sizeof (*mapping));
 779         }
 780         mutex_exit(&fsh_remove_lock);
 781 
 782         list_destroy(&fsrecp->fshfsr_list);
 783         rw_destroy(&fsrecp->fshfsr_lock);
 784         kmem_free(fsrecp, sizeof (*fsrecp));
 785 }
 786 
 787 /*
 788  * fsh_init() is called in vfsinit()@vfs.c. This function MUST be called
 789  * before every other fsh call.
 790  */
 791 void
 792 fsh_init(void)
 793 {
 794         rw_init(&fsh_cblist_lock, NULL, RW_DRIVER, NULL);
 795         list_create(&fsh_cblist, sizeof (fsh_callback_int_t),
 796             offsetof(fsh_callback_int_t, fshci_next));
 797 
 798         mutex_init(&fsh_map_lock, NULL, MUTEX_DRIVER, NULL);
 799         mutex_init(&fsh_remove_lock, NULL, MUTEX_DRIVER, NULL);
 800 
 801         list_create(&fsh_map, sizeof (fsh_mapping_t),
 802             offsetof(fsh_mapping_t, fshm_next));
 803 
 804         /* See comment above fsh_prepare_fsrec() */
 805         fsh_res_ptr = (void *)-1;
 806 
 807         fsh_idspace = id_space_create("fsh", 0, fsh_limit);
 808 }
 809 
 810 /*
 * These functions are used to pass control to the next hook or underlying
 * vop or vfsop. Their clients don't have to worry about any locking, because
 * all the necessities are guaranteed by the fsh_foo().
 814  *
 815  * In fsh_next_foo() we execute the hook passed in the first argument and
 816  * try to find the next one. It is guaranteed that the passed hook is still
 817  * valid, because of fshfsr_lock held by fsh_foo().
 818  */
 819 int
 820 fsh_next_read(fsh_int_t *fshi, vnode_t *vp, uio_t *uiop, int ioflag,
 821         cred_t *cr, caller_context_t *ct)
 822 {
 823         while (fshi != NULL && fshi->fshi_hooks.read == NULL)
 824                 fshi = list_next(&vp->v_vfsp->vfs_fshrecord->fshfsr_list,
 825                     fshi);
 826 
 827         if (fshi != NULL)
 828                 return ((*(fshi->fshi_hooks.read))(
 829                     list_next(&vp->v_vfsp->vfs_fshrecord->fshfsr_list, fshi),
 830                     fshi->fshi_hooks.arg, vp, uiop, ioflag, cr, ct));
 831         else
 832                 return ((*(vp->v_op->vop_read))(vp, uiop, ioflag, cr, ct));
 833 }
 834 
 835 int
 836 fsh_next_write(fsh_int_t *fshi, vnode_t *vp, uio_t *uiop, int ioflag,
 837         cred_t *cr, caller_context_t *ct)
 838 {
 839         while (fshi != NULL && fshi->fshi_hooks.write == NULL)
 840                 fshi = list_next(&vp->v_vfsp->vfs_fshrecord->fshfsr_list,
 841                     fshi);
 842 
 843         if (fshi != NULL)
 844                 return ((*(fshi->fshi_hooks.write))(
 845                     list_next(&vp->v_vfsp->vfs_fshrecord->fshfsr_list, fshi),
 846                     fshi->fshi_hooks.arg, vp, uiop, ioflag, cr, ct));
 847         else
 848                 return ((*(vp->v_op->vop_write))(vp, uiop, ioflag, cr, ct));
 849 }
 850 
 851 int
 852 fsh_next_mount(fsh_int_t *fshi, vfs_t *vfsp, vnode_t *mvp, struct mounta *uap,
 853         cred_t *cr)
 854 {
 855         while (fshi != NULL && fshi->fshi_hooks.mount == NULL)
 856                 fshi = list_next(&vfsp->vfs_fshrecord->fshfsr_list, fshi);
 857 
 858         if (fshi != NULL)
 859                 return ((*(fshi->fshi_hooks.mount))(
 860                     list_next(&vfsp->vfs_fshrecord->fshfsr_list, fshi),
 861                     fshi->fshi_hooks.arg, vfsp, mvp, uap, cr));
 862         else
 863                 return ((*(vfsp->vfs_op->vfs_mount))(vfsp, mvp, uap, cr));
 864 }
 865 
 866 int
 867 fsh_next_unmount(fsh_int_t *fshi, vfs_t *vfsp, int flag, cred_t *cr)
 868 {
 869 
 870         while (fshi != NULL && fshi->fshi_hooks.unmount == NULL)
 871                 fshi = list_next(&vfsp->vfs_fshrecord->fshfsr_list, fshi);
 872 
 873         if (fshi != NULL)
 874                 return ((*(fshi->fshi_hooks.unmount))(
 875                     list_next(&vfsp->vfs_fshrecord->fshfsr_list, fshi),
 876                     fshi->fshi_hooks.arg, vfsp, flag, cr));
 877         else
 878                 return ((*(vfsp->vfs_op->vfs_unmount))(vfsp, flag, cr));
 879 }