1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2013 Damian Bogel.  All rights reserved.
  14  */
  15 
  16 #include <sys/debug.h>
  17 #include <sys/errno.h>
  18 #include <sys/fsh.h>
  19 #include <sys/fsh_impl.h>
  20 #include <sys/id_space.h>
  21 #include <sys/kmem.h>
  22 #include <sys/ksynch.h>
  23 #include <sys/list.h>
  24 #include <sys/sunddi.h>
  25 #include <sys/sysmacros.h>
  26 #include <sys/types.h>
  27 #include <sys/vfs.h>
  28 #include <sys/vnode.h>
  29 
  30 /*
  31  * TODO:
  32  * - solve the fsh_cblist_lock deadlock potential problem (fsh_callback_install
  33  * inside a callback) (that is also a problem for hooks)
  34  */
  35 
  36 /*
  37  * Filesystem hook framework (fsh)
  38  *
  39  * 1. Abstract.
  40  * The main goal of the filesystem hook framework is to provide an easy way to
  41  * inject consumer-defined behaviour into vfs/vnode calls. fsh works on
  42  * vfs_t granularity.
  43  *
  44  *
  45  * 2. Overview.
  46  * fsh_t is the main object in the fsh. An fsh_t is a structure containing:
  47  *      - pointers to hooking functions (named after corresponding
  48  *      vnodeops/vfsops)
  49  *      - a pointer to an argument to pass (this is shared for all the
  50  *      hooks in a given fsh_t)
  51  *      - pointer to a callback that should be executed after a hook is removed
  52  *
  53  * The information from fsh_t is copied by the fsh and an fsh_handle_t
  54  * is returned. It should be used for further removing.
  55  *
  56  *
  57  * 3. Usage.
  58  * It is expected that vfs_t/vnode_t that are passed to fsh_foo() functions
  59  * are held by the caller when needed. fsh does no vfs_t/vnode_t locking.
  60  *
  61  * fsh_t is a structure filled out by the consumer. If a consumer does not want
  62  * to add/remove a hook for function foo(), he should fill the foo field of
  63  * fsh_t with NULL. Every hook has a type of corresponding vfsop/vnodeop with
  64  * two additional arguments:
  65  *      - fsh_int_t *fsh_int - this argument MUST be passed to
  66  *      fsh_next_foo(). fsh wouldn't know which hook to execute next
  67  *      without it
  68  *      - void *arg - this is the argument passed with fsh_t during
  69  *      installation
  70  * fsh_t also contains two other fields:
  71  *      - void *arg - mentioned above, which is the argument passed to the hooks
  72  *      - remove_callback - it's a pointer to a callback that should execute
  73  *      right after the hook has been removed. It takes the arg and the handle
  74  *      as parameters.
  75  * After installation, an fsh_handle_t is returned to the caller.
  76  *
  77  * When a vfs_t is freed, all remaining hooks are being removed and the
  78  * remove_callbacks are being executed.
  79  *
  80  * Every hook function is responsible for passing the control to the next
  81  * hook associated with a particular call. In order to provide an easy way to
  82  * modify the behaviour of a function call both before and after the
  83  * underlying vfsop/vnodeop (or next hook) execution, a hook has to call
  84  * fsh_next_foo() at some point. This function does necessary internal
  85  * operations and calls the next hook, until there's no hook left, then it
  86  * calls the underlying vfsop/vnodeop.
  87  * Example:
  88  * my_freefs(fsh_int_t *fsh_int, void *arg, vfs_t *vfsp) {
  89  *      cmn_err(CE_NOTE, "freefs called!\n");
  90  *      return (fsh_next_freefs(fsh_int, vfsp));
  91  * }
  92  *
  93  * A consumer might want to fire callbacks when vfs_t's are being mounted
  94  * or freed. There's an fsh_callback_t structure provided to install such
  95  * callbacks along with the API.
  96  *
  97  *
  98  * 4. API
  99  * None of the APIs should be called during interrupt context above lock
 100  * level. The only exceptions are fsh_next_foo() functions, which do not use
 101  * locks.
 102  *
 103  * a) fsh.h
 104  * None of the functions listed below should be called inside of a hook.
 105  * Doing so will cause a deadlock. The only exceptions are fsh_next_foo() and
 106  * fsh_callback_{install,remove}().
 107  *
 108  * fsh_callback_{install,remove}() should not be called inside of a {mount,free}
 109  * callback. Doing so will cause a deadlock.
 110  *
 111  * fsh_fs_enable(vfs_t *vfsp)
 112  * fsh_fs_disable(vfs_t *vfsp)
 113  *      Enables/disables fsh for a given vfs_t.
 114  *
 115  * fsh_hook_install(vfs_t *vfsp, fsh_t *hooks)
 116  *      Installs hooks on vfsp filesystem. It's important that hooks are
 117  *      executed in LIFO installation order, which means that if there are
 118  *      hooks A and B installed in this order, B is going to be executed
 119  *      before A. It returns a correct handle, or (-1) if hook/callback
 120  *      limit exceeded.
 121  *
 122  * fsh_hook_remove(fsh_handle_t handle)
 123  *      Removes hooks.
 124  *
 125  * fsh_next_foo(fsh_int_t *fsh_int, ARGUMENTS)
 126  *      This is the function which should be called once in every hook. It
 127  *      does the necessary internal operations and passes control to the
 128  *      next hook or, if there's no hook left, to the underlying
 129  *      vfsop/vnodeop. fsh_int is the value the hook received as its first
 130  *      argument; the hook's own arg is not passed on.
 130  *
 131  * fsh_callback_install(fsh_callback_t *callback)
 132  * fsh_callback_remove(fsh_callback_handle_t handle)
 133  *      Installs/removes callbacks for vfs_t mount/free. The mount callback
 134  *      is executed right before domount() returns. The free callback is
 135  *      called right before VFS_FREEVFS() is called. The
 136  *      fsh_callback_install() returns a correct handle, or (-1) if
 137  *      hook/callback limit exceeded.
 138  *
 139  * b) fsh_impl.h (for vfs.c and vnode.c only)
 140  * fsh_init()
 141  *      This call has to be done in vfsinit(). It initialises the fsh. It
 142  *      is absolutely necessary that this call is made before any other fsh
 143  *      operation.
 144  *
 145  * fsh_exec_mount_callbacks(vfs_t *vfsp)
 146  * fsh_exec_free_callbacks(vfs_t *vfsp)
 147  *      Used to execute all fsh callbacks for {mount,free} of a vfs_t.
 148  *
 149  * fsh_fsrec_destroy(struct fsh_fsrecord *fsrecp)
 150  *      Destroys an fsh_fsrecord structure.
 151  *
 152  * fsh_foo(ARGUMENTS)
 153  *      Function used to start executing the hook chain for a given call.
 154  *
 155  * ---
 156  *
 157  * Because of the fact that unremoved hooks are being removed when a vfs_t
 158  * is being freed, the client should not expect that remove_callback()
 159  * would be called only when the vfs_t is still alive.
 160  *
 161  * 5. Internals.
 162  * fsh_fsrecord_t is a structure which lives inside a vfs_t.
 163  * fsh_fsrecord_t contains:
 164  *      - an rw-lock that protects the structure
 165  *      - a list of hooks installed on this vfs_t
 166  *      - a flag which tells whether fsh is enabled on this vfs_t
 167  *
 168  * Unfortunately, because of unexpected behaviour of some filesystems (no
 169  * use of vfs_alloc()/vfs_init()) there's no good place to initialise the
 170  * fsh_fsrecord_t structure. The approach being used here is to check if
 171  * it's initialised in every call. Because of the fact that no lock could
 172  * be used here (the same problem with initialisation), a spinlock is used.
 173  * This is explained in more detail in a comment before
 174  * fsh_prepare_fsrec(), a function that should be used whenever a
 175  * vfsp->vfs_fshrecord needs to be accessed.  After doing that, it's
 176  * completely safe to keep this pointer locally, because it won't be
 177  * changed until vfs_free() is called.
 178  *
 179  * The only exception from the fsh_prepare_fsrec() rule is vfs_free(),
 180  * where it is expected that no other fsh calls would be made for the
 181  * vfs_t that's being freed.
 182  *
 183  * When there are no fsh functions that use a particular fsh_fsrecord_t
 184  * executing, the vfs_fshrecord pointer won't be equal to fsh_res_ptr. It
 185  * would be NULL or a pointer to an initialised fsh_fsrecord_t.
 186  *
 187  *
 188  * fsh_next_foo()
 189  * This function is quite simple. It takes the fsh_int_t and passes control to
 190  * the next hook or to the underlying vnodeop/vfsop.
 191  *
 192  *
 193  * Mount callbacks are executed by a call to fsh_exec_mount_callbacks() right
 194  * before returning from domount()@vfs.c.
 195  *
 196  * Free callbacks are executed by a call to fsh_exec_free_callbacks() right
 197  * before calling VFS_FREEVFS(), after vfs_t's reference count drops to 0.
 198  *
 199  *
 200  * 6. Concurrency
 201  * fsh does no vfs_t nor vnode_t locking. It is expected that whenever it is
 202  * needed, the consumer does that.
 203  *
 204  * An fsh_fsrecord_t of a vfs_t is read-locked (fshfsr_lock) by every
 205  * fsh_foo() function (with the mentioned vfs_t as a parameter, of course).
 206  * This means that fsh_hook_{install,remove}() must NOT be called inside of
 207  * a hook, because it will cause a deadlock.
 208  *
 209  * The same thing applies to callbacks. fsh_cblist is read-locked by
 210  * fsh_exec_{mount,free}_callbacks(). This means that
 211  * fsh_callback_{install,remove}() must not be called inside a callback,
 212  * because it will cause a deadlock.
 212  *
 213  * Solutions to concurrency issues involving vfs_fshrecord are explained
 214  * both in chapter 5 "Internals" and before the fsh_prepare_fsrec() function.
 215  */
 216 
 217 /* Internals */
 218 struct fsh_int {
 219         fsh_handle_t    fshi_handle;    /* handle given out by fsh_hook_install() */
 220         fsh_t           fshi_hooks;     /* private copy of the consumer's fsh_t */
 221         list_node_t     fshi_next;      /* linkage on fshfsr_list */
 222 };
 223 
 224 typedef struct fsh_callback_int {
 225         fsh_callback_t  fshci_cb;       /* private copy of the consumer's fsh_callback_t */
 226         fsh_callback_handle_t fshci_handle; /* handle given out by fsh_callback_install() */
 227         list_node_t     fshci_next;     /* linkage on fsh_cblist */
 228 } fsh_callback_int_t;
 229 
 230 /* Used for mapping an fsh_handle_t to fsh_int_t. */
 231 typedef struct fsh_mapping {
 232         fsh_handle_t    fshm_handle;    /* lookup key, equal to fshm_fshi->fshi_handle */
 233         fsh_int_t       *fshm_fshi;     /* hook entry the handle refers to */
 234         vfs_t           *fshm_vfsp;     /* vfs_t whose fshfsr_list holds fshm_fshi */
 235         list_node_t     fshm_next;      /* linkage on fsh_map */
 236 } fsh_mapping_t;
 237 
 238 /*
 239  * fsh_fsrecord_t is the main internal structure. Its content is protected
 240  * by fshfsr_lock. The fshfsr_list is a list of fsh_int_t hook entries for
 241  * the vfs_t that contains the fsh_fsrecord_t.
 242  *
 243  * It is guaranteed by the fsh_prepare_fsrec() that outside the fsh,
 244  * a pointer to fsh_fsrecord inside a vfs_t is never equal to fsh_res_ptr.
 245  */
 246 struct fsh_fsrecord {
 247         krwlock_t       fshfsr_lock;    /* reader: fsh_foo(); writer: install/remove/enable/disable */
 248         int             fshfsr_enabled; /* non-zero when hooks are run; set to 1 on creation */
 249         list_t          fshfsr_list;    /* list of fsh_int_t */
 250 };
 251 
 252 /*
 253  * It's a list of fsh_mapping_t's used to map fsh_handle_t's to
 254  * fsh_int_t's. This is needed because of the fact that we'd like an opaque
 255  * handle returned to the fsh API client after a hook is successfully
 256  * installed. We'd like to make the handle the only thing that is needed
 257  * after the hooks are installed, for further actions on them. This means,
 258  * that there is no easy way to search for the hooks matching a handle,
 259  * without having the vfs_t on which they are installed.
 260  * The same problem doesn't apply to callbacks, that's why fsh_map handles
 261  * only fsh_handle_t to fsh_int_t translation.
 262  */
 263 static kmutex_t fsh_map_lock;   /* held around every fsh_map insert/search/remove */
 264 static list_t fsh_map;          /* fsh_mapping_t entries, one per installed hook set */
 265 
 266 /*
 267  * It's a list of fsh_callback_int_t's. Unlike hooks, there is no need to
 268  * keep a separate list for translating handles to fsh_callback_int_t's,
 269  * because a callback list is global for all the vfs_t's.
 270  */
 271 static krwlock_t fsh_cblist_lock;       /* writer: callback install/remove; reader: callback execution */
 272 static list_t fsh_cblist;               /* fsh_callback_int_t entries */
 273 
 274 /*
 275  * A reserved pointer for fsh purposes. It is used because of the method
 276  * chosen for solving concurrency issues with vfs_fshrecord. The full
 277  * explanation is in the big theory statement at the beginning of this
 278  * file. It is initialised in fsh_init().
 279  */
 280 static void *fsh_res_ptr;       /* (void *)-1 after fsh_init(); marks a vfs_fshrecord being initialised */
 281 
 282 static fsh_fsrecord_t *fsh_fsrec_create();
 283 
 284 int fsh_limit = INT_MAX;                /* upper bound handed to id_space_create() in fsh_init() */
 285 static id_space_t *fsh_idspace;         /* id space that hook and callback handles are taken from */
 286 
 287 /*
 288  * Important note:
 289  * Before using this function, fsh_init() MUST be called. We do that in
 290  * vfsinit()@vfs.c.
 291  *
 292  * One would ask, why isn't the vfsp->vfs_fshrecord initialised when the
 293  * vfs_t is created. Unfortunately, some filesystems (e.g. fifofs) do not
 294  * call vfs_init() or even vfs_alloc(). It's possible that some unbundled
 295  * filesystems could do the same thing. That's why this solution is
 296  * introduced. It should be called before any code that needs access to
 297  * vfs_fshrecord.
 298  *
 299  * Locking:
 300  * There are no locks here, because there's no good place to initialise
 301  * the lock. Concurrency issues are solved by using atomic instructions
 302  * and a spinlock, which is spinning only once for a given vfs_t. Because
 303  * of that, the usage of the spinlock isn't bad at all.
 304  *
 305  * How it works:
 306  * a) if vfsp->vfs_fshrecord equals NULL, atomic_cas_ptr() changes it to
 307  *      fsh_res_ptr. That's a signal for other threads, that the structure
 308  *      is being initialised.
 309  * b) if vfsp->vfs_fshrecord equals fsh_res_ptr, that means we have to wait,
 310  *      because vfs_fshrecord is being initialised by another call.
 311  * c) other cases:
 312  *      vfs_fshrecord is already initialised, so we can use it. It won't change
 313  *      until vfs_free() is called. It can't happen when someone is holding
 314  *      the vfs_t, which is expected from the caller of fsh API.
 315  */
 316 static void
 317 fsh_prepare_fsrec(vfs_t *vfsp)
 318 {
 319         fsh_fsrecord_t *fsrec;
 320 
 321         while ((fsrec = atomic_cas_ptr(&vfsp->vfs_fshrecord, NULL,
 322             fsh_res_ptr)) == fsh_res_ptr)
 323                 ;
 324 
 325         if (fsrec == NULL)
 326                 atomic_swap_ptr(&vfsp->vfs_fshrecord, fsh_fsrec_create());
 327 }
 328 
 329 /*
 330  * API for enabling/disabling fsh per vfs_t.
 331  *
 332  * These functions must NOT be called in a hook.
 333  */
 334 void
 335 fsh_fs_enable(vfs_t *vfsp)
 336 {
 337         fsh_prepare_fsrec(vfsp);
 338 
 339         rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
 340         vfsp->vfs_fshrecord->fshfsr_enabled = 1;
 341         rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
 342 }
 343 
 344 void
 345 fsh_fs_disable(vfs_t *vfsp)
 346 {
 347         fsh_prepare_fsrec(vfsp);
 348 
 349         rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
 350         vfsp->vfs_fshrecord->fshfsr_enabled = 0;
 351         rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
 352 }
 353 
 354 /*
 355  * API used for installing hooks. fsh_handle_t is returned for further
 356  * actions (currently just removing) on this set of hooks.
 357  *
 358  * fsh_t fields:
 359  * - arg - argument passed to every hook
 360  * - remove_callback - callback executed right after a hook is removed
 361  * - read, write, ... - pointers to hooks for corresponding vnodeops/vfsops;
 362  *      if there is no hook desired for an operation, it should be set to
 363  *      NULL
 364  *
 365  * It's important that the hooks are executed in LIFO installation order (they
 366  * are added to the head of the hook list).
 367  *
 368  * This function must NOT be called in a hook.
 369  *
 370  * Returns (-1) if hook/callback limit exceeded, handle otherwise.
 371  */
 372 fsh_handle_t
 373 fsh_hook_install(vfs_t *vfsp, fsh_t *hooks)
 374 {
 375         fsh_handle_t    handle;
 376         fsh_int_t       *fshi;
 377         fsh_mapping_t   *mapping;
 378 
 379         fsh_prepare_fsrec(vfsp);
 380 
 381         if ((handle = id_alloc(fsh_idspace)) == -1)
 382                 return (-1);
 383 
 384         fshi = kmem_alloc(sizeof (*fshi), KM_SLEEP);
 385         (void) memcpy(&fshi->fshi_hooks, hooks, sizeof (fshi->fshi_hooks));
 386         fshi->fshi_handle = handle;
 387 
 388         /* If it is called inside of a hook, causes deadlock. */
 389         rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
 390         list_insert_head(&vfsp->vfs_fshrecord->fshfsr_list, fshi);
 391         rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
 392 
 393 
 394         mapping = kmem_alloc(sizeof (*mapping), KM_SLEEP);
 395         mapping->fshm_handle = handle;
 396         mapping->fshm_vfsp = vfsp;
 397         mapping->fshm_fshi = fshi;
 398 
 399         mutex_enter(&fsh_map_lock);
 400         list_insert_head(&fsh_map, mapping);
 401         mutex_exit(&fsh_map_lock);
 402 
 403         return (handle);
 404 }
 405 
 406 /* Finds a mapping by handle and removes it from from fsh_map. */
 407 static fsh_mapping_t *
 408 fsh_remove_mapping(fsh_handle_t handle)
 409 {
 410         fsh_mapping_t *mapping;
 411 
 412         mutex_enter(&fsh_map_lock);
 413         for (mapping = list_head(&fsh_map); mapping != NULL;
 414             mapping = list_next(&fsh_map, mapping)) {
 415                 if (mapping->fshm_handle == handle) {
 416                         list_remove(&fsh_map, mapping);
 417                         break;
 418                 }
 419         }
 420         mutex_exit(&fsh_map_lock);
 421         return (mapping);
 422 }
 423 
 424 /*
 425  * Used for removing a hook set. remove_callback() is executed right after
 426  * the hook is removed from the vfs_t.
 427  *
 428  * This function must NOT be called in a hook.
 429  *
 430  * Returns (-1) if hook wasn't found, 0 otherwise.
 431  */
 432 int
 433 fsh_hook_remove(fsh_handle_t handle)
 434 {
 435         fsh_fsrecord_t  *fsrecp;
 436         fsh_mapping_t   *mapping;
 437         fsh_t           *hooks;
 438 
 439         mapping = fsh_remove_mapping(handle);
 440         if (mapping == NULL)
 441                 return (-1);
 442 
 443         ASSERT(mapping->fshm_fshi->fshi_handle == handle);
 444 
 445         /*
 446          * We don't have to call fsh_prepare_fsrec() here. fsh_fsrecord_t
 447          * is already initialised, because we've found a mapping for the given
 448          * handle. We instead make two ASSERTs.
 449          */
 450         fsrecp = mapping->fshm_vfsp->vfs_fshrecord;
 451         ASSERT(fsrecp != NULL);
 452         ASSERT(fsrecp != fsh_res_ptr);
 453 
 454         /* If it is called inside of a hook, causes deadlock. */
 455         rw_enter(&fsrecp->fshfsr_lock, RW_WRITER);
 456         list_remove(&fsrecp->fshfsr_list, mapping->fshm_fshi);
 457         rw_exit(&fsrecp->fshfsr_lock);
 458 
 459         hooks = &mapping->fshm_fshi->fshi_hooks;
 460         if (hooks->remove_callback != NULL)
 461                 (*(hooks->remove_callback))(hooks->arg, mapping->fshm_handle);
 462 
 463         id_free(fsh_idspace, handle);
 464 
 465         kmem_free(mapping->fshm_fshi, sizeof (*mapping->fshm_fshi));
 466         kmem_free(mapping, sizeof (*mapping));
 467 
 468         return (0);
 469 }
 470 
 471 /*
 472  * API for installing global mount/free callbacks.
 473  *
 474  * fsh_callback_t fields:
 475  * fshc_arg - argument passed to the callbacks
 476  * fshc_free - callback fired before VFS_FREEVFS() is called, after vfs_count
 477  *      drops to 0
 478  * fshc_mount - callback fired right before returning from domount()
 479  * The first argument of these callbacks is the vfs_t that is mounted/freed.
 480  * The second one is the fshc_arg.
 481  *
 482  * fsh_callback_handle_t is filled out by this function.
 483  *
 484  * This function must NOT be called in a callback, because it will cause
 485  * a deadlock.
 486  *
 487  * Returns (-1) if hook/callback limit exceeded.
 488  */
 489 fsh_callback_handle_t
 490 fsh_callback_install(fsh_callback_t *callback)
 491 {
 492         fsh_callback_int_t *fshci;
 493         fsh_callback_handle_t handle;
 494 
 495         if ((handle = id_alloc(fsh_idspace)) == -1)
 496                 return (-1);
 497 
 498         fshci = (fsh_callback_int_t *)kmem_alloc(sizeof (*fshci), KM_SLEEP);
 499         (void) memcpy(&fshci->fshci_cb, callback, sizeof (fshci->fshci_cb));
 500         fshci->fshci_handle = handle;
 501 
 502         /* If it is called in a {mount,free} callback, causes deadlock. */
 503         rw_enter(&fsh_cblist_lock, RW_WRITER);
 504         list_insert_head(&fsh_cblist, fshci);
 505         rw_exit(&fsh_cblist_lock);
 506 
 507         return (handle);
 508 }
 509 
 510 /*
 511  * API for removing global mount/free callbacks.
 512  *
 513  * This function must NOT be called in a callback, because it will cause
 514  * a deadlock.
 515  *
 516  * Returns (-1) if callback wasn't found, 0 otherwise.
 517  */
 518 int
 519 fsh_callback_remove(fsh_callback_handle_t handle)
 520 {
 521         fsh_callback_int_t *fshci;
 522 
 523         /* If it is called in a {mount,free} callback, causes deadlock. */
 524         rw_enter(&fsh_cblist_lock, RW_WRITER);
 525         for (fshci = list_head(&fsh_cblist); fshci != NULL;
 526             fshci = list_next(&fsh_cblist, fshci)) {
 527                 if (fshci->fshci_handle == handle) {
 528                         list_remove(&fsh_cblist, fshci);
 529                         break;
 530                 }
 531         }
 532         rw_exit(&fsh_cblist_lock);
 533 
 534         if (fshci == NULL)
 535                 return (-1);
 536 
 537         kmem_free(fshci, sizeof (*fshci));
 538         id_free(fsh_idspace, handle);
 539 
 540         return (0);
 541 }
 542 
 543 /*
 544  * This function is executed right before returning from domount()@vfs.c.
 545  * We are sure that it's called only after fsh_init().
 546  * It executes all the mount callbacks installed in the fsh.
 547  */
 548 void
 549 fsh_exec_mount_callbacks(vfs_t *vfsp)
 550 {
 551         fsh_callback_int_t *fshci;
 552         fsh_callback_t *cb;
 553 
 554         rw_enter(&fsh_cblist_lock, RW_READER);
 555         for (fshci = list_head(&fsh_cblist); fshci != NULL;
 556             fshci = list_next(&fsh_cblist, fshci)) {
 557                 cb = &fshci->fshci_cb;
 558                 if (cb->fshc_mount != NULL)
 559                         (*(cb->fshc_mount))(vfsp, cb->fshc_arg);
 560         }
 561         rw_exit(&fsh_cblist_lock);
 562 }
 563 
 564 /*
 565  * This function is executed right before VFS_FREEVFS() is called in
 566  * vfs_rele()@vfs.c. We are sure that it's called only after fsh_init().
 567  * It executes all the free callbacks installed in the fsh.
 568  */
 569 void
 570 fsh_exec_free_callbacks(vfs_t *vfsp)
 571 {
 572         fsh_callback_int_t *fshci;
 573         fsh_callback_t *cb;
 574 
 575         rw_enter(&fsh_cblist_lock, RW_READER);
 576         for (fshci = list_head(&fsh_cblist); fshci != NULL;
 577             fshci = list_next(&fsh_cblist, fshci)) {
 578                 cb = &fshci->fshci_cb;
 579                 if (cb->fshc_free != NULL)
 580                         (*(cb->fshc_free))(vfsp, cb->fshc_arg);
 581         }
 582         rw_exit(&fsh_cblist_lock);
 583 }
 584 
 585 /*
 586  * API for vnode.c/vfs.c to start executing the fsh for a given operation.
 587  *
 588  * These interfaces are using fsh_res_ptr (in fsh_prepare_fsrec()), so it's
 589  * absolutely necessary to call fsh_init() before using them. That's done in
 590  * vfsinit().
 591  *
 592  * While these functions are executing, it's expected that necessary vfs_t's
 593  * are held so that vfs_free() isn't called. vfs_free() expects that no one
 594  * else accesses vfs_fshrecord of a given vfs_t.
 595  * It's also the caller's responsibility to keep the vnode_t passed to
 596  * fsh_foo() alive and valid.
 597  * All these expectations are met because these functions are used only in
 598  * corresponding {fop,fsop}_foo() functions.
 599  */
 600 int
 601 fsh_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
 602         caller_context_t *ct)
 603 {
 604         int ret;
 605         fsh_fsrecord_t *fsrecp;
 606 
 607         fsh_prepare_fsrec(vp->v_vfsp);
 608         fsrecp = vp->v_vfsp->vfs_fshrecord;
 609 
 610         rw_enter(&fsrecp->fshfsr_lock, RW_READER);
 611         if (!(fsrecp->fshfsr_enabled)) {
 612                 rw_exit(&fsrecp->fshfsr_lock);
 613                 return ((*(vp->v_op->vop_read))(vp, uiop, ioflag, cr, ct));
 614         }
 615 
 616         ret = fsh_next_read(list_head(&fsrecp->fshfsr_list), vp, uiop, ioflag,
 617             cr, ct);
 618         rw_exit(&fsrecp->fshfsr_lock);
 619 
 620         return (ret);
 621 }
 622 
 623 int
 624 fsh_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
 625         caller_context_t *ct)
 626 {
 627         int ret;
 628         fsh_fsrecord_t *fsrecp;
 629 
 630         fsh_prepare_fsrec(vp->v_vfsp);
 631         fsrecp = vp->v_vfsp->vfs_fshrecord;
 632 
 633         rw_enter(&fsrecp->fshfsr_lock, RW_READER);
 634         if (!(vp->v_vfsp->vfs_fshrecord->fshfsr_enabled)) {
 635                 rw_exit(&fsrecp->fshfsr_lock);
 636                 return ((*(vp->v_op->vop_write))(vp, uiop, ioflag, cr, ct));
 637         }
 638 
 639         ret = fsh_next_write(list_head(&fsrecp->fshfsr_list), vp, uiop, ioflag,
 640             cr, ct);
 641         rw_exit(&fsrecp->fshfsr_lock);
 642 
 643         return (ret);
 644 }
 645 
 646 int
 647 fsh_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
 648 {
 649         fsh_fsrecord_t *fsrecp;
 650         int ret;
 651 
 652         fsh_prepare_fsrec(vfsp);
 653         fsrecp = vfsp->vfs_fshrecord;
 654 
 655         rw_enter(&fsrecp->fshfsr_lock, RW_READER);
 656         if (!(fsrecp->fshfsr_enabled)) {
 657                 rw_exit(&fsrecp->fshfsr_lock);
 658                 return ((*(vfsp->vfs_op->vfs_mount))(vfsp, mvp, uap, cr));
 659         }
 660 
 661         ret = fsh_next_mount(list_head(&fsrecp->fshfsr_list), vfsp, mvp, uap,
 662             cr);
 663         rw_exit(&fsrecp->fshfsr_lock);
 664 
 665         return (ret);
 666 }
 667 
 668 int
 669 fsh_unmount(vfs_t *vfsp, int flag, cred_t *cr)
 670 {
 671         fsh_fsrecord_t *fsrecp;
 672         int ret;
 673 
 674         fsh_prepare_fsrec(vfsp);
 675         fsrecp = vfsp->vfs_fshrecord;
 676 
 677         rw_enter(&fsrecp->fshfsr_lock, RW_READER);
 678         if (!(fsrecp->fshfsr_enabled)) {
 679                 rw_exit(&fsrecp->fshfsr_lock);
 680                 return ((*(vfsp->vfs_op->vfs_unmount))(vfsp, flag, cr));
 681         }
 682 
 683         ret = fsh_next_unmount(list_head(&fsrecp->fshfsr_list), vfsp, flag, cr);
 684         rw_exit(&fsrecp->fshfsr_lock);
 685 
 686         return (ret);
 687 }
 688 
 689 /*
 690  * This is the funtion used by fsh_prepare_fsrec() to allocate a new
 691  * fsh_fsrecord. This function is called by the first function which
 692  * access the vfs_fshrecord and finds out it's NULL.
 693  */
 694 static fsh_fsrecord_t *
 695 fsh_fsrec_create()
 696 {
 697         fsh_fsrecord_t *fsrecp;
 698 
 699         fsrecp = (fsh_fsrecord_t *)kmem_zalloc(sizeof (*fsrecp), KM_SLEEP);
 700         list_create(&fsrecp->fshfsr_list, sizeof (fsh_int_t),
 701             offsetof(fsh_int_t, fshi_next));
 702         rw_init(&fsrecp->fshfsr_lock, NULL, RW_DRIVER, NULL);
 703         fsrecp->fshfsr_enabled = 1;
 704         return (fsrecp);
 705 }
 706 
 707 /*
 708  * This call can be used ONLY in vfs_free(). It's assumed that no other
 709  * fsh_foo() using the vfs_t that owns the fsh_fsrecord to be destroyed
 710  * is executing while call to fsh_fsrec_destroy() is made. With this
 711  * assumptions, no concurrency issues occur.
 712  *
 713  * Before calling this function outside the fsh, it's sufficient and
 714  * requited to check if the passed fsh_fsrecord * is not NULL. We don't
 715  * have to check if it is not equal to fsh_res_ptr, because all the fsh API
 716  * calls involving this vfs_t should end before vfs_free() is called
 717  * (outside the fsh, fsh_fsrecord is never equal to fsh_res_ptr). That is
 718  * guaranteed by the explicit requirement that the caller of fsh API holds
 719  * the vfs_t when needed.
 720  *
 721  * All the remaining hooks are being removed.
 722  */
 723 void
 724 fsh_fsrec_destroy(struct fsh_fsrecord *volatile fsrecp)
 725 {
 726         fsh_int_t *fshi;
 727 
 728         ASSERT(fsrecp != NULL);
 729 
 730         /*
 731          * TODO: We could add fshi_mapping field to fsh_int_t and make the
 732          * process of removing the hooks faster. fsh_remove_mapping() works in
 733          * linear time, because it has to find the element that matches the
 734          * handle. It doesn't have to be this way, because the fsh_int_t
 735          * could have contained the fshi_mapping field pointed to the
 736          * corresponding fsh_mapping_t.
 737          * Since we don't really know if it's going to be a performance
 738          * issue, we leave it like this for the sake of simplicity.
 739          */
 740         while ((fshi = list_remove_head(&fsrecp->fshfsr_list)) != NULL) {
 741                 /*
 742                  * It's a simpler version of the code present in
 743                  * fsh_hook_remove(). There is no fsh_fshrecord locking here,
 744                  * because it is not needed anymore.
 745                  */
 746                 fsh_mapping_t *mapping;
 747                 fsh_t *hooks;
 748 
 749                 mapping = fsh_remove_mapping(fshi->fshi_handle);
 750                 /*
 751                  * mapping == NULL was acceptable in fsh_hook_remove()
 752                  * because an invalid handle could have been passed by the
 753                  * client. This time, we take the handle from an internal
 754                  * structure, so it definitely has to be present in the
 755                  * fsh_map.
 756                  */
 757                 ASSERT(mapping != NULL);
 758                 ASSERT(mapping->fshm_handle == fshi->fshi_handle);
 759                 ASSERT(mapping->fshm_fshi == fshi);
 760 
 761                 hooks = &mapping->fshm_fshi->fshi_hooks;
 762                 if (hooks->remove_callback != NULL)
 763                         (*(hooks->remove_callback))(hooks->arg,
 764                             mapping->fshm_handle);
 765 
 766                 id_free(fsh_idspace, fshi->fshi_handle);
 767 
 768                 kmem_free(fshi, sizeof (*fshi));
 769                 kmem_free(mapping, sizeof (*mapping));
 770         }
 771         list_destroy(&fsrecp->fshfsr_list);
 772         rw_destroy(&fsrecp->fshfsr_lock);
 773         kmem_free(fsrecp, sizeof (*fsrecp));
 774 }
 775 
 776 /*
 777  * fsh_init() is called in vfsinit()@vfs.c. This function MUST be called
 778  * before every other fsh call.
 779  */
 780 void
 781 fsh_init(void)
 782 {
 783         rw_init(&fsh_cblist_lock, NULL, RW_DRIVER, NULL);
 784         list_create(&fsh_cblist, sizeof (fsh_callback_int_t),
 785             offsetof(fsh_callback_int_t, fshci_next));
 786 
 787         mutex_init(&fsh_map_lock, NULL, MUTEX_DRIVER, NULL);
 788         list_create(&fsh_map, sizeof (fsh_mapping_t),
 789             offsetof(fsh_mapping_t, fshm_next));
 790 
 791         /* See comment above fsh_prepare_fsrec() */
 792         fsh_res_ptr = (void *)-1;
 793 
 794         fsh_idspace = id_space_create("fsh", 0, fsh_limit);
 795 }
 796 
 797 /*
 798  * These functions are used to pass control to the next hook or underlying
 799  * vop or vfsop. Their consumers don't have to worry about any locking,
 800  * because all the necessities are guaranteed by the fsh_foo().
 801  *
 802  * In fsh_next_foo() we execute the hook passed in the first argument and
 803  * try to find the next one. It is guaranteed that the passed hook is still
 804  * valid, because of fshfsr_lock held by fsh_foo().
 805  */
 806 int
 807 fsh_next_read(fsh_int_t *fshi, vnode_t *vp, uio_t *uiop, int ioflag,
 808         cred_t *cr, caller_context_t *ct)
 809 {
 810         fsh_int_t       *next;
 811 
 812         if (fshi == NULL)
 813                 return ((*(vp->v_op->vop_read))(vp, uiop, ioflag, cr, ct));
 814 
 815         next = fshi;
 816         do {
 817                 next = list_next(&vp->v_vfsp->vfs_fshrecord->fshfsr_list,
 818                     next);
 819         } while (next != NULL && next->fshi_hooks.read == NULL);
 820 
 821         return ((*(fshi->fshi_hooks.read))(next, fshi->fshi_hooks.arg, vp,
 822             uiop, ioflag, cr, ct));
 823 }
 824 
 825 int
 826 fsh_next_write(fsh_int_t *fshi, vnode_t *vp, uio_t *uiop, int ioflag,
 827         cred_t *cr, caller_context_t *ct)
 828 {
 829         fsh_int_t       *next;
 830 
 831         if (fshi == NULL)
 832                 return ((*(vp->v_op->vop_write))(vp, uiop, ioflag, cr, ct));
 833 
 834         next = fshi;
 835         do {
 836                 next = list_next(&vp->v_vfsp->vfs_fshrecord->fshfsr_list, next);
 837         } while (next != NULL && next->fshi_hooks.write == NULL);
 838 
 839         return ((*(fshi->fshi_hooks.write))(next, fshi->fshi_hooks.arg, vp,
 840             uiop, ioflag, cr, ct));
 841 }
 842 
 843 int
 844 fsh_next_mount(fsh_int_t *fshi, vfs_t *vfsp, vnode_t *mvp, struct mounta *uap,
 845         cred_t *cr)
 846 {
 847         fsh_int_t       *next;
 848 
 849         if (fshi == NULL)
 850                 return ((*(vfsp->vfs_op->vfs_mount))(vfsp, mvp, uap, cr));
 851 
 852         next = fshi;
 853         do {
 854                 next = list_next(&vfsp->vfs_fshrecord->fshfsr_list, next);
 855         } while (next != NULL && next->fshi_hooks.mount == NULL);
 856 
 857         return ((*(fshi->fshi_hooks.mount))(next, fshi->fshi_hooks.arg, vfsp,
 858             mvp, uap, cr));
 859 }
 860 
 861 int
 862 fsh_next_unmount(fsh_int_t *fshi, vfs_t *vfsp, int flag, cred_t *cr)
 863 {
 864         fsh_int_t       *next;
 865 
 866         if (fshi == NULL)
 867                 return ((*(vfsp->vfs_op->vfs_unmount))(vfsp, flag, cr));
 868 
 869         next = fshi;
 870         do {
 871                 next = list_next(&vfsp->vfs_fshrecord->fshfsr_list, next);
 872         } while (next != NULL && next->fshi_hooks.unmount == NULL);
 873 
 874         return ((*(fshi->fshi_hooks.unmount))(next, fshi->fshi_hooks.arg,
 875             vfsp, flag, cr));
 876 }