1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2013 Damian Bogel.  All rights reserved.
  14  */
  15 
  16 #include <sys/debug.h>
  17 #include <sys/fsh.h>
  18 #include <sys/fsh_impl.h>
  19 #include <sys/ksynch.h>
  20 #include <sys/list.h>
  21 #include <sys/sunddi.h>
  22 #include <sys/sysmacros.h>
  23 #include <sys/types.h>
  24 #include <sys/vfs.h>
  25 #include <sys/vnode.h>
  26 
  27 /*
  28  * Filesystem hook framework (FSH)
  29  *
  30  * 1. Abstract.
  31  * The main goal of the filesystem hook framework is to provide an easy way to
  32  * inject consumer-defined behaviour into vfs/vnode calls. Because of what
  33  * zones and ZFS offer, we narrow our hooking system to whole filesystems, not
  34  * single vnodes or filesystem subtrees.
  35  *
  36  * 2. Overview.
  37  * fsh_t is the main object in the FSH. An fsh_t is a structure containing:
  38  *      - pointers to hooking functions (hook_foo, where foo is the name of
  39  *      a corresponding vnodeop/vfsop)
  40  *      - a pointer to an argument to pass (this is shared for all the
  41  *      hooks in a given fsh_t)
  42  *
  43  * We install a fsh_t on a whole filesystem, but one fsh_t can be installed on
  44  * many filesystems.
  45  *
  46  * 3. Usage.
  47  * It is assumed that vfs_t/vnode_t that are passed to fsh_foo() functions are
  48  * held by the caller.
  49  *
  50  * fsh_t is a structure filled out by the consumer. If a consumer does not
  51  * want to add/remove a hook for function foo(), he should fill the
  52  * hook_foo() field of fsh_t with NULL before calling
  53  * fsh_hook_install/remove(). The type of hook_foo() is the type of foo() with
  54  * two additional arguments:
 *      - fsh_int_t *fsh_int - this argument MUST be passed to
 *      fsh_next_foo(). FSH wouldn't know which hook to execute next
 *      without it.
  58  *      - void *arg - this is the argument passed with fsh_t during
  59  *      installation
  60  * All the information passed with fsh_t is copied by FSH, so it's safe for
  61  * the caller to do anything with it. Keep in mind that this structure is also
 * used for removing hooks, so the consumer should somehow remember all of
 * its contents.
  64  *
  65  * Every hook function is responsible for passing the control to the next
  66  * hook associated with a particular call. In order to provide an easy way to
  67  * modify the behaviour of a function call both before and after the
  68  * underlying vfsop/vnodeop (or next hook) execution, a hook has to call
  69  * fsh_next_foo() at some point. This function does necessary internal
  70  * operations and calls the next hook, until there's no hook left, then it
  71  * calls the underlying vfsop/vnodeop.
  72  * Example:
  73  * my_freefs(fsh_int_t *fsh_int, void *arg, vfs_t *vfsp) {
  74  *      cmn_err(CE_NOTE, "freefs called!\n");
  75  *      return (fsh_next_freefs(fsh_int, vfsp));
  76  * }
  77  *
  78  * A consumer might want to receive some notifications about vfs_t entries
  79  * being created/destroyed. There's a fsh_callback_t structure provided to
  80  * install such callbacks.
  81  *
  82  * 4. API (fsh.h)
  83  * fsh_fs_enable(vfs_t *vfsp)
  84  * fsh_fs_disable(vfs_t *vfsp)
  85  *      Enables/disables fshook per filesystem.
  86  *
  87  * fsh_hook_install(vfs_t *vfsp, fsh_t *fsh)
  88  *      Installs hooks on vfsp filesystem. It's important that hooks are
  89  *      executed in LIFO installation order, which means that if there are
 *      hooks A and B installed in this order, B is going to be executed
 *      before A.
  92  *
  93  * fsh_hook_remove(vfs_t *vfsp, fsh_t *fsh)
  94  *      Removes hooks from vfsp filesystem.
  95  *
 * fsh_next_xxx(fsh_int_t *fshi, ARGUMENTS)
  97  *      This is the function which should be called once in every hook. It
  98  *      does the necessary internal operations and passes control to the
  99  *      next hook or, if there's no hook left, to the underlying
 100  *      vfsop/vnodeop.
 101  *
 102  * fsh_callback_install(fsh_callback_t *fsh_callback)
 103  * fsh_callback_remove(fsh_callback_t *fsh_callback)
 *      Installs/removes a callback for vfs_t mount/free. The mount callback
 105  *      is executed right before domount() returns. The free callback is
 106  *      called right before VFS_FREEVFS() is called.
 107  *
 108  * 5. API for vfs.c and vnode.c (fsh_impl.h)
 109  * fsh_init()
 110  *      This call has to be done in vfsinit(). It initialises the FSH. It
 111  *      is absolutely necessary that this call is made before any other FSH
 112  *      operation.
 113  *
 114  * fsh_exec_mount_callbacks(vfs_t *vfsp)
 115  * fsh_exec_free_callbacks(vfs_t *vfsp)
 *      Used to execute all FSH callbacks for vfs_t mount/free.
 117  *
 118  * fsh_fsrec_destroy(struct fsh_fsrecord *fsrecp)
 119  *      Destroys a fsh_fsrecord structure.
 120  *
 121  * fsh_foo(ARGUMENTS)
 122  *      Function used to start executing the hook chain for a given call
 123  *      (foo).
 124  *
 125  * 6. Internals.
 126  * fsh_fsrecord_t is a structure which lives inside vfs_t.
 127  * fsh_fsrecord_t contains:
 128  *      - an array of fsh_list_t, one for each vfsop/vnodeop. fsh_list_t
 129  *      is just a list of hooks for a particular vfsop/vnodeop
 130  *      - a flag which tells if FSH is enabled on this filesystem
 131  *
 132  * Unfortunately, because of unexpected behaviour of some filesystems (no
 133  * use of vfs_alloc()/vfs_init()) there's no good place to initialise the
 * fsh_fsrecord structure. The approach being used here is to check if it's
 135  * initialised in every call. Because of the fact that no lock could be used
 136  * here (the same problem with initialisation), a spinlock is used. This is
 137  * explained in more detail in a comment before FSH_PREPARE_FSREC(), a macro
 138  * that should be used whenever a vfsp->vfs_fshrecord needs to be fetched.
 139  * After doing that, it's completely safe to keep this pointer locally,
 140  * because it won't be changed until vfs_free() is called.
 141  *
 142  * fsh_list_t is a RW locked hook list, designed to contain hooks for one
 * operation (that's why its nodes don't contain such information). Every
 * hook is internally kept as an fsh_int_t structure, which is filled out
 * using information from a consumer-provided fsh_t. The fsh_node_t is just
 * this list node containing fsh_int_t. Since fsh_list_t is an RW locked list,
 147  * installing and removing hooks may cause delays in filesystem operations.
 148  *
 149  * fsh_next_xxx()
 150  * This function is quite simple. It takes the fsh_int_t passed to this
 151  * function and passes control to the next hook or to the underlying
 152  * vnodeop/vfsop.
 153  *
 154  * Callbacks installed with fsh_callback_install/remove() are executed by
 155  * calling fsh_exec_mount/free_callbacks() in domount()/vfs_rele() (when
 156  * vfs_t's reference count drops to 0).
 157  *
 158  * 7. Concurrency
 159  * FSH does no vfs_t nor vnode_t locking. It is expected that whenever it is
 160  * needed, the consumer does that before/after calling FSH API.
 161  *
 162  */
 163 
 164 
 165 /* Structure used for mount/free callbacks. */
 166 static fsh_callback_list_t fsh_clist;
 167 
 168 /*
 169  * A reserved pointer to FSH. It is used because of the method chosen for
 170  * solving concurrency issues for vfs_fshrecord. The full explanation
 171  * is in the big theory statement at the beginning of this file.
 172  * It is initialised in fsh_init().
 173  */
 174 static void *fsh_res_ptr;
 175 
 176 static struct fsh_fsrecord *fsh_fsrec_create();
 177 
 178 /*
 179  * Important note:
 180  * Before using this macro, fsh_init() MUST be called. We do that in
 181  * vfsinit()@vfs.c.
 182  *
 183  * One would ask, why isn't the vfsp->vfs_fshrecord initialised when the
 184  * vfs_t is created. Unfortunately, some filesystems (e.g. fifofs) does not
 185  * call vfs_init() or even vfs_alloc(), It's possible that some unbundled
 186  * filesystems could do the same thing. That's why this macro is introduced.
 187  * It should be called before any code that needs access to vfs_fshrecord.
 188  *
 189  * Locking:
 190  * There are no locks here, because there's no good place to initialise
 191  * the lock. Concurrency issues are solved by using atomic instructions
 192  * and a spinlock, which is spinning only once for a given vfs_t. Because
 193  * of that, the usage of spinlock isn't bad at all.
 194  *
 195  * How it works:
 196  * a) if vfsp->vfs_fshrecord is NULL, atomic_cas_ptr changes it to
 197  *      fsh_res_ptr. That's a signal for other calls, that the structure
 198  *      is being initialised. Then the creation of vfs_fshrecord is done.
 199  * b) if vfsp->vfs_fshrecord is fsh_res_ptr, that means we have to wait,
 200  *      because vfs_fshrecord is being initialised by another call.
 201  * c) other cases:
 202  *      vfs_fshrecord is already initialised, so we can use it.
 203  */
 204 #define FSH_PREPARE_FSREC(vfsp)                                         \
 205 do {                                                                    \
 206         fsh_fsrecord_t *fsrec;                                          \
 207                                                                         \
 208         while ((fsrec = atomic_cas_ptr(&(vfsp)->vfs_fshrecord, NULL,     \
 209                 fsh_res_ptr)) == fsh_res_ptr);                          \
 210         if ((fsrec) == NULL) {                                          \
 211                 atomic_swap_ptr(&(vfsp)->vfs_fshrecord,                  \
 212                         fsh_fsrec_create());                            \
 213         }                                                               \
 214 _NOTE(CONSTCOND)                                                        \
 215 } while (0)
 216 
 217 /*
 218  * API for enabling/disabling FSH per vfs_t.
 219  * Atomic operations are used for changing vfs_fshrecord->fshfsr_enabled.
 220  */
 221 void
 222 fsh_fs_enable(vfs_t *vfsp)
 223 {
 224         FSH_PREPARE_FSREC(vfsp);
 225         atomic_or_uint(&vfsp->vfs_fshrecord->fshfsr_enabled, 1);
 226 }
 227 
 228 void
 229 fsh_fs_disable(vfs_t *vfsp)
 230 {
 231         FSH_PREPARE_FSREC(vfsp);
 232         atomic_and_uint(&vfsp->vfs_fshrecord->fshfsr_enabled, 0);
 233 }
 234 
 235 /*
 236  * This macro, like FSH_REMOVE is introduced because of the fact that
 237  * the code for installing a hook looks almost exactly the same for
 238  * every vop/vfsop.
 239  * The usage of these macros is pretty simple. One has to provide pointers to
 240  * fsh_t, fsh_fsrecord_t and a name of operation to be installed both in
 241  * lowercase and uppercase.
 242  */
 243 #define FSH_INSTALL(type, hooks, fsrec, lower, upper)                   \
 244 do {                                                                    \
 245         fsh_int_t *fshi;                                                \
 246         fsh_list_t *list;                                               \
 247                                                                         \
 248         if (hooks->hook_##lower) {                                   \
 249                 fshi = (fsh_int_t *)kmem_alloc(sizeof (*fshi),          \
 250                     KM_SLEEP);                                          \
 251                 fshi->fshi_fn.hook_##lower = hooks->hook_##lower; \
 252                 fshi->fshi_arg = hooks->arg;                              \
 253                                                                         \
 254                 list = &fsrec->fshfsr_opv[FSH_##type##_##upper]; \
 255                 rw_enter(&list->fshl_lock, RW_WRITER);                   \
 256                 list_insert_head(&list->fshl_list, fshi);                \
 257                 rw_exit(&list->fshl_lock);                               \
 258         }                                                               \
 259 _NOTE(CONSTCOND)                                                        \
 260 } while (0)
 261 
 262 #define FSH_INSTALL_VN(hooks, fsrec, lower, upper)                      \
 263         FSH_INSTALL(VOP, hooks, fsrec, lower, upper)
 264 
 265 #define FSH_INSTALL_VFS(hooks, fsrec, lower, upper)                     \
 266         FSH_INSTALL(VFS, hooks, fsrec, lower, upper)
 267 
 268 void
 269 fsh_hook_install(vfs_t *vfsp, fsh_t *hooks)
 270 {
 271         fsh_fsrecord_t  *fsrec;
 272 
 273         FSH_PREPARE_FSREC(vfsp);
 274         fsrec = vfsp->vfs_fshrecord;
 275 
 276         FSH_INSTALL_VN(hooks, fsrec, read, READ);
 277         FSH_INSTALL_VN(hooks, fsrec, write, WRITE);
 278         FSH_INSTALL_VFS(hooks, fsrec, mount, MOUNT);
 279         FSH_INSTALL_VFS(hooks, fsrec, unmount, UNMOUNT);
 280 }
 281 
 282 /*
 283  * See comment above FSH_INSTALL.
 284  * Note: This macro does nothing when a hook to remove was not found.
 285  */
 286 #define FSH_REMOVE(type, hooks, fsrec, lower, upper)                    \
 287 do {                                                                    \
 288         fsh_int_t *fshi;                                                \
 289         fsh_list_t *list;                                               \
 290         if (hooks->hook_##lower == NULL)                             \
 291                 break;                                                  \
 292                                                                         \
 293         list = &fsrec->fshfsr_opv[FSH_##type##_##upper];         \
 294         rw_enter(&list->fshl_lock, RW_WRITER);                           \
 295                                                                         \
 296         fshi = list_head(&list->fshl_list);                              \
 297                                                                         \
 298         while (fshi &&                                                  \
 299             !(fshi->fshi_fn.hook_##lower == hooks->hook_##lower &&        \
 300             fshi->fshi_arg == hooks->arg))                                \
 301                 fshi = list_next(&list->fshl_list, fshi);                \
 302                                                                         \
 303         if (fshi == NULL) {                                             \
 304                 rw_exit(&list->fshl_lock);                               \
 305                 break;                                                  \
 306         }                                                               \
 307                                                                         \
 308         list_remove(&list->fshl_list, fshi);                             \
 309                                                                         \
 310         rw_exit(&list->fshl_lock);                                       \
 311                                                                         \
 312         kmem_free(fshi, sizeof (*fshi));                                \
 313 _NOTE(CONSTCOND)                                                        \
 314 } while (0)
 315 
 316 #define FSH_REMOVE_VN(hooks, fsrec, lower, upper)                       \
 317         FSH_REMOVE(VOP, hooks, fsrec, lower, upper)
 318 
 319 #define FSH_REMOVE_VFS(hooks, fsrec, lower, upper)                      \
 320         FSH_REMOVE(VFS, hooks, fsrec, lower, upper)
 321 
 322 void
 323 fsh_hook_remove(vfs_t *vfsp, fsh_t *hooks)
 324 {
 325         fsh_fsrecord_t  *fsrec;
 326 
 327         FSH_PREPARE_FSREC(vfsp);
 328         fsrec = vfsp->vfs_fshrecord;
 329 
 330         FSH_REMOVE_VN(hooks, fsrec, read, READ);
 331         FSH_REMOVE_VN(hooks, fsrec, write, WRITE);
 332         FSH_REMOVE_VFS(hooks, fsrec, mount, MOUNT);
 333         FSH_REMOVE_VFS(hooks, fsrec, unmount, UNMOUNT);
 334 }
 335 
 336 /*
 337  * API for installing/removing global mount/free callbacks.
 338  * It's safe to call these functions whenever after fsh_init() was called.
 339  * The fsh_clist is rwlocked. fsh_callback_install/remove() are
 340  * the only writers and fsh_exec_mount/free_callbacks() are the only readers.
 341  */
 342 void
 343 fsh_callback_install(fsh_callback_t *fsh_callback)
 344 {
 345         fsh_callback_int_t *fshci;
 346 
 347         fshci = (fsh_callback_int_t *)kmem_alloc(sizeof (*fshci), KM_SLEEP);
 348         (void) memcpy(&fshci->fshci_callback, fsh_callback,
 349             sizeof (fshci->fshci_callback));
 350 
 351         rw_enter(&fsh_clist.fshcl_lock, RW_WRITER);
 352         list_insert_head(&fsh_clist.fshcl_list, fshci);
 353         rw_exit(&fsh_clist.fshcl_lock);
 354 }
 355 
 356 /*
 357  * fsh_callback_t objects are compared by a simple memcmp() here. That's
 358  * by design, because we'd like to be absolutely sure that we delete the
 359  * callbacks we wanted (the same argument and callback functions).
 360  */
 361 void
 362 fsh_callback_remove(fsh_callback_t *fsh_callback)
 363 {
 364         fsh_callback_int_t *fshci;
 365 
 366         rw_enter(&fsh_clist.fshcl_lock, RW_WRITER);
 367         fshci = list_head(&fsh_clist.fshcl_list);
 368 
 369         while (fshci && memcmp(fsh_callback, &fshci->fshci_callback,
 370             sizeof (*fsh_callback)))
 371                 fshci = list_next(&fsh_clist.fshcl_list, fshci);
 372 
 373         if (fshci == NULL) {
 374                 rw_exit(&fsh_clist.fshcl_lock);
 375                 return;
 376         }
 377 
 378         list_remove(&fsh_clist.fshcl_list, fshci);
 379         rw_exit(&fsh_clist.fshcl_lock);
 380 
 381         kmem_free(fshci, sizeof (*fshci));
 382 }
 383 
 384 /*
 385  * This function is executed right before returning from domount()@vfs.c.
 386  * We are sure that it's called only after fsh_init().
 387  * It does all the mount callbacks installed in the FSH.
 388  */
 389 void
 390 fsh_exec_mount_callbacks(vfs_t *vfsp)
 391 {
 392         fsh_callback_int_t *fshci;
 393         fsh_callback_t *callback;
 394 
 395         rw_enter(&fsh_clist.fshcl_lock, RW_READER);
 396         fshci = list_head(&fsh_clist.fshcl_list);
 397         while (fshci) {
 398                 callback = &fshci->fshci_callback;
 399                 if (callback->fshc_mount)
 400                         (*(callback->fshc_mount))(vfsp, callback->fshc_arg);
 401                 fshci = list_next(&fsh_clist.fshcl_list, fshci);
 402         }
 403         rw_exit(&fsh_clist.fshcl_lock);
 404 }
 405 
 406 /*
 407  * This function is executed right before VFS_FREEVFS() is called in
 408  * vfs_rele()@vfs.c. We are sure that it's called only after fsh_init().
 409  * It does all the free callbacks installed in the FSH.
 410  */
 411 void
 412 fsh_exec_free_callbacks(vfs_t *vfsp)
 413 {
 414         fsh_callback_int_t *fshci;
 415         fsh_callback_t *callback;
 416 
 417         rw_enter(&fsh_clist.fshcl_lock, RW_READER);
 418         fshci = list_head(&fsh_clist.fshcl_list);
 419         while (fshci) {
 420                 callback = &fshci->fshci_callback;
 421                 if (callback->fshc_free)
 422                         (*(callback->fshc_free))(vfsp, callback->fshc_arg);
 423                 fshci = list_next(&fsh_clist.fshcl_list, fshci);
 424         }
 425         rw_exit(&fsh_clist.fshcl_lock);
 426 }
 427 
 428 /*
 429  * API for vnode.c/vfs.c to start executing the FSH for a given operation.
 430  *
 431  * These interfaces are using fsh_res_ptr (in FSH_PREPARE_FSREC()), so it's
 432  * absolutely necessary to call fsh_init() before using them. That's done in
 433  * vfsinit().
 434  *
 * While these functions are executing, it's expected that necessary vfs_t's
 * are held so that vfs_free() isn't called. vfs_free() expects that no one
 * else accesses vfs_fshrecord of a given vfs_t.
 * It's also the caller's responsibility to keep vnode_t passed to fsh_xxx()
 * alive and valid.
 * All these expectations are met because these functions are used only in
 * corresponding fop/fsop_xxx().
 442  *
 443  * Although fsrec->fshfsr_enabled is shared by everyone who uses a given
 444  * vfs_t, it can be accessed in the usual way. There's no locking involved
 445  * because all the changes of this field are made by atomic operations in
 446  * fsh_fs_enable/disable().
 447  */
 448 int
 449 fsh_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
 450         caller_context_t *ct)
 451 {
 452         fsh_fsrecord_t *fsrec;
 453         fsh_list_t *list;
 454         int ret;
 455 
 456         FSH_PREPARE_FSREC(vp->v_vfsp);
 457         fsrec = vp->v_vfsp->vfs_fshrecord;
 458         if (!(fsrec->fshfsr_enabled))
 459                 return ((*(vp->v_op->vop_read))(vp, uiop, ioflag, cr, ct));
 460 
 461         list = &fsrec->fshfsr_opv[FSH_VOP_READ];
 462         rw_enter(&list->fshl_lock, RW_READER);
 463         if (list_head(&list->fshl_list) == NULL)
 464                 ret =  (*(vp->v_op->vop_read))(vp, uiop, ioflag, cr, ct);
 465         else
 466                 ret = fsh_next_read(list_head(&list->fshl_list), vp, uiop,
 467                     ioflag, cr, ct);
 468         rw_exit(&list->fshl_lock);
 469 
 470         return (ret);
 471 }
 472 
 473 int
 474 fsh_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
 475         caller_context_t *ct)
 476 {
 477         fsh_fsrecord_t *fsrec;
 478         fsh_list_t *list;
 479         int ret;
 480 
 481         FSH_PREPARE_FSREC(vp->v_vfsp);
 482         fsrec = vp->v_vfsp->vfs_fshrecord;
 483         if (!(fsrec->fshfsr_enabled))
 484                 return ((*(vp->v_op->vop_write))(vp, uiop, ioflag, cr, ct));
 485 
 486         list = &fsrec->fshfsr_opv[FSH_VOP_WRITE];
 487         rw_enter(&list->fshl_lock, RW_READER);
 488         if (list_head(&list->fshl_list) == NULL)
 489                 ret =  (*(vp->v_op->vop_write))(vp, uiop, ioflag, cr, ct);
 490         else
 491                 ret = fsh_next_write(list_head(&list->fshl_list), vp, uiop,
 492                     ioflag, cr, ct);
 493         rw_exit(&list->fshl_lock);
 494 
 495         return (ret);
 496 }
 497 
 498 int
 499 fsh_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
 500 {
 501         fsh_fsrecord_t *fsrec;
 502         fsh_list_t *list;
 503         int ret;
 504 
 505         FSH_PREPARE_FSREC(vfsp);
 506         fsrec = vfsp->vfs_fshrecord;
 507         if (!(fsrec->fshfsr_enabled))
 508                 return ((*(vfsp->vfs_op->vfs_mount))(vfsp, mvp, uap, cr));
 509 
 510         list = &fsrec->fshfsr_opv[FSH_VFS_MOUNT];
 511         rw_enter(&list->fshl_lock, RW_READER);
 512         if (list_head(&list->fshl_list) == NULL)
 513                 ret = (*(vfsp->vfs_op->vfs_mount))(vfsp, mvp, uap, cr);
 514         else
 515                 ret = fsh_next_mount(list_head(&list->fshl_list), vfsp, mvp,
 516                     uap, cr);
 517         rw_exit(&list->fshl_lock);
 518 
 519         return (ret);
 520 }
 521 
 522 int
 523 fsh_unmount(vfs_t *vfsp, int flag, cred_t *cr)
 524 {
 525         fsh_fsrecord_t *fsrec;
 526         fsh_list_t *list;
 527         int ret;
 528 
 529         FSH_PREPARE_FSREC(vfsp);
 530         fsrec = vfsp->vfs_fshrecord;
 531         if (!(fsrec->fshfsr_enabled))
 532                 return ((*(vfsp->vfs_op->vfs_unmount))(vfsp, flag, cr));
 533 
 534         list = &fsrec->fshfsr_opv[FSH_VFS_UNMOUNT];
 535         rw_enter(&list->fshl_lock, RW_READER);
 536         if (list_head(&list->fshl_list) == NULL)
 537                 ret = (*(vfsp->vfs_op->vfs_unmount))(vfsp, flag, cr);
 538         else
 539                 ret = fsh_next_unmount(list_head(&list->fshl_list), vfsp,
 540                     flag, cr);
 541         rw_exit(&list->fshl_lock);
 542 
 543         return (ret);
 544 }
 545 
 546 /*
 547  * This is the funtion used by FSH_PREPARE_FSREC() to allocate a new
 548  * fsh_fsrecord. This function is called by the first function which
 549  * access the vfs_fshrecord and finds out it's NULL.
 550  */
 551 static struct fsh_fsrecord *
 552 fsh_fsrec_create()
 553 {
 554         struct fsh_fsrecord *fsrecp;
 555         int i;
 556 
 557         fsrecp = (struct fsh_fsrecord *)kmem_zalloc(sizeof (*fsrecp),
 558             KM_SLEEP);
 559         fsrecp->fshfsr_enabled = 1;
 560         for (i = 0; i < FSH_SUPPORTED_OPS_COUNT; i++) {
 561                 rw_init(&fsrecp->fshfsr_opv[i].fshl_lock, NULL, RW_DRIVER,
 562                     NULL);
 563                 list_create(&fsrecp->fshfsr_opv[i].fshl_list,
 564                     sizeof (fsh_int_t), offsetof(fsh_int_t, fshi_next));
 565         }
 566         return (fsrecp);
 567 }
 568 
 569 /*
 570  * This call can be used ONLY in vfs_free(). It's assumed that no other
 571  * fsh_xxx() using the vfs_t that owns the fsh_fsrecord to be destroyed
 572  * is executing while call to fsh_fsrec_destroy() is made. With this
 573  * assumptions, no concurrency issues occur. All rwlocks of fshfsr_opv
 574  * elements are released.
 575  */
 576 void
 577 fsh_fsrec_destroy(struct fsh_fsrecord *volatile fsrecp)
 578 {
 579         int i;
 580         fsh_int_t *fshi;
 581 
 582         for (i = 0; i < FSH_SUPPORTED_OPS_COUNT; i++) {
 583                 while (fshi = list_head(&fsrecp->fshfsr_opv[i].fshl_list)) {
 584                         fshi =
 585                             list_remove_head(&fsrecp->fshfsr_opv[i].fshl_list);
 586                         kmem_free(fshi, sizeof (*fshi));
 587                 }
 588                 list_destroy(&fsrecp->fshfsr_opv[i].fshl_list);
 589                 rw_destroy(&fsrecp->fshfsr_opv[i].fshl_lock);
 590         }
 591         kmem_free(fsrecp, sizeof (*fsrecp));
 592 }
 593 
 594 /*
 595  * fsh_init() is called in vfsinit()@vfs.c. This function MUST be called
 596  * before every other FSH call.
 597  */
 598 void
 599 fsh_init(void)
 600 {
 601         rw_init(&fsh_clist.fshcl_lock, NULL, RW_DRIVER, NULL);
 602         list_create(&fsh_clist.fshcl_list, sizeof (fsh_callback_int_t),
 603             offsetof(fsh_callback_int_t, fshci_next));
 604 
 605         fsh_res_ptr = kmem_alloc(1, KM_SLEEP);
 606 }
 607 
 608 /*
 609  * These functions are used to pass control to the next hook or underlying
 610  * vop/vfsop. It's consumer doesn't have to worry about any locking, because
 611  * all the necessities are guaranteed by the fsh_xxx (xxx == open, read etc.)
 612  */
 613 int
 614 fsh_next_read(fsh_int_t *fshi, vnode_t *vp, uio_t *uiop, int ioflag,
 615         cred_t *cr, caller_context_t *ct)
 616 {
 617         fsh_int_t *next;
 618 
 619         if (fshi == NULL)
 620                 return ((*(vp->v_op->vop_read))(vp, uiop, ioflag, cr, ct));
 621 
 622         next = list_next(
 623             &vp->v_vfsp->vfs_fshrecord->fshfsr_opv[FSH_VOP_READ].fshl_list,
 624             fshi);
 625 
 626         return ((*(fshi->fshi_fn.hook_read))(next, fshi->fshi_arg, vp, uiop,
 627             ioflag, cr, ct));
 628 }
 629 
 630 int
 631 fsh_next_write(fsh_int_t *fshi, vnode_t *vp, uio_t *uiop, int ioflag,
 632         cred_t *cr, caller_context_t *ct)
 633 {
 634         fsh_int_t *next;
 635 
 636         if (fshi == NULL)
 637                 return ((*(vp->v_op->vop_write))(vp, uiop, ioflag, cr, ct));
 638 
 639         next = list_next(
 640             &vp->v_vfsp->vfs_fshrecord->fshfsr_opv[FSH_VOP_WRITE].fshl_list,
 641             fshi);
 642 
 643         return ((*(fshi->fshi_fn.hook_write))(next, fshi->fshi_arg, vp,
 644             uiop, ioflag, cr, ct));
 645 }
 646 
 647 int
 648 fsh_next_mount(fsh_int_t *fshi, vfs_t *vfsp, vnode_t *mvp, struct mounta *uap,
 649         cred_t *cr)
 650 {
 651         fsh_int_t *next;
 652 
 653         if (fshi == NULL)
 654                 return ((*(vfsp->vfs_op->vfs_mount))(vfsp, mvp, uap, cr));
 655 
 656         next = list_next(
 657             &vfsp->vfs_fshrecord->fshfsr_opv[FSH_VFS_MOUNT].fshl_list,
 658             fshi);
 659 
 660         return ((*(fshi->fshi_fn.hook_mount))(next, fshi->fshi_arg, vfsp, mvp,
 661             uap, cr));
 662 }
 663 
 664 int
 665 fsh_next_unmount(fsh_int_t *fshi, vfs_t *vfsp, int flag, cred_t *cr)
 666 {
 667         fsh_int_t *next;
 668 
 669         if (fshi == NULL)
 670                 return ((*(vfsp->vfs_op->vfs_unmount))(vfsp, flag, cr));
 671 
 672         next = list_next(
 673             &vfsp->vfs_fshrecord->fshfsr_opv[FSH_VFS_UNMOUNT].fshl_list,
 674             fshi);
 675 
 676         return ((*(fshi->fshi_fn.hook_unmount))(next, fshi->fshi_arg, vfsp,
 677             flag, cr));
 678 }