18 #include <sys/fsh.h>
19 #include <sys/fsh_impl.h>
20 #include <sys/id_space.h>
21 #include <sys/kmem.h>
22 #include <sys/ksynch.h>
23 #include <sys/list.h>
24 #include <sys/sunddi.h>
25 #include <sys/sysmacros.h>
26 #include <sys/types.h>
27 #include <sys/vfs.h>
28 #include <sys/vnode.h>
29
30 /*
31 * Filesystem hook framework (fsh)
32 *
33 * 1. Abstract.
34 * The main goal of the filesystem hook framework is to provide an easy way to
35 * inject client-defined behaviour into vfs/vnode calls. fsh works on
36 * vfs_t granularity.
37 *
38 *
39 * 2. Overview.
40 * fsh_t is the main object in the fsh. An fsh_t is a structure containing:
41 * - pointers to hooking functions (named after corresponding
42 * vnodeops/vfsops)
43 * - a pointer to an argument to pass (this is shared for all the
44 * hooks in a given fsh_t)
45 * - a pointer to the *hook remove callback* - it's being fired after a
46 * hook is removed and the hook has stopped executing. It's safe to destroy
47 * any data associated with this hook.
48 *
49 * The information from fsh_t is copied by the fsh and an fsh_handle_t
50 * is returned. It should be used for further removing.
51 *
52 *
53 * 3. Usage.
54 * It is expected that vfs_t/vnode_t that are passed to fsh_foo() functions
55 * are held by the caller when needed. fsh does no vfs_t/vnode_t locking.
56 *
57 * fsh_t is a structure filled out by the client. If a client does not want
58 * to add/remove a hook for function foo(), he should fill the foo field of
59 * fsh_t with NULL. Every hook has a type of corresponding vfsop/vnodeop with
60 * two additional arguments:
61 * - fsh_int_t *fsh_int - this argument MUST be passed to
62 * hook_next_foo(). fsh wouldn't know which hook to execute next
63 * without it
64 * - void *arg - this is the argument passed with fsh_t during
65 * installation
66 * - void (*remove_cb)(void *, fsh_handle_t) - hook remove callback
67 * (mentioned earlier); its first argument is arg, the second is the
68 * handle
69 *
70 * After installation, an fsh_handle_t is returned to the caller.
71 *
72 * Every hook function is responsible for passing the control to the next
73 * hook associated with a particular call. In order to provide an easy way to
74 * modify the behaviour of a function call both before and after the
75 * underlying vfsop/vnodeop (or next hook) execution, a hook has to call
76 * fsh_next_foo() at some point. This function does necessary internal
77 * operations and calls the next hook, until there's no hook left, then it
78 * calls the underlying vfsop/vnodeop.
79 * Example:
80 * my_freefs(fsh_int_t *fsh_int, void *arg, vfs_t *vfsp) {
81 * cmn_err(CE_NOTE, "freefs called!\n");
82 * return (fsh_next_freefs(fsh_int, vfsp));
83 * }
84 *
85 *
86 * A client might want to fire callbacks when vfs_t's are being mounted
87 * or freed. There's an fsh_callback_t structure provided to install such
88 * callbacks along with the API.
89 * It is legal to call fsh_hook_{install,remove}() inside a mount callback
90 * WITHOUT holding the vfs_t.
91 *
92 * After vfs_t's free callback returns, all the handles associated with the
93 * hooks installed on this vfs_t are invalid and must not be used.
94 *
95 *
96 * 4. API
97 * None of the APIs should be called during interrupt context above lock
98 * level. The only exceptions are fsh_next_foo() functions, which do not use
99 * locks.
100 *
101 * a) fsh.h
102 * Any of these functions could be called inside a hook or a hook remove
103 * callback.
104 * fsh_callback_{install,remove}() must not be called inside a {mount,free}
105 * callback. Doing so will cause a deadlock. Other functions can be called
106 * inside {mount,free} callbacks.
107 *
108 * fsh_fs_enable(vfs_t *vfsp)
109 * fsh_fs_disable(vfs_t *vfsp)
110 * Enables/disables fsh for a given vfs_t.
111 *
112 * fsh_hook_install(vfs_t *vfsp, fsh_t *hooks)
113 * Installs hooks on vfsp filesystem.
114 * It's important that hooks are executed in LIFO installation order,
115 * which means that if there are hooks A and B installed in this order, B
116 * is going to be executed before A.
117 * It returns a correct handle, or (-1) if hook/callback limit exceeded.
118 * The handle is valid until a free callback returns or an explicit call
119 * to fsh_hook_remove().
120 *
121 * fsh_hook_remove(fsh_handle_t handle)
122 * Removes a hook and invalidates the handle.
123 * It is guaranteed that after this function returns, calls to
124 * vnodeops/vfsops won't go through this hook, although there might be
125 * some threads still executing this hook. When hook remove callback is
126 * fired, it is guaranteed that the hook won't be executed anymore. It is
127 * safe to remove all the internal data associated with this hook inside
128 * the hook remove callback. The hook remove callback could be called
129 * inside fsh_hook_remove().
130 *
131 * fsh_next_foo(fsh_int_t *fsh_int, ARGUMENTS)
132 * This is the function which should be called once in every hook. It
133 * does the necessary internal operations and passes control to the
134 * next hook or, if there's no hook left, to the underlying
135 * vfsop/vnodeop.
136 *
137 * fsh_callback_install(fsh_callback_t *callback)
138 * fsh_callback_remove(fsh_callback_handle_t handle)
139 * Installs/removes callbacks for vfs_t mount/free. The mount callback
140 * is executed right before domount() returns. The free callback is
141 * called right before VFS_FREEVFS() is called.
142 * The fsh_callback_install() returns a correct handle, or (-1) if
143 * hook/callback limit exceeded.
144 *
145 * b) fsh_impl.h (for vfs.c and vnode.c only)
146 * fsh_init()
147 * This call has to be done in vfsinit(). It initialises the fsh. It
148 * is absolutely necessary that this call is made before any other fsh
149 * operation.
150 *
151 * fsh_exec_mount_callbacks(vfs_t *vfsp)
152 * fsh_exec_free_callbacks(vfs_t *vfsp)
153 * Used to execute all fsh callbacks for {mount,free} of a vfs_t.
154 *
155 * fsh_fsrec_destroy(struct fsh_fsrecord *fsrecp)
156 * Destroys an fsh_fsrecord structure. All the hooks installed on this
157 * vfs_t are then destroyed. free callback is called before this function.
158 *
159 * fsh_foo(ARGUMENTS)
160 * Function used to start executing the hook chain for a given call.
161 *
162 *
163 * 5. Internals.
164 * fsh_int_t is an internal hook structure. It is reference counted.
165 * fshi_hold() and fshi_rele() should be used whenever needed.
166 * fsh_int_t entries are elements of both fsh_map (global) and fshfsr_list
167 * (local to vfs_t). All entries are unique and are identified by fshi_handle.
168 *
169 * fsh_int_t properties:
170 * - fsh_hook_install() sets the ref. counter to 1 and adds it to both
171 * fsh_map and fshfsr_list
172 * - fsh_hook_remove() decreases the ref. counter by 1, removes the hook
173 * from fsh_map and marks the hook as *doomed*
174 * - if fsh_int_t is on the fshfsr_list, it's alive and there is a thread
175 * executing it
176 * - if fsh_int_t is marked as *doomed*, the reference counter cannot
177 * be increased and thus no thread can acquire this fsh_int_t
178 * - ref. counter can drop to 0 only after an fsh_hook_remove() call; this
179 * also means that the fsh_int_t is *doomed* and isn't a part of fsh_map
180 * - fsh_int_t could be also destroyed without fsh_hook_remove() call,
181 * that happens only inside fsh_fsrec_destroy() where it is guaranteed
182 * that there is no thread executing the hook
183 *
184 *
185 * fsh_fsrecord_t is a structure which lives inside a vfs_t.
186 * fsh_fsrecord_t contains:
187 * - an rw-lock that protects the structure
188 * - a list of hooks installed on this vfs_t
189 * - a flag which tells whether fsh is enabled on this vfs_t
190 *
191 *
192 * fsh_prepare_fsrec rule:
193 * Every function that needs vfsp->vfs_fshrecord has to call
194 * fsh_prepare_fsrec() first. If and only if the call is made, it is safe to
195 * use vfsp->vfs_fshrecord.
196 *
197 * Unfortunately, because of unexpected behaviour of some filesystems (no use
198 * of vfs_alloc()/vfs_init()) there's no good place to initialise the
199 * fsh_fshrecord_t structure. The approach being used here is to check if it's
200 * initialised in every call. Because of the fact that no lock could be used
201 * here (the same problem with initialisation), a spinlock is used. This is
202 * explained in more detail in a comment before fsh_prepare_fsrec(). After
203 * calling fsh_prepare_fsrec() it's completely safe to keep the vfs_fshrecord
204 * pointer locally, because it won't be changed until vfs_free() is called.
205 *
206 * The only exception from the fsh_prepare_fsrec() rule is vfs_free(),
207 * where it is expected that no other fsh calls would be made for the
208 * vfs_t that's being freed. That's why vfs_fshrecord could be only NULL or a
209 * valid pointer and could not be concurrently accessed.
210 *
211 * When there are no fsh functions (that use a particular fsh_fsrecord_t)
212 * executing, the vfs_fshrecord pointer won't be equal to fsh_res_ptr. It
213 * would be NULL or a pointer to an initialised fsh_fsrecord_t.
214 *
215 *
216 * Callbacks:
217 * Mount callbacks are executed by a call to fsh_exec_mount_callbacks() right
218 * before returning from domount()@vfs.c.
219 *
220 * Free callbacks are executed by a call to fsh_exec_free_callbacks() right
221 * before calling VFS_FREEVFS(), after vfs_t's reference count drops to 0.
222 *
223 *
224 * fsh_next_foo(fsh_int_t *fshi, ARGUMENTS)
225 * This function is quite simple. It takes the fsh_int_t and passes control
226 * to the next hook or to the underlying vnodeop/vfsop.
227 *
228 *
229 * 6. Locking
230 * a) public
231 * fsh does no vfs_t nor vnode_t locking. It is expected that whenever it is
232 * needed, the client does that.
233 *
234 * fsh_callback_{install,remove} must not be called inside a callback, because
235 * it will cause a deadlock.
236 *
237 * b) internal
238 * Locking diagram:
239 *
240 * fsh_hook_install() fsh_hook_remove() fsh_fsrec_destroy()
241 * | | |
242 * | | |
243 * +------------------+ | +------------+
244 * | | |
245 * V V V
246 * fsh_lock
247 * | |
248 * | +----- fshfsr_lock, RW_WRITER ---+
249 * | |
250 * V |
251 * +---------------------------------------+ |
252 * | fsh_map | |
253 * | | |
254 * +----|-> vfsp->vfs_fshrecord->fshfsr_list <--|--------------+
255 * | +------------------------------^--------+
256 * | |
257 * | |
258 * fshfsr_lock, RW_READER fshfsr_lock, RW_WRITER
259 * | |
260 * | |
261 * fsh_read(), fshi_rele()
262 * fsh_write(),
263 * ..., Might be called from:
264 * fsh_next_read(), fsh_hook_remove()
265 * fsh_next_write(), fsh_read(), fsh_write(), ...
266 * ... fsh_next_read(), fsh_next_write(), ...
267 *
268 * fsh_lock is a global lock for the administrative path (fsh_hook_install,
269 * fsh_hook_remove) and fsh_fsrec_destroy() (which is semi-administrative, since
270 * it destroys the unremoved hooks). It is used only when fsh_map needs to be
271 * locked. The usage of this lock guarantees that the data in fsh_map and
272 * fshfsr_lists is consistent.
273 */
274
275
276 /* Internals */
277 struct fsh_int {
278 fsh_handle_t fshi_handle;
279 fsh_t fshi_hooks;
280 vfs_t *fshi_vfsp;
281
282 kmutex_t fshi_lock;
283 uint64_t fshi_ref;
284 uint64_t fshi_doomed; /* changed inside fsh_lock */
285
286 /* next node in fshfsr_list */
287 list_node_t fshi_next;
288
289 /* next node in fsh_map */
290 list_node_t fshi_global;
291 };
292
293 typedef struct fsh_callback_int {
294 fsh_callback_t fshci_cb;
295 fsh_callback_handle_t fshci_handle;
296 list_node_t fshci_next;
297 } fsh_callback_int_t;
298
299
300 static kmutex_t fsh_lock;
301
302 /*
303 * fsh_fsrecord_t is the main internal structure. Its content is protected
304 * by fshfsr_lock. The fshfsr_list is a list of fsh_int_t hook entries for
305 * the vfs_t that contains the fsh_fsrecord_t.
306 */
307 struct fsh_fsrecord {
308 krwlock_t fshfsr_lock;
309 int fshfsr_enabled;
310 list_t fshfsr_list;
311 };
312
313 /*
314 * Global list of fsh_int_t. Protected by fsh_lock.
315 */
316 static list_t fsh_map;
317
318 /*
319 * Global list of fsh_callback_int_t.
320 */
321 static krwlock_t fsh_cblist_lock;
322 static list_t fsh_cblist;
323
324 /*
325 * A reserved pointer for fsh purposes. It is used because of the method
326 * chosen for solving concurrency issues with vfs_fshrecord. The full
327 * explanation is in the big theory statement at the beginning of this
328 * file and above fsh_fsrec_prepare(). It is initialised in fsh_init().
329 */
330 static void *fsh_res_ptr;
331
332 static fsh_fsrecord_t *fsh_fsrec_create();
333
334 int fsh_limit = INT_MAX;
335 static id_space_t *fsh_idspace;
336
337 /*
338 * fsh_prepare_fsrec()
339 *
340 * Important note:
341 * Before using this function, fsh_init() MUST be called. We do that in
342 * vfsinit()@vfs.c.
343 *
344 * One would ask, why isn't the vfsp->vfs_fshrecord initialised when the
345 * vfs_t is created. Unfortunately, some filesystems (e.g. fifofs) do not
346 * call vfs_init() or even vfs_alloc(). It's possible that some unbundled
347 * filesystems could do the same thing. That's why this solution is
348 * introduced. It should be called before any code that needs access to
349 * vfs_fshrecord.
350 *
351 * Locking:
352 * There are no locks here, because there's no good place to initialise
353 * the lock. Concurrency issues are solved by using atomic instructions
354 * and a spinlock, which is spinning only once for a given vfs_t. Because
355 * of that, the usage of the spinlock isn't bad at all.
356 *
357 * How it works:
358 * a) if vfsp->vfs_fshrecord equals NULL, atomic_cas_ptr() changes it to
359 * fsh_res_ptr. That's a signal for other threads, that the structure
360 * is being initialised.
361 * b) if vfsp->vfs_fshrecord equals fsh_res_ptr, that means we have to wait,
362 * because vfs_fshrecord is being initialised by another call.
363 * c) other cases:
364 * vfs_fshrecord is already initialised, so we can use it. It won't change
365 * until vfs_free() is called. It can't happen when someone is holding
366 * the vfs_t, which is expected from the caller of fsh API.
367 */
368 static void
369 fsh_prepare_fsrec(vfs_t *vfsp)
370 {
371 fsh_fsrecord_t *fsrec;
372
373 while ((fsrec = atomic_cas_ptr(&vfsp->vfs_fshrecord, NULL,
374 fsh_res_ptr)) == fsh_res_ptr)
375 ;
376
377 if (fsrec == NULL)
378 atomic_swap_ptr(&vfsp->vfs_fshrecord, fsh_fsrec_create());
379 }
380
381 /*
382 * API for enabling/disabling fsh per vfs_t.
383 *
384 * A newly created vfs_t has fsh enabled by default. If one would want to change
385 * this behaviour, mount callbacks could be used.
386 *
387 * The caller is expected to hold the vfs_t.
388 *
389 * These functions must NOT be called in a hook.
390 */
391 void
392 fsh_fs_enable(vfs_t *vfsp)
393 {
394 fsh_prepare_fsrec(vfsp);
395
396 rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
397 vfsp->vfs_fshrecord->fshfsr_enabled = 1;
398 rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
399 }
400
401 void
402 fsh_fs_disable(vfs_t *vfsp)
403 {
404 fsh_prepare_fsrec(vfsp);
405
406 rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
407 vfsp->vfs_fshrecord->fshfsr_enabled = 0;
408 rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
409 }
410
411 /*
412 * API used for installing hooks. fsh_handle_t is returned for further
413 * actions (currently just removing) on this set of hooks.
414 *
415 * fsh_t fields:
416 * - arg - argument passed to every hook
417 * - remove_cb - remove callback, called after a hook is removed and all the
418 * threads stop executing it
419 * - read, write, ... - pointers to hooks for corresponding vnodeops/vfsops;
420 * if there is no hook desired for an operation, it should be set to
421 * NULL
422 *
423 * It's important that the hooks are executed in LIFO installation order (they
424 * are added to the head of the hook list).
425 *
426 * The caller is expected to hold the vfs_t.
427 *
428 * Returns (-1) if hook/callback limit exceeded, handle otherwise.
429 */
430 fsh_handle_t
431 fsh_hook_install(vfs_t *vfsp, fsh_t *hooks)
432 {
433 fsh_handle_t handle;
434 fsh_int_t *fshi;
435
436 fsh_prepare_fsrec(vfsp);
437
438 if ((handle = id_alloc(fsh_idspace)) == -1)
439 return (-1);
440
441 fshi = kmem_alloc(sizeof (*fshi), KM_SLEEP);
442 mutex_init(&fshi->fshi_lock, NULL, MUTEX_DRIVER, NULL);
443 (void) memcpy(&fshi->fshi_hooks, hooks, sizeof (fshi->fshi_hooks));
444 fshi->fshi_handle = handle;
445 fshi->fshi_doomed = 0;
446 fshi->fshi_ref = 1;
447 fshi->fshi_vfsp = vfsp;
448
449 mutex_enter(&fsh_lock);
450 rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
451 list_insert_head(&vfsp->vfs_fshrecord->fshfsr_list, fshi);
452 rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
453
454 list_insert_head(&fsh_map, fshi);
455 mutex_exit(&fsh_lock);
456
482 fshi_rele(fsh_int_t *fshi)
483 {
484 int destroy;
485
486 mutex_enter(&fshi->fshi_lock);
487 ASSERT(fshi->fshi_ref > 0);
488 fshi->fshi_ref--;
489 if (fshi->fshi_ref == 0) {
490 ASSERT(fshi->fshi_doomed == 1);
491 destroy = 1;
492 } else {
493 destroy = 0;
494 }
495 mutex_exit(&fshi->fshi_lock);
496
497 if (destroy) {
498 /*
499 * At this point, we are sure that fsh_hook_remove() has been
500 * called, that's why we don't remove the fshi from fsh_map.
501 * fsh_hook_remove() did that already.
502 */
503 fsh_fsrecord_t *fsrecp;
504
505 if (fshi->fshi_hooks.remove_cb != NULL)
506 (*fshi->fshi_hooks.remove_cb)(
507 fshi->fshi_hooks.arg, fshi->fshi_handle);
508 /*
509 * We don't have to call fsh_prepare_fsrec() here.
510 * fsh_fsrecord_t is already initialised, because we've found a
511 * mapping for the given handle.
512 */
513 fsrecp = fshi->fshi_vfsp->vfs_fshrecord;
514 ASSERT(fsrecp != NULL);
515 ASSERT(fsrecp != fsh_res_ptr);
516
517 rw_enter(&fsrecp->fshfsr_lock, RW_WRITER);
518 list_remove(&fsrecp->fshfsr_list, fshi);
519 rw_exit(&fsrecp->fshfsr_lock);
520
521 id_free(fsh_idspace, fshi->fshi_handle);
522 mutex_destroy(&fshi->fshi_lock);
523 kmem_free(fshi, sizeof (*fshi));
524 }
525 }
526
527 /*
528 * Used for removing a hook set.
529 *
530 * fsh_hook_remove() invalidates the given handle.
531 *
532 * It is guaranteed, that after successful return from fsh_hook_remove(),
533 * calls to vnodeops/vfsops, on the vfs_t on which the hook is installed, won't
534 * go through this hook.
535 *
536 * There is no guarantee that after fsh_hook_remove() returns, the hook
537 * associated with the handle won't be executing. Instead, it is guaranteed that
538 * when remove_cb() is called, the hook has finished its execution in all threads.
539 * It is safe to destroy all internal data associated with this hook inside
540 * remove_cb().
568 mutex_exit(&fsh_lock);
569
570 fshi_rele(fshi);
571
572 return (0);
573 }
574
575 /*
576 * API for installing global mount/free callbacks.
577 *
578 * fsh_callback_t fields:
579 * fshc_arg - argument passed to the callbacks
580 * fshc_free - callback fired before VFS_FREEVFS() is called, after vfs_count
581 * drops to 0
582 * fshc_mount - callback fired right before returning from domount()
583 * The first argument of these callbacks is the vfs_t that is mounted/freed.
584 * The second one is the fshc_arg.
585 *
586 * fsh_callback_handle_t is filled out by this function.
587 *
588 * This function must NOT be called in a callback, because it will cause
589 * a deadlock.
590 *
591 * Returns (-1) if hook/callback limit exceeded.
592 */
593 fsh_callback_handle_t
594 fsh_callback_install(fsh_callback_t *callback)
595 {
596 fsh_callback_int_t *fshci;
597 fsh_callback_handle_t handle;
598
599 if ((handle = id_alloc(fsh_idspace)) == -1)
600 return (-1);
601
602 fshci = (fsh_callback_int_t *)kmem_alloc(sizeof (*fshci), KM_SLEEP);
603 (void) memcpy(&fshci->fshci_cb, callback, sizeof (fshci->fshci_cb));
604 fshci->fshci_handle = handle;
605
606 /* If it is called in a {mount,free} callback, causes deadlock. */
607 rw_enter(&fsh_cblist_lock, RW_WRITER);
608 list_insert_head(&fsh_cblist, fshci);
609 rw_exit(&fsh_cblist_lock);
610
611 return (handle);
612 }
613
614 /*
615 * API for removing global mount/free callbacks.
616 *
617 * This function must NOT be called in a callback, because it will cause
618 * a deadlock.
619 *
620 * Returns (-1) if callback wasn't found, 0 otherwise.
621 */
622 int
623 fsh_callback_remove(fsh_callback_handle_t handle)
624 {
625 fsh_callback_int_t *fshci;
626
627 /* If it is called in a {mount,free} callback, causes deadlock. */
628 rw_enter(&fsh_cblist_lock, RW_WRITER);
629 for (fshci = list_head(&fsh_cblist); fshci != NULL;
630 fshci = list_next(&fsh_cblist, fshci)) {
631 if (fshci->fshci_handle == handle) {
632 list_remove(&fsh_cblist, fshci);
633 break;
634 }
635 }
636 rw_exit(&fsh_cblist_lock);
637
638 if (fshci == NULL)
639 return (-1);
640
641 kmem_free(fshci, sizeof (*fshci));
642 id_free(fsh_idspace, handle);
643
644 return (0);
645 }
646
647 /*
648 * This function is executed right before returning from domount()@vfs.c.
649 * We are sure that it's called only after fsh_init().
650 * It executes all the mount callbacks installed in the fsh.
651 *
652 * Since fsh_exec_mount_callbacks() is called only inside domount(), it is legal
653 * to call fsh_hook_{install,remove}() inside a mount callback WITHOUT holding
654 * this vfs_t. This guarantee should be preserved, because it's in the "Usage"
655 * section in the big theory statement at the top of this file.
656 */
657 void
658 fsh_exec_mount_callbacks(vfs_t *vfsp)
659 {
660 fsh_callback_int_t *fshci;
661 fsh_callback_t *cb;
662
663 rw_enter(&fsh_cblist_lock, RW_READER);
664 for (fshci = list_head(&fsh_cblist); fshci != NULL;
665 fshci = list_next(&fsh_cblist, fshci)) {
666 cb = &fshci->fshci_cb;
667 if (cb->fshc_mount != NULL)
668 (*(cb->fshc_mount))(vfsp, cb->fshc_arg);
669 }
670 rw_exit(&fsh_cblist_lock);
671 }
672
673 /*
674 * This function is executed right before VFS_FREEVFS() is called in
675 * vfs_rele()@vfs.c. We are sure that it's called only after fsh_init().
676 * It executes all the free callbacks installed in the fsh.
677 *
678 * The free() callback is the point after which the handles associated with
679 * the hooks installed on this vfs_t become invalid.
680 */
681 void
682 fsh_exec_free_callbacks(vfs_t *vfsp)
683 {
684 fsh_callback_int_t *fshci;
685 fsh_callback_t *cb;
686
687 rw_enter(&fsh_cblist_lock, RW_READER);
688 for (fshci = list_head(&fsh_cblist); fshci != NULL;
689 fshci = list_next(&fsh_cblist, fshci)) {
690 cb = &fshci->fshci_cb;
691 if (cb->fshc_free != NULL)
692 (*(cb->fshc_free))(vfsp, cb->fshc_arg);
693 }
694 rw_exit(&fsh_cblist_lock);
695 }
696
697 /*
698 * API for vnode.c/vfs.c to start executing the fsh for a given operation.
699 *
700 * fsh_xxx() tries to find the first non-NULL xxx hook on the fshfsr_list. If it
701 * does, it executes it. If not, underlying vnodeop/vfsop is called.
702 *
703 * These interfaces are using fsh_res_ptr (in fsh_prepare_fsrec()), so it's
704 * absolutely necessary to call fsh_init() before using them. That's done in
705 * vfsinit().
706 *
707 * While these functions are executing, it's expected that necessary vfs_t's
708 * are held so that vfs_free() isn't called. vfs_free() expects that no one
709 * accesses vfs_fshrecord of a given vfs_t.
710 * It's also the caller's responsibility to keep vnode_t passed to fsh_foo()
711 * alive and valid.
712 * All these expectations are met because these functions are used only in
713 * corresponding {fop,fsop}_foo() functions.
714 */
715 int
716 fsh_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
717 caller_context_t *ct)
718 {
719 int ret;
720 fsh_fsrecord_t *fsrecp;
721 fsh_int_t *fshi;
722
723 fsh_prepare_fsrec(vp->v_vfsp);
724 fsrecp = vp->v_vfsp->vfs_fshrecord;
725
726 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
727 if (!(fsrecp->fshfsr_enabled)) {
728 rw_exit(&fsrecp->fshfsr_lock);
729 return ((*(vp->v_op->vop_read))(vp, uiop, ioflag, cr, ct));
730 }
731
732 for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL;
733 fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
734 if (fshi->fshi_hooks.read != NULL)
735 if (fshi_hold(fshi))
736 break;
737 }
738 rw_exit(&fsrecp->fshfsr_lock);
739
740 if (fshi == NULL)
741 return ((*(vp->v_op->vop_read))(vp, uiop, ioflag, cr, ct));
742
743 ret = (*fshi->fshi_hooks.read)(fshi, fshi->fshi_hooks.arg,
744 vp, uiop, ioflag, cr, ct);
745 fshi_rele(fshi);
746 return (ret);
747 }
748
749 int
750 fsh_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
751 caller_context_t *ct)
752 {
753 fsh_int_t *fshi;
754 int ret;
755 fsh_fsrecord_t *fsrecp;
756
757 fsh_prepare_fsrec(vp->v_vfsp);
758 fsrecp = vp->v_vfsp->vfs_fshrecord;
759
760 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
761 if (!(vp->v_vfsp->vfs_fshrecord->fshfsr_enabled)) {
762 rw_exit(&fsrecp->fshfsr_lock);
763 return ((*(vp->v_op->vop_write))(vp, uiop, ioflag, cr, ct));
764 }
765
766 for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL;
767 fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
768 if (fshi->fshi_hooks.write != NULL)
769 if (fshi_hold(fshi))
770 break;
771 }
772 rw_exit(&fsrecp->fshfsr_lock);
773
774 if (fshi == NULL)
775 return ((*(vp->v_op->vop_write))(vp, uiop, ioflag, cr, ct));
776
777 ret = (*fshi->fshi_hooks.write)(fshi, fshi->fshi_hooks.arg,
778 vp, uiop, ioflag, cr, ct);
779 fshi_rele(fshi);
780 return (ret);
781 }
782
783 int
784 fsh_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
785 {
786 fsh_fsrecord_t *fsrecp;
787 fsh_int_t *fshi;
788 int ret;
789
790 fsh_prepare_fsrec(vfsp);
791 fsrecp = vfsp->vfs_fshrecord;
792
793 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
794 if (!(fsrecp->fshfsr_enabled)) {
795 rw_exit(&fsrecp->fshfsr_lock);
796 return ((*(vfsp->vfs_op->vfs_mount))(vfsp, mvp, uap, cr));
797 }
798
799 for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL;
800 fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
801 if (fshi->fshi_hooks.mount != NULL)
802 if (fshi_hold(fshi))
803 break;
804 }
805 rw_exit(&fsrecp->fshfsr_lock);
806
807 if (fshi == NULL)
808 return ((*(vfsp->vfs_op->vfs_mount))(vfsp, mvp, uap, cr));
809
810 ret = (*fshi->fshi_hooks.mount)(fshi, fshi->fshi_hooks.arg,
811 vfsp, mvp, uap, cr);
812 fshi_rele(fshi);
813 return (ret);
814 }
815
816 int
817 fsh_unmount(vfs_t *vfsp, int flag, cred_t *cr)
818 {
819 fsh_fsrecord_t *fsrecp;
820 fsh_int_t *fshi;
821 int ret;
822
823 fsh_prepare_fsrec(vfsp);
824 fsrecp = vfsp->vfs_fshrecord;
825
826 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
827 if (!(fsrecp->fshfsr_enabled)) {
828 rw_exit(&fsrecp->fshfsr_lock);
829 return ((*(vfsp->vfs_op->vfs_unmount))(vfsp, flag, cr));
830 }
831
832 for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL;
833 fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
834 if (fshi->fshi_hooks.unmount != NULL)
835 if (fshi_hold(fshi))
836 break;
837 }
838 rw_exit(&fsrecp->fshfsr_lock);
839
840 if (fshi == NULL)
841 return ((*(vfsp->vfs_op->vfs_unmount))(vfsp, flag, cr));
842
843 ret = (*fshi->fshi_hooks.unmount)(fshi, fshi->fshi_hooks.arg,
844 vfsp, flag, cr);
845 fshi_rele(fshi);
846 return (ret);
847 }
848
849 /*
850 * This is the function used by fsh_prepare_fsrec() to allocate a new
851 * fsh_fsrecord. This function is called by the first function which
852 * accesses the vfs_fshrecord and finds out it's NULL.
853 */
854 static fsh_fsrecord_t *
855 fsh_fsrec_create()
856 {
857 fsh_fsrecord_t *fsrecp;
858
859 fsrecp = (fsh_fsrecord_t *)kmem_zalloc(sizeof (*fsrecp), KM_SLEEP);
860 list_create(&fsrecp->fshfsr_list, sizeof (fsh_int_t),
861 offsetof(fsh_int_t, fshi_next));
862 rw_init(&fsrecp->fshfsr_lock, NULL, RW_DRIVER, NULL);
863 fsrecp->fshfsr_enabled = 1;
864 return (fsrecp);
865 }
866
867
868 /*
869 * This call can be used ONLY in vfs_free(). It's assumed that no other
870 * fsh calls using the vfs_t that owns the fsh_fsrecord to be destroyed
871 * are executing while a call to fsh_fsrec_destroy() is made. With these
872 * assumptions, no concurrency issues occur.
873 *
874 * Before calling this function outside the fsh, it's sufficient and
875 * required to check if the passed fsh_fsrecord * is not NULL. We don't
876 * have to check if it is not equal to fsh_res_ptr, because all the fsh API
877 * calls involving this vfs_t should end before vfs_free() is called
878 * (outside the fsh, fsh_fsrecord is never equal to fsh_res_ptr). That is
879 * guaranteed by the explicit requirement that the caller of fsh API holds
880 * the vfs_t when needed.
881 *
882 * All the remaining hooks are being removed.
883 */
884 void
885 fsh_fsrec_destroy(struct fsh_fsrecord *volatile fsrecp)
886 {
887 fsh_int_t *fshi;
888
889 VERIFY(fsrecp != NULL);
890
891 _NOTE(CONSTCOND)
892 while (1) {
893 mutex_enter(&fsh_lock);
894 /* No need here to hold fshfsr_lock */
895 fshi = list_remove_head(&fsrecp->fshfsr_list);
896 if (fshi == NULL) {
897 mutex_exit(&fsh_lock);
898 break;
899 }
900 ASSERT(fshi->fshi_doomed == 0);
901 list_remove(&fsh_map, fshi);
902 mutex_exit(&fsh_lock);
903
904 if (fshi->fshi_hooks.remove_cb != NULL)
905 (*fshi->fshi_hooks.remove_cb)(fshi->fshi_hooks.arg,
906 fshi->fshi_handle);
907 id_free(fsh_idspace, fshi->fshi_handle);
908 mutex_destroy(&fshi->fshi_lock);
909 kmem_free(fshi, sizeof (*fshi));
910
911 }
912
913 list_destroy(&fsrecp->fshfsr_list);
914 rw_destroy(&fsrecp->fshfsr_lock);
915 kmem_free(fsrecp, sizeof (*fsrecp));
916 }
917
918 /*
919 * fsh_init() is called in vfsinit()@vfs.c. This function MUST be called
920 * before every other fsh call.
921 */
922 void
923 fsh_init(void)
924 {
925 rw_init(&fsh_cblist_lock, NULL, RW_DRIVER, NULL);
926 list_create(&fsh_cblist, sizeof (fsh_callback_int_t),
927 offsetof(fsh_callback_int_t, fshci_next));
928
929 mutex_init(&fsh_lock, NULL, MUTEX_DRIVER, NULL);
930
931 list_create(&fsh_map, sizeof (fsh_int_t), offsetof(fsh_int_t,
932 fshi_global));
933
934 /* See comment above fsh_prepare_fsrec() */
935 fsh_res_ptr = (void *)-1;
936
937 fsh_idspace = id_space_create("fsh", 0, fsh_limit);
938 }
939
940 /*
941 * These functions are used to pass control to the next hook or underlying
942 * vop or vfsop. Their caller doesn't have to worry about any locking.
943 */
944 int
945 fsh_next_read(fsh_int_t *fshi, vnode_t *vp, uio_t *uiop, int ioflag,
946 cred_t *cr, caller_context_t *ct)
947 {
948 int ret;
949 fsh_fsrecord_t *fsrecp = vp->v_vfsp->vfs_fshrecord;
950
951 /*
952 * The passed fshi is the previous hook (the one from which we've been
953 * called). We need to find the next one.
954 */
955 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
956 for (fshi = list_next(&fsrecp->fshfsr_list, fshi); fshi != NULL;
957 fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
958 if (fshi->fshi_hooks.read != NULL)
959 if (fshi_hold(fshi))
960 break;
961 }
962 rw_exit(&fsrecp->fshfsr_lock);
963
964 if (fshi == NULL)
965 return ((*vp->v_op->vop_read)(vp, uiop, ioflag, cr, ct));
966
967 ret = (*fshi->fshi_hooks.read)(fshi, fshi->fshi_hooks.arg,
968 vp, uiop, ioflag, cr, ct);
969 fshi_rele(fshi);
970 return (ret);
971 }
972
973 int
974 fsh_next_write(fsh_int_t *fshi, vnode_t *vp, uio_t *uiop, int ioflag,
975 cred_t *cr, caller_context_t *ct)
976 {
977 fsh_fsrecord_t *fsrecp = vp->v_vfsp->vfs_fshrecord;
978 int ret;
979
980 /*
981 * The passed fshi is the previous hook (the one from which we've been
982 * called). We need to find the next one.
983 */
984 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
985 for (fshi = list_next(&fsrecp->fshfsr_list, fshi); fshi != NULL;
986 fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
987 if (fshi->fshi_hooks.write != NULL)
988 if (fshi_hold(fshi))
989 break;
990 }
991 rw_exit(&fsrecp->fshfsr_lock);
992
993 if (fshi == NULL)
994 return ((*vp->v_op->vop_write)(vp, uiop, ioflag, cr, ct));
995
996 ret = (*fshi->fshi_hooks.write)(fshi, fshi->fshi_hooks.arg,
997 vp, uiop, ioflag, cr, ct);
998 fshi_rele(fshi);
999 return (ret);
1000 }
1001
1002 int
1003 fsh_next_mount(fsh_int_t *fshi, vfs_t *vfsp, vnode_t *mvp, struct mounta *uap,
1004 cred_t *cr)
1005 {
1006 fsh_fsrecord_t *fsrecp = vfsp->vfs_fshrecord;
1007 int ret;
1008
1009 /*
1010 * The passed fshi is the previous hook (the one from which we've been
1011 * called). We need to find the next one.
1012 */
1013 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
1014 for (fshi = list_next(&fsrecp->fshfsr_list, fshi); fshi != NULL;
1015 fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
1016 if (fshi->fshi_hooks.mount != NULL)
1017 if (fshi_hold(fshi))
1018 break;
1019 }
1020 rw_exit(&fsrecp->fshfsr_lock);
1021
1022 if (fshi == NULL)
1023 return ((*(vfsp->vfs_op->vfs_mount))(vfsp, mvp, uap, cr));
1024
1025 ret = (*fshi->fshi_hooks.mount)(fshi, fshi->fshi_hooks.arg,
1026 vfsp, mvp, uap, cr);
1027 fshi_rele(fshi);
1028 return (ret);
1029 }
1030
1031 int
1032 fsh_next_unmount(fsh_int_t *fshi, vfs_t *vfsp, int flag, cred_t *cr)
1033 {
1034 fsh_fsrecord_t *fsrecp = vfsp->vfs_fshrecord;
1035 int ret;
1036
1037 /*
1038 * The passed fshi is the previous hook (the one from which we've been
1039 * called). We need to find the next one.
1040 */
1041 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
1042 for (fshi = list_next(&fsrecp->fshfsr_list, fshi); fshi != NULL;
1043 fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
1044 if (fshi->fshi_hooks.unmount != NULL)
1045 if (fshi_hold(fshi))
1046 break;
1047 }
1048 rw_exit(&fsrecp->fshfsr_lock);
1049
1050 if (fshi == NULL)
1051 return ((*vfsp->vfs_op->vfs_unmount)(vfsp, flag, cr));
1052
1053 ret = (*fshi->fshi_hooks.unmount)(fshi, fshi->fshi_hooks.arg,
1054 vfsp, flag, cr);
1055 fshi_rele(fshi);
1056 return (ret);
1057 }
|
18 #include <sys/fsh.h>
19 #include <sys/fsh_impl.h>
20 #include <sys/id_space.h>
21 #include <sys/kmem.h>
22 #include <sys/ksynch.h>
23 #include <sys/list.h>
24 #include <sys/sunddi.h>
25 #include <sys/sysmacros.h>
26 #include <sys/types.h>
27 #include <sys/vfs.h>
28 #include <sys/vnode.h>
29
30 /*
31 * Filesystem hook framework (fsh)
32 *
33 * 1. Abstract.
34 * The main goal of the filesystem hook framework is to provide an easy way to
35 * inject client-defined behaviour into vfs/vnode calls. fsh works on
36 * vfs_t granularity.
37 *
38 * Note: In this document, both an fsh_t structure and hooking function for a
39 * vnodeop/vfsop is referred to as *hook*.
40 *
41 *
42 * 2. Overview.
43 * fsh_t is the main object in the fsh. An fsh_t is a structure containing:
44 * - pointers to hooking functions
45 * - an argument to pass (this is shared for all the hooks in a given
46 * fsh_t)
47 * - a pointer to the *hook remove callback*
48 *
49 * The information from fsh_t is copied by the fsh and an fsh_handle_t
50 * is returned. It should be used for further removing.
51 *
52 *
53 * 3. Usage.
54 * It is expected that vfs_t/vnode_t passed to fsh_foo() functions are held by
55 * the caller when needed. fsh does no vfs_t/vnode_t locking.
56 *
57 * fsh_t is a structure filled out by the client. It contains:
58 * - pointers to hooking functions
59 * - the argument passed to the hooks
60 * - the *hook remove callback*
61 *
62 * If a client does not want to add a hook for function foo(), he should fill
63 * corresponding fields with NULLs. For every vfsop/vnodeop there are two
64 * fields: pre_foo() and post_foo(). These are the functions called before and
65 * after the next hook or underlying vfsop/vnodeop.
66 *
67 * Pre hooks take:
68 * - arg
69 * - pointer to a field containing void* - it should be filled whenever
70 * the client wants to have some data shared by the pre and post hooks in
71 * the same syscall execution. This is called the *instance data*.
72 * - pointers to the arguments passed to the underlying vfsop/vnodeop
73 * Pre hooks return void.
74 *
75 * Post hooks take:
76 * - value returned by the previous post hook or underlying vfsop/vnodeop
77 * - arg
78 * - pointer to the *instance data*
79 * - arguments passed to the underlying vfsop/vnodeop
80 * Post hooks return an int, which should be treated as the vfsop/vnodeop
81 * return value.
82 * Memory allocated by pre hook must be deallocated by the post hook.
83 *
84 * Execution path of hooks A, B, C is as follows:
85 * foo()
86 * preA(argA, &instancepA, ...);
87 * preB(argB, &instancepB, ...);
88 * preC(argC, &instancepC, ...);
89 * ret = VOP_FOO();
90 * ret = postC(ret, argC, instancepC, ...);
91 * ret = postB(ret, argB, instancepB, ...);
92 * ret = postA(ret, argA, instancepA, ...);
93 * return (ret);
94 *
95 * After installation, an fsh_handle_t is returned to the caller.
96 *
97 * Hook remove callback - it's a function being fired after a hook is removed
98 * and no thread is going to execute it anymore. It's safe to destroy all the
99 * data associated with this hook inside it.
100 *
101 * It is guaranteed, that whenever a pre_hook() is called, there will be also
102 * post_hook() called within the same syscall.
103 *
104 * If a hook (HNew) is installed/removed on/from a vfs_t within execution of
105 * another hook (HExec) installed on this vfs_t, the syscall that executes
106 * HExec won't fire HNew.
107 *
108 * A client might want to fire callbacks when vfs_ts are being mounted
109 * or freed. There's an fsh_callback_t structure provided to install such
110 * callbacks along with the API.
111 * It is legal to call fsh_hook_{install,remove}() inside a mount callback
112 * WITHOUT holding the vfs_t.
113 *
114 * After vfs_t's free callback returns, all the handles associated with the
115 * hooks installed on this vfs_t are invalid and must not be used.
116 *
117 * 4. API
118 * None of the APIs should be called during interrupt context above lock
119 * level.
120 *
121 * a) fsh.h
122 * Any of these functions could be called in a hook or a hook remove callback.
123 * The only functions that must not be called inside a {mount,free} callback are
124 * fsh_callback_{install,remove}. Using them will cause a deadlock.
125 *
126 *
127 * fsh_fs_enable(vfs_t *vfsp)
128 * fsh_fs_disable(vfs_t *vfsp)
129 * Enables/disables fsh for a given vfs_t.
130 *
131 * fsh_hook_install(vfs_t *vfsp, fsh_t *hooks)
132 * Installs hooks on vfsp filesystem.
133 * It's important that hooks are executed in LIFO installation order,
134 * which means that if there are hooks A and B installed in this order, B
135 * is going to be executed before A.
136 * It returns a correct handle, or (-1) if hook/callback limit exceeded.
137 * The handle is valid until a free callback returns or an explicit call
138 * to fsh_hook_remove().
139 *
140 * fsh_hook_remove(fsh_handle_t handle)
141 * Removes a hook and invalidates the handle.
142 * It is guaranteed that after this function returns, calls to
143 * vnodeops/vfsops won't go through this hook, although there might be
144 * some threads still executing this hook. When hook remove callback is
145 * fired, it is guaranteed that the hook won't be executed anymore. It is
146 * safe to remove all the internal data associated with this hook inside
147 * the hook remove callback. The hook remove callback could be called
148 * inside fsh_hook_remove().
149 *
150 *
151 * fsh_callback_install(fsh_callback_t *callback)
152 * fsh_callback_remove(fsh_callback_handle_t handle)
153 * Installs/removes callbacks for vfs_t mount/free. The mount callback
154 * is executed right before domount() returns. The free callback is
155 * called right before VFS_FREEVFS() is called.
156 * The fsh_callback_install() returns a correct handle, or (-1) if
157 * hook/callback limit exceeded.
158 *
159 *
160 * b) fsh_impl.h (for vfs.c and vnode.c only)
161 * fsh_init()
162 * This call has to be done in vfsinit(). It initialises the fsh. It
163 * is absolutely necessary that this call is made before any other fsh
164 * operation.
165 *
166 * fsh_exec_mount_callbacks(vfs_t *vfsp)
167 * fsh_exec_free_callbacks(vfs_t *vfsp)
168 * Used to execute all fsh callbacks for {mount,free} of a vfs_t.
169 *
170 * fsh_fsrec_destroy(struct fsh_fsrecord *fsrecp)
171 * Destroys an fsh_fsrecord structure. All the hooks installed on this
172 * vfs_t are then destroyed. free callback is called before this function.
173 *
174 * fsh_foo(ARGUMENTS)
175 * Function used to execute the hook chain for a given syscall.
176 *
177 *
178 * 5. Internals.
179 * fsh_int_t is an internal hook structure. It is reference counted.
180 * fshi_hold() and fshi_rele() should be used whenever needed.
181 * fsh_int_t entries are elements of both fsh_map (global) and fshfsr_list
182 * (local to vfs_t). All entries are unique and are identified by fshi_handle.
183 *
184 * fsh_int_t properties:
185 * - fsh_hook_install() sets the ref. counter to 1 and adds it to both
186 * fsh_map and fshfsr_list
187 * - fsh_hook_remove() decreases the ref. counter by 1, removes the hook
188 * from fsh_map and marks the hook as *doomed*
189 * - if fsh_int_t is on the fshfsr_list, it's alive and there is a thread
190 * executing it
191 * - if fsh_int_t is marked as *doomed*, the reference counter cannot
192 * be increased and thus no thread can acquire this fsh_int_t
193 * - ref. counter can drop to 0 only after an fsh_hook_remove() call; this
194 * also means that the fsh_int_t is *doomed* and isn't a part of fsh_map
195 * - fsh_int_t could be also destroyed without fsh_hook_remove() call,
196 * that happens only inside fsh_fsrec_destroy() where it is guaranteed
197 * that there is no thread executing the hook
198 *
199 *
200 * fsh_fsrecord_t is a structure which lives inside a vfs_t.
201 * fsh_fsrecord_t contains:
202 * - an rw-lock that protects the structure
203 * - a list of hooks installed on this vfs_t
204 * - a flag which tells whether fsh is enabled on this vfs_t
205 *
206 *
207 * fsh_fsrec_prepare rule:
208 * Every function that needs vfsp->vfs_fshrecord has to call
209 * fsh_fsrec_prepare() first. If and only if the call is made, it is safe to
210 * use vfsp->vfs_fshrecord.
211 *
212 * Unfortunately, because of unexpected behaviour of some filesystems (no use
213 * of vfs_alloc()/vfs_init()) there's no good place to initialise the
214 * fsh_fsrecord_t structure. The approach being used here is to check if it's
215 * initialised in every call. Because of the fact that no lock could be used
216 * here (the same problem with initialisation), a spinlock is used. This is
217 * explained in more detail in a comment before fsh_fsrec_prepare(). After
218 * calling fsh_fsrec_prepare() it's completely safe to keep the vfs_fshrecord
219 * pointer locally, because it won't be changed until vfs_free() is called.
220 *
221 * Exceptions from this rule:
222 * - vfs_free() - it is expected that no other fsh calls would be made for the
223 * vfs_t that's being freed. That's why vfs_fshrecord could be only NULL or a
224 * valid pointer and could not be concurrently accessed.
225 * - fshi_rele() - fsh_hook_install() comes before first fshi_rele() call;
226 * the fsh_fsrecord_t has been initialised there
227 *
228 *
229 * When there are no fsh functions (that use a particular fsh_fsrecord_t)
230 * executing, the vfs_fshrecord pointer won't be equal to fsh_res_ptr. It
231 * would be NULL or a pointer to an initialised fsh_fsrecord_t.
232 *
233 * It is required and sufficient to check if fsh_fsrecord_t is not NULL before
234 * passing it to fsh_fsrec_destroy. We don't have to check if it is not equal
235 * to fsh_res_ptr, because all the fsh API calls involving this vfs_t should
236 * end before vfs_free() is called (outside the fsh, fsh_fsrecord is never
237 * equal to fsh_res_ptr). That is guaranteed by the explicit requirement that
238 * the caller of fsh API holds the vfs_t when needed. fsh_hook_remove() must not
239 * be called either, because the handles are invalidated after free callback has
240 * fired.
241 *
242 *
243 * Callbacks:
244 * Mount callbacks are executed by a call to fsh_exec_mount_callbacks() right
245 * before returning from domount()@vfs.c.
246 *
247 * Free callbacks are executed by a call to fsh_exec_free_callbacks() right
248 * before calling VFS_FREEVFS(), after vfs_t's reference count drops to 0.
249 *
250 *
251 * 6. Locking
252 * a) public
253 * fsh does no vfs_t nor vnode_t locking. It is expected that whenever it is
254 * needed, the client does that.
255 *
256 * No locks are held across hooks or hook remove callbacks execution. It is
257 * safe to use fsh API inside hooks and hook remove callbacks.
258 *
259 * fsh_cb_lock is held across {mount,free} callbacks. Calling
260 * fsh_callback_{install,remove} inside of a callback will cause a deadlock.
261 *
262 * b) internals
263 * Locking diagram:
264 *
265 * fsh_hook_remove() fsh_hook_install() fsh_fsrec_destroy()
266 * | | |
267 * | | |
268 * +------------------+ | +------------+
269 * | | | |
270 * | V | |
271 * V +------------|---|-+
272 * fshi_rele() | fsh_lock | | |
273 * (sometimes) +------------|---|-+
274 * | | |
275 * | +---+-- fshfsr_lock, RW_WRITER -+
276 * | |
277 * V |
278 * +---------------------------------------+ |
279 * | fsh_map | |
280 * | | |
281 * +----|-> vfsp->vfs_fshrecord->fshfsr_list <--|----------------+
282 * | +------------------------------^--------+
283 * | |
284 * | |
285 * fshfsr_lock, RW_READER fshfsr_lock, RW_WRITER
286 * | |
287 * | |
288 * fsh_read(), fshi_rele()
289 * fsh_write(),
290 * ... Might be called from:
291 * fsh_hook_remove()
292 * fsh_read(), fsh_write(), ...
293 *
294 *
295 * fsh_lock is a global lock for administrative path (fsh_hook_install,
296 * fsh_hook_remove) and fsh_fsrec_destroy() (which is semi-administrative, since
297 * it destroys the unremoved hooks). It is used only when fsh_map needs to be
298 * locked. The usage of this lock guarantees that the data in fsh_map and
299 * fshfsr_lists is consistent.
300 *
301 * In order to make calling callbacks inside callbacks possible, fsh_cb_owner is
302 * set by fsh_exec_{mount,free}_callbacks() to the thread that owns the
303 * fsh_cb_lock. It's always checked if we are owners of the mutex before
304 * entering it.
305 *
306 */
307
308
309 /* Internals */
310 typedef struct fsh_int {
311 fsh_handle_t fshi_handle;
312 fsh_t fshi_hooks;
313 vfs_t *fshi_vfsp;
314
315 kmutex_t fshi_lock;
316 uint64_t fshi_ref;
317 uint64_t fshi_doomed; /* changed inside fsh_lock */
318
319 /* next node in fshfsr_list */
320 list_node_t fshi_node;
321
322 /* next node in fsh_map */
323 list_node_t fshi_global;
324 } fsh_int_t;
325
326 typedef struct fsh_callback_int {
327 fsh_callback_t fshci_cb;
328 fsh_callback_handle_t fshci_handle;
329 list_node_t fshci_node;
330 } fsh_callback_int_t;
331
332
333 typedef struct fsh_exec {
334 fsh_int_t *fshe_fshi;
335 void *fshe_instance;
336 list_node_t fshe_node;
337 } fsh_exec_t;
338
339
340 static kmutex_t fsh_lock;
341
342 /*
343 * fsh_fsrecord_t is the main internal structure. Its content is protected
344 * by fshfsr_lock. The fshfsr_list is a list of fsh_int_t hook entries for
345 * the vfs_t that contains the fsh_fsrecord_t.
346 */
347 struct fsh_fsrecord {
348 krwlock_t fshfsr_lock;
349 int fshfsr_enabled;
350 list_t fshfsr_list;
351 };
352
353 /*
354 * Global list of fsh_int_t. Protected by fsh_lock.
355 */
356 static list_t fsh_map;
357
358 /*
359 * Global list of fsh_callback_int_t.
360 */
361 static kmutex_t fsh_cb_lock;
362 static kmutex_t fsh_cb_owner_lock;
363 static kthread_t *fsh_cb_owner;
364 static list_t fsh_cblist;
365
366 /*
367 * A reserved pointer for fsh purposes. It is used because of the method
368 * chosen for solving concurrency issues with vfs_fshrecord. The full
369 * explanation is in the big theory statement at the beginning of this
370 * file and above fsh_fsrec_prepare(). It is initialised in fsh_init().
371 */
372 static void *fsh_res_ptr;
373
374 static fsh_fsrecord_t *fsh_fsrec_create();
375
376 int fsh_limit = INT_MAX;
377 static id_space_t *fsh_idspace;
378
379 /*
380 * fsh_fsrec_prepare()
381 *
382 * Important note:
383 * Before using this function, fsh_init() MUST be called. We do that in
384 * vfsinit()@vfs.c.
385 *
386 * One would ask, why isn't the vfsp->vfs_fshrecord initialised when the
387 * vfs_t is created. Unfortunately, some filesystems (e.g. fifofs) do not
388 * call vfs_init() or even vfs_alloc(). It's possible that some unbundled
389 * filesystems could do the same thing. That's why this solution is
390 * introduced. It should be called before any code that needs access to
391 * vfs_fshrecord.
392 *
393 * Locking:
394 * There are no locks here, because there's no good place to initialise
395 * the lock. Concurrency issues are solved by using atomic instructions
396 * and a spinlock, which is spinning only once for a given vfs_t. Because
397 * of that, the usage of the spinlock isn't bad at all.
398 *
399 * How it works:
400 * a) if vfsp->vfs_fshrecord equals NULL, atomic_cas_ptr() changes it to
401 * fsh_res_ptr. That's a signal for other threads, that the structure
402 * is being initialised.
403 * b) if vfsp->vfs_fshrecord equals fsh_res_ptr, that means we have to wait,
404 * because vfs_fshrecord is being initialised by another call.
405 * c) other cases:
406 * vfs_fshrecord is already initialised, so we can use it. It won't change
407 * until vfs_free() is called. It can't happen when someone is holding
408 * the vfs_t, which is expected from the caller of fsh API.
409 */
410 static void
411 fsh_fsrec_prepare(vfs_t *vfsp)
412 {
413 fsh_fsrecord_t *fsrec;
414
415 while ((fsrec = atomic_cas_ptr(&vfsp->vfs_fshrecord, NULL,
416 fsh_res_ptr)) == fsh_res_ptr)
417 ;
418
419 if (fsrec == NULL)
420 atomic_swap_ptr(&vfsp->vfs_fshrecord, fsh_fsrec_create());
421 }
422
423 /*
424 * API for enabling/disabling fsh per vfs_t.
425 *
426 * A newly created vfs_t has fsh enabled by default. If one would want to change
427 * this behaviour, mount callbacks could be used.
428 *
429 * The caller is expected to hold the vfs_t.
430 *
431 * These functions must NOT be called in a hook.
432 */
433 void
434 fsh_fs_enable(vfs_t *vfsp)
435 {
436 fsh_fsrec_prepare(vfsp);
437
438 rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
439 vfsp->vfs_fshrecord->fshfsr_enabled = 1;
440 rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
441 }
442
443 void
444 fsh_fs_disable(vfs_t *vfsp)
445 {
446 fsh_fsrec_prepare(vfsp);
447
448 rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
449 vfsp->vfs_fshrecord->fshfsr_enabled = 0;
450 rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
451 }
452
453 /*
454 * API used for installing hooks. fsh_handle_t is returned for further
455 * actions (currently just removing) on this set of hooks.
456 *
457 * It's important that the hooks are executed in LIFO installation order (they
458 * are added to the head of the hook list).
459 *
460 * The caller is expected to hold the vfs_t.
461 *
462 * Returns (-1) if hook/callback limit exceeded, handle otherwise.
463 */
464 fsh_handle_t
465 fsh_hook_install(vfs_t *vfsp, fsh_t *hooks)
466 {
467 fsh_handle_t handle;
468 fsh_int_t *fshi;
469
470 fsh_fsrec_prepare(vfsp);
471
472 if ((handle = id_alloc(fsh_idspace)) == -1)
473 return (-1);
474
475 fshi = kmem_alloc(sizeof (*fshi), KM_SLEEP);
476 mutex_init(&fshi->fshi_lock, NULL, MUTEX_DRIVER, NULL);
477 (void) memcpy(&fshi->fshi_hooks, hooks, sizeof (fshi->fshi_hooks));
478 fshi->fshi_handle = handle;
479 fshi->fshi_doomed = 0;
480 fshi->fshi_ref = 1;
481 fshi->fshi_vfsp = vfsp;
482
483 mutex_enter(&fsh_lock);
484 rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
485 list_insert_head(&vfsp->vfs_fshrecord->fshfsr_list, fshi);
486 rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
487
488 list_insert_head(&fsh_map, fshi);
489 mutex_exit(&fsh_lock);
490
516 fshi_rele(fsh_int_t *fshi)
517 {
518 int destroy;
519
520 mutex_enter(&fshi->fshi_lock);
521 ASSERT(fshi->fshi_ref > 0);
522 fshi->fshi_ref--;
523 if (fshi->fshi_ref == 0) {
524 ASSERT(fshi->fshi_doomed == 1);
525 destroy = 1;
526 } else {
527 destroy = 0;
528 }
529 mutex_exit(&fshi->fshi_lock);
530
531 if (destroy) {
532 /*
533 * At this point, we are sure that fsh_hook_remove() has been
534 * called, that's why we don't remove the fshi from fsh_map.
535 * fsh_hook_remove() did that already.
536 * There is also no need to call fsh_fsrec_prepare() here.
537 */
538 fsh_fsrecord_t *fsrecp;
539
540 /*
541 * We don't have to call fsh_fsrec_prepare() here.
542 * fsh_fsrecord_t is already initialised, because we've found a
543 * mapping for the given handle.
544 */
545 fsrecp = fshi->fshi_vfsp->vfs_fshrecord;
546 ASSERT(fsrecp != NULL);
547 ASSERT(fsrecp != fsh_res_ptr);
548
549 rw_enter(&fsrecp->fshfsr_lock, RW_WRITER);
550 list_remove(&fsrecp->fshfsr_list, fshi);
551 rw_exit(&fsrecp->fshfsr_lock);
552
553 if (fshi->fshi_hooks.remove_cb != NULL)
554 (*fshi->fshi_hooks.remove_cb)(
555 fshi->fshi_hooks.arg, fshi->fshi_handle);
556
557 id_free(fsh_idspace, fshi->fshi_handle);
558 mutex_destroy(&fshi->fshi_lock);
559 kmem_free(fshi, sizeof (*fshi));
560 }
561 }
562
563 /*
564 * Used for removing a hook set.
565 *
566 * fsh_hook_remove() invalidates the given handle.
567 *
568 * It is guaranteed, that after successful return from fsh_hook_remove(),
569 * calls to vnodeops/vfsops, on the vfs_t on which the hook is installed, won't
570 * go through this hook.
571 *
572 * There is no guarantee that after fsh_hook_remove() returns, the hook
573 * associated with the handle won't be executing. Instead, it is guaranteed that
574 * when remove_cb() is called, the hook has finished its execution in all threads.
575 * It is safe to destroy all internal data associated with this hook inside
576 * remove_cb().
604 mutex_exit(&fsh_lock);
605
606 fshi_rele(fshi);
607
608 return (0);
609 }
610
611 /*
612 * API for installing global mount/free callbacks.
613 *
614 * fsh_callback_t fields:
615 * fshc_arg - argument passed to the callbacks
616 * fshc_free - callback fired before VFS_FREEVFS() is called, after vfs_count
617 * drops to 0
618 * fshc_mount - callback fired right before returning from domount()
619 * The first argument of these callbacks is the vfs_t that is mounted/freed.
620 * The second one is the fshc_arg.
621 *
622 * fsh_callback_handle_t is filled out by this function.
623 *
624 * Returns (-1) if hook/callback limit exceeded.
625 *
626 * Calling this function in a {mount,free} callback will cause a deadlock.
627 */
628 fsh_callback_handle_t
629 fsh_callback_install(fsh_callback_t *callback)
630 {
631 fsh_callback_int_t *fshci;
632 fsh_callback_handle_t handle;
633
634 if ((handle = id_alloc(fsh_idspace)) == -1)
635 return (-1);
636
637 fshci = (fsh_callback_int_t *)kmem_alloc(sizeof (*fshci), KM_SLEEP);
638 (void) memcpy(&fshci->fshci_cb, callback, sizeof (fshci->fshci_cb));
639 fshci->fshci_handle = handle;
640
641 mutex_enter(&fsh_cb_lock);
642 list_insert_head(&fsh_cblist, fshci);
643 mutex_exit(&fsh_cb_lock);
644
645 return (handle);
646 }
647
648 /*
649 * API for removing global mount/free callbacks.
650 *
651 * Returns (-1) if callback wasn't found, 0 otherwise.
652 *
653 * Calling this function in a {mount,free} callback will cause a deadlock.
654 */
655 int
656 fsh_callback_remove(fsh_callback_handle_t handle)
657 {
658 fsh_callback_int_t *fshci;
659
660 mutex_enter(&fsh_cb_lock);
661
662 for (fshci = list_head(&fsh_cblist); fshci != NULL;
663 fshci = list_next(&fsh_cblist, fshci)) {
664 if (fshci->fshci_handle == handle) {
665 list_remove(&fsh_cblist, fshci);
666 break;
667 }
668 }
669
670 mutex_exit(&fsh_cb_lock);
671
672 if (fshci == NULL)
673 return (-1);
674
675 kmem_free(fshci, sizeof (*fshci));
676 id_free(fsh_idspace, handle);
677
678 return (0);
679 }
680
681 /*
682 * This function is executed right before returning from domount()@vfs.c.
683 * We are sure that it's called only after fsh_init().
684 * It executes all the mount callbacks installed in the fsh.
685 *
686 * Since fsh_exec_mount_callbacks() is called only inside domount(), it is legal
687 * to call fsh_hook_{install,remove}() inside a mount callback WITHOUT holding
688 * this vfs_t. This guarantee should be preserved, because it's in the "Usage"
689 * section in the big theory statement at the top of this file.
690 */
691 void
692 fsh_exec_mount_callbacks(vfs_t *vfsp)
693 {
694 fsh_callback_int_t *fshci;
695 fsh_callback_t *cb;
696 int fsh_context;
697
698 mutex_enter(&fsh_cb_owner_lock);
699 fsh_context = fsh_cb_owner == curthread;
700 mutex_exit(&fsh_cb_owner_lock);
701
702 if (!fsh_context) {
703 mutex_enter(&fsh_cb_lock);
704 mutex_enter(&fsh_cb_owner_lock);
705 fsh_cb_owner = curthread;
706 mutex_exit(&fsh_cb_owner_lock);
707 }
708
709 ASSERT(MUTEX_HELD(&fsh_cb_lock));
710
711 for (fshci = list_head(&fsh_cblist); fshci != NULL;
712 fshci = list_next(&fsh_cblist, fshci)) {
713 cb = &fshci->fshci_cb;
714 if (cb->fshc_mount != NULL)
715 (*(cb->fshc_mount))(vfsp, cb->fshc_arg);
716 }
717
718 if (!fsh_context) {
719 mutex_enter(&fsh_cb_owner_lock);
720 fsh_cb_owner = NULL;
721 mutex_exit(&fsh_cb_owner_lock);
722 mutex_exit(&fsh_cb_lock);
723 }
724 }
725
726 /*
727 * This function is executed right before VFS_FREEVFS() is called in
728 * vfs_rele()@vfs.c. We are sure that it's called only after fsh_init().
729 * It executes all the free callbacks installed in the fsh.
730 *
731 * free() callback is the point after the handles associated with the hooks
732 * installed on this vfs_t become invalid
733 */
734 void
735 fsh_exec_free_callbacks(vfs_t *vfsp)
736 {
737 fsh_callback_int_t *fshci;
738 fsh_callback_t *cb;
739 int fsh_context;
740
741 mutex_enter(&fsh_cb_owner_lock);
742 fsh_context = fsh_cb_owner == curthread;
743 mutex_exit(&fsh_cb_owner_lock);
744
745 if (!fsh_context) {
746 mutex_enter(&fsh_cb_lock);
747 mutex_enter(&fsh_cb_owner_lock);
748 fsh_cb_owner = curthread;
749 mutex_exit(&fsh_cb_owner_lock);
750 }
751
752 ASSERT(MUTEX_HELD(&fsh_cb_lock));
753
754 for (fshci = list_head(&fsh_cblist); fshci != NULL;
755 fshci = list_next(&fsh_cblist, fshci)) {
756 cb = &fshci->fshci_cb;
757 if (cb->fshc_free != NULL)
758 (*(cb->fshc_free))(vfsp, cb->fshc_arg);
759 }
760
761 if (!fsh_context) {
762 mutex_enter(&fsh_cb_owner_lock);
763 fsh_cb_owner = NULL;
764 mutex_exit(&fsh_cb_owner_lock);
765 mutex_exit(&fsh_cb_lock);
766 }
767 }
768
769 /*
770 * API for vnode.c/vfs.c to start executing the fsh for a given operation.
771 *
772 * fsh_xxx() tries to find the first non-NULL xxx hook on the fshfsr_list. If it
773 * does, it executes it. If not, underlying vnodeop/vfsop is called.
774 *
775 * These interfaces are using fsh_res_ptr (in fsh_fsrec_prepare()), so it's
776 * absolutely necessary to call fsh_init() before using them. That's done in
777 * vfsinit().
778 *
779 * While these functions are executing, it's expected that necessary vfs_t's
780 * are held so that vfs_free() isn't called. vfs_free() expects that no one
781 * accesses vfs_fshrecord of a given vfs_t.
782 * It's also the caller's responsibility to keep vnode_t passed to fsh_foo()
783 * alive and valid.
784 * All these expectations are met because these functions are used only in
785 * corresponding {fop,fsop}_foo() functions.
786 */
787 int
788 fsh_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
789 caller_context_t *ct)
790 {
791 int ret;
792 fsh_fsrecord_t *fsrecp;
793 fsh_int_t *fshi;
794 fsh_exec_t *fshe;
795 list_t exec_list;
796
797 fsh_fsrec_prepare(vp->v_vfsp);
798 fsrecp = vp->v_vfsp->vfs_fshrecord;
799
800 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
801 if (!(fsrecp->fshfsr_enabled)) {
802 rw_exit(&fsrecp->fshfsr_lock);
803 return ((*vp->v_op->vop_read)(vp, uiop, ioflag, cr, ct));
804 }
805
806 list_create(&exec_list, sizeof (fsh_exec_t),
807 offsetof(fsh_exec_t, fshe_node));
808
809 for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL;
810 fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
811 if (fshi->fshi_hooks.pre_read != NULL ||
812 fshi->fshi_hooks.post_read != NULL) {
813 if (fshi_hold(fshi)) {
814 fshe = kmem_alloc(sizeof (*fshe), KM_SLEEP);
815 fshe->fshe_fshi = fshi;
816 list_insert_tail(&exec_list, fshe);
817 }
818 }
819 }
820 rw_exit(&fsrecp->fshfsr_lock);
821
822 /* Execute pre hooks */
823 for (fshe = list_head(&exec_list); fshe != NULL;
824 fshe = list_next(&exec_list, fshe)) {
825 if (fshe->fshe_fshi->fshi_hooks.pre_read != NULL)
826 (*fshe->fshe_fshi->fshi_hooks.pre_read)(
827 fshe->fshe_fshi->fshi_hooks.arg,
828 &fshe->fshe_instance,
829 &vp, &uiop, &ioflag, &cr, &ct);
830 }
831
832 ret = (*vp->v_op->vop_read)(vp, uiop, ioflag, cr, ct);
833
834 /* Execute post hooks */
835 while ((fshe = list_remove_tail(&exec_list)) != NULL) {
836 if (fshe->fshe_fshi->fshi_hooks.post_read != NULL)
837 ret = (*fshe->fshe_fshi->fshi_hooks.post_read)(
838 ret, fshe->fshe_fshi->fshi_hooks.arg,
839 fshe->fshe_instance,
840 vp, uiop, ioflag, cr, ct);
841 fshi_rele(fshe->fshe_fshi);
842 kmem_free(fshe, sizeof (*fshe));
843 }
844 list_destroy(&exec_list);
845
846 return (ret);
847 }
848
849 int
850 fsh_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
851 caller_context_t *ct)
852 {
853 int ret;
854 fsh_fsrecord_t *fsrecp;
855 fsh_int_t *fshi;
856 fsh_exec_t *fshe;
857 list_t exec_list;
858
859 fsh_fsrec_prepare(vp->v_vfsp);
860 fsrecp = vp->v_vfsp->vfs_fshrecord;
861
862 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
863 if (!(fsrecp->fshfsr_enabled)) {
864 rw_exit(&fsrecp->fshfsr_lock);
865 return ((*vp->v_op->vop_write)(vp, uiop, ioflag, cr, ct));
866 }
867
868 list_create(&exec_list, sizeof (fsh_exec_t),
869 offsetof(fsh_exec_t, fshe_node));
870
871 for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL;
872 fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
873 if (fshi->fshi_hooks.pre_write != NULL ||
874 fshi->fshi_hooks.post_write != NULL) {
875 if (fshi_hold(fshi)) {
876 fshe = kmem_alloc(sizeof (*fshe), KM_SLEEP);
877 fshe->fshe_fshi = fshi;
878 list_insert_tail(&exec_list, fshe);
879 }
880 }
881 }
882 rw_exit(&fsrecp->fshfsr_lock);
883
884 /* Execute pre hooks */
885 for (fshe = list_head(&exec_list); fshe != NULL;
886 fshe = list_next(&exec_list, fshe)) {
887 if (fshe->fshe_fshi->fshi_hooks.pre_write != NULL)
888 (*fshe->fshe_fshi->fshi_hooks.pre_write)(
889 fshe->fshe_fshi->fshi_hooks.arg,
890 &fshe->fshe_instance,
891 &vp, &uiop, &ioflag, &cr, &ct);
892 }
893
894 ret = (*vp->v_op->vop_write)(vp, uiop, ioflag, cr, ct);
895
896 /* Execute post hooks */
897 while ((fshe = list_remove_tail(&exec_list)) != NULL) {
898 if (fshe->fshe_fshi->fshi_hooks.post_write != NULL)
899 ret = (*fshe->fshe_fshi->fshi_hooks.post_write)(
900 ret, fshe->fshe_fshi->fshi_hooks.arg,
901 fshe->fshe_instance,
902 vp, uiop, ioflag, cr, ct);
903 fshi_rele(fshe->fshe_fshi);
904 kmem_free(fshe, sizeof (*fshe));
905 }
906 list_destroy(&exec_list);
907
908 return (ret);
909 }
910
911 int
912 fsh_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
913 {
914 int ret;
915 fsh_fsrecord_t *fsrecp;
916 fsh_int_t *fshi;
917 fsh_exec_t *fshe;
918 list_t exec_list;
919
920 fsh_fsrec_prepare(vfsp);
921 fsrecp = vfsp->vfs_fshrecord;
922
923 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
924 if (!(fsrecp->fshfsr_enabled)) {
925 rw_exit(&fsrecp->fshfsr_lock);
926 return ((*vfsp->vfs_op->vfs_mount)(vfsp, mvp, uap, cr));
927 }
928
929 list_create(&exec_list, sizeof (fsh_exec_t),
930 offsetof(fsh_exec_t, fshe_node));
931
932 for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL;
933 fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
934 if (fshi->fshi_hooks.pre_mount != NULL ||
935 fshi->fshi_hooks.post_mount != NULL) {
936 if (fshi_hold(fshi)) {
937 fshe = kmem_alloc(sizeof (*fshe), KM_SLEEP);
938 fshe->fshe_fshi = fshi;
939 list_insert_tail(&exec_list, fshe);
940 }
941 }
942 }
943 rw_exit(&fsrecp->fshfsr_lock);
944
945 /* Execute pre hooks */
946 for (fshe = list_head(&exec_list); fshe != NULL;
947 fshe = list_next(&exec_list, fshe)) {
948 if (fshe->fshe_fshi->fshi_hooks.pre_mount != NULL)
949 (*fshe->fshe_fshi->fshi_hooks.pre_mount)(
950 &fshe->fshe_fshi->fshi_hooks.arg,
951 &fshe->fshe_instance,
952 &vfsp, &mvp, &uap, &cr);
953 }
954
955 ret = (*vfsp->vfs_op->vfs_mount)(vfsp, mvp, uap, cr);
956
957 /* Execute post hooks */
958 while ((fshe = list_remove_tail(&exec_list)) != NULL) {
959 if (fshe->fshe_fshi->fshi_hooks.post_mount != NULL)
960 ret = (*fshe->fshe_fshi->fshi_hooks.post_mount)(
961 ret, fshe->fshe_fshi->fshi_hooks.arg,
962 fshe->fshe_instance,
963 vfsp, mvp, uap, cr);
964 fshi_rele(fshe->fshe_fshi);
965 kmem_free(fshe, sizeof (*fshe));
966 }
967 list_destroy(&exec_list);
968
969 return (ret);
970 }
971
972 int
973 fsh_unmount(vfs_t *vfsp, int flag, cred_t *cr)
974 {
975 int ret;
976 fsh_fsrecord_t *fsrecp;
977 fsh_int_t *fshi;
978 fsh_exec_t *fshe;
979 list_t exec_list;
980
981 fsh_fsrec_prepare(vfsp);
982 fsrecp = vfsp->vfs_fshrecord;
983
984 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
985 if (!(fsrecp->fshfsr_enabled)) {
986 rw_exit(&fsrecp->fshfsr_lock);
987 return ((*vfsp->vfs_op->vfs_unmount)(vfsp, flag, cr));
988 }
989
990 list_create(&exec_list, sizeof (fsh_exec_t),
991 offsetof(fsh_exec_t, fshe_node));
992
993 for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL;
994 fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
995 if (fshi->fshi_hooks.pre_unmount != NULL ||
996 fshi->fshi_hooks.post_unmount != NULL) {
997 if (fshi_hold(fshi)) {
998 fshe = kmem_alloc(sizeof (*fshe), KM_SLEEP);
999 fshe->fshe_fshi = fshi;
1000 list_insert_tail(&exec_list, fshe);
1001 }
1002 }
1003 }
1004 rw_exit(&fsrecp->fshfsr_lock);
1005
1006 /* Execute pre hooks */
1007 for (fshe = list_head(&exec_list); fshe != NULL;
1008 fshe = list_next(&exec_list, fshe)) {
1009 if (fshe->fshe_fshi->fshi_hooks.pre_unmount != NULL)
1010 (*fshe->fshe_fshi->fshi_hooks.pre_unmount)(
1011 fshe->fshe_fshi->fshi_hooks.arg,
1012 &fshe->fshe_instance,
1013 &vfsp, &flag, &cr);
1014 }
1015
1016 ret = (*vfsp->vfs_op->vfs_unmount)(vfsp, flag, cr);
1017
1018 /* Execute post hooks */
1019 while ((fshe = list_remove_tail(&exec_list)) != NULL) {
1020 if (fshe->fshe_fshi->fshi_hooks.post_unmount != NULL)
1021 ret = (*fshe->fshe_fshi->fshi_hooks.post_unmount)(
1022 ret, fshe->fshe_fshi->fshi_hooks.arg,
1023 fshe->fshe_instance,
1024 vfsp, flag, cr);
1025 fshi_rele(fshe->fshe_fshi);
1026 kmem_free(fshe, sizeof (*fshe));
1027 }
1028 list_destroy(&exec_list);
1029
1030 return (ret);
1031 }
1032
1033 /*
1034 * This is the funtion used by fsh_fsrec_prepare() to allocate a new
1035 * fsh_fsrecord. This function is called by the first function which
1036 * access the vfs_fshrecord and finds out it's NULL.
1037 */
1038 static fsh_fsrecord_t *
1039 fsh_fsrec_create()
1040 {
1041 fsh_fsrecord_t *fsrecp;
1042
1043 fsrecp = (fsh_fsrecord_t *)kmem_zalloc(sizeof (*fsrecp), KM_SLEEP);
1044 list_create(&fsrecp->fshfsr_list, sizeof (fsh_int_t),
1045 offsetof(fsh_int_t, fshi_node));
1046 rw_init(&fsrecp->fshfsr_lock, NULL, RW_DRIVER, NULL);
1047 fsrecp->fshfsr_enabled = 1;
1048 return (fsrecp);
1049 }
1050
1051
1052 /*
1053 * This call must be used ONLY in vfs_free().
1054 *
1055 * It is required and sufficient to check if fsh_fsrecord_t is not NULL before
1056 * passing it to fsh_fsrec_destroy.
1057 *
1058 * All the remaining hooks are being removed here.
1059 */
1060 void
1061 fsh_fsrec_destroy(struct fsh_fsrecord *volatile fsrecp)
1062 {
1063 fsh_int_t *fshi;
1064
1065 VERIFY(fsrecp != NULL);
1066
1067 _NOTE(CONSTCOND)
1068 while (1) {
1069 mutex_enter(&fsh_lock);
1070 rw_enter(&fsrecp->fshfsr_lock, RW_WRITER);
1071 fshi = list_remove_head(&fsrecp->fshfsr_list);
1072 rw_exit(&fsrecp->fshfsr_lock);
1073 if (fshi == NULL) {
1074 mutex_exit(&fsh_lock);
1075 break;
1076 }
1077 ASSERT(fshi->fshi_doomed == 0);
1078 list_remove(&fsh_map, fshi);
1079 mutex_exit(&fsh_lock);
1080
1081 if (fshi->fshi_hooks.remove_cb != NULL)
1082 (*fshi->fshi_hooks.remove_cb)(fshi->fshi_hooks.arg,
1083 fshi->fshi_handle);
1084
1085 id_free(fsh_idspace, fshi->fshi_handle);
1086 mutex_destroy(&fshi->fshi_lock);
1087 kmem_free(fshi, sizeof (*fshi));
1088
1089 }
1090
1091 list_destroy(&fsrecp->fshfsr_list);
1092 rw_destroy(&fsrecp->fshfsr_lock);
1093 kmem_free(fsrecp, sizeof (*fsrecp));
1094 }
1095
1096 /*
1097 * fsh_init() is called in vfsinit()@vfs.c. This function MUST be called
1098 * before every other fsh call.
1099 */
1100 void
1101 fsh_init(void)
1102 {
1103 mutex_init(&fsh_cb_lock, NULL, MUTEX_DRIVER, NULL);
1104 mutex_init(&fsh_cb_owner_lock, NULL, MUTEX_DRIVER, NULL);
1105 list_create(&fsh_cblist, sizeof (fsh_callback_int_t),
1106 offsetof(fsh_callback_int_t, fshci_node));
1107
1108 mutex_init(&fsh_lock, NULL, MUTEX_DRIVER, NULL);
1109
1110 list_create(&fsh_map, sizeof (fsh_int_t), offsetof(fsh_int_t,
1111 fshi_global));
1112
1113 /* See comment above fsh_fsrec_prepare() */
1114 fsh_res_ptr = (void *)-1;
1115
1116 fsh_idspace = id_space_create("fsh", 0, fsh_limit);
1117 }
|