1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2013 Damian Bogel. All rights reserved.
14 */
15
16 #include <sys/debug.h>
17 #include <sys/errno.h>
18 #include <sys/fsh.h>
19 #include <sys/fsh_impl.h>
20 #include <sys/id_space.h>
21 #include <sys/kmem.h>
22 #include <sys/ksynch.h>
23 #include <sys/list.h>
24 #include <sys/sunddi.h>
25 #include <sys/sysmacros.h>
26 #include <sys/types.h>
27 #include <sys/vfs.h>
28 #include <sys/vnode.h>
29
30 /*
31 * Filesystem hook framework (fsh)
32 *
33 * 1. Abstract.
34 * The main goal of the filesystem hook framework is to provide an easy way to
35 * inject client-defined behaviour into vfs/vnode calls. fsh works on
36 * vfs_t granularity.
37 *
38 *
39 * 2. Overview.
40 * fsh_t is the main object in the fsh. An fsh_t is a structure containing:
41 * - pointers to hooking functions (named after corresponding
42 * vnodeops/vfsops)
43 * - a pointer to an argument to pass (this is shared for all the
44 * hooks in a given fsh_t)
45 * - a pointer to the *hook remove callback* - it's being fired after a
46 * hook is removed and the hook has stopped executing. It's safe to destroy
47 * any data associated with this hook.
48 *
49 * The information from fsh_t is copied by the fsh and an fsh_handle_t
50 * is returned. It should be used for further removing.
51 *
52 *
53 * 3. Usage.
54 * It is expected that vfs_t/vnode_t that are passed to fsh_foo() functions
55 * are held by the caller when needed. fsh does no vfs_t/vnode_t locking.
56 *
 * fsh_t is a structure filled out by the client. If a client does not want
 * to add/remove a hook for function foo(), it should fill the foo field of
 * fsh_t with NULL. Every hook has the type of the corresponding vfsop/vnodeop
 * with two additional leading arguments:
 *	- fsh_int_t *fsh_int - this argument MUST be passed to
 *	  fsh_next_foo(). fsh wouldn't know which hook to execute next
 *	  without it
 *	- void *arg - this is the argument passed with fsh_t during
 *	  installation
 * Besides the hooks and arg, fsh_t also carries:
 *	- void (*remove_cb)(void *, fsh_handle_t) - hook remove callback
 *	  (mentioned earlier); its first argument is arg, the second is the
 *	  handle
70 * After installation, an fsh_handle_t is returned to the caller.
71 *
72 * Every hook function is responsible for passing the control to the next
73 * hook associated with a particular call. In order to provide an easy way to
74 * modify the behaviour of a function call both before and after the
75 * underlying vfsop/vnodeop (or next hook) execution, a hook has to call
76 * fsh_next_foo() at some point. This function does necessary internal
77 * operations and calls the next hook, until there's no hook left, then it
78 * calls the underlying vfsop/vnodeop.
79 * Example:
80 * my_freefs(fsh_int_t *fsh_int, void *arg, vfs_t *vfsp) {
81 * cmn_err(CE_NOTE, "freefs called!\n");
82 * return (fsh_next_freefs(fsh_int, vfsp));
83 * }
84 *
85 *
86 * A client might want to fire callbacks when vfs_t's are being mounted
87 * or freed. There's an fsh_callback_t structure provided to install such
88 * callbacks along with the API.
89 * It is legal to call fsh_hook_{install,remove}() inside a mount callback
90 * WITHOUT holding the vfs_t.
91 *
92 * After vfs_t's free callback returns, all the handles associated with the
93 * hooks installed on this vfs_t are invalid and must not be used.
94 *
95 *
96 * 4. API
97 * None of the APIs should be called during interrupt context above lock
98 * level. The only exceptions are fsh_next_foo() functions, which do not use
99 * locks.
100 *
101 * a) fsh.h
102 * Any of these functions could be called inside a hook or a hook remove
103 * callback.
104 * fsh_callback_{install,remove}() must not be called inside a {mount,free}
105 * callback. Doing so will cause a deadlock. Other functions can be called
106 * inside {mount,free} callbacks.
107 *
108 * fsh_fs_enable(vfs_t *vfsp)
109 * fsh_fs_disable(vfs_t *vfsp)
110 * Enables/disables fsh for a given vfs_t.
111 *
112 * fsh_hook_install(vfs_t *vfsp, fsh_t *hooks)
113 * Installs hooks on vfsp filesystem.
114 * It's important that hooks are executed in LIFO installation order,
115 * which means that if there are hooks A and B installed in this order, B
116 * is going to be executed before A.
117 * It returns a correct handle, or (-1) if hook/callback limit exceeded.
118 * The handle is valid until a free callback returns or an explicit call
119 * to fsh_hook_remove().
120 *
121 * fsh_hook_remove(fsh_handle_t handle)
122 * Removes a hook and invalidates the handle.
 *	It is guaranteed that after this function returns, calls to
124 * vnodeops/vfsops won't go through this hook, although there might be
125 * some threads still executing this hook. When hook remove callback is
126 * fired, it is guaranteed that the hook won't be executed anymore. It is
127 * safe to remove all the internal data associated with this hook inside
128 * the hook remove callback.
129 *
 * fsh_next_foo(fsh_int_t *fsh_int, ARGUMENTS)
131 * This is the function which should be called once in every hook. It
132 * does the necessary internal operations and passes control to the
133 * next hook or, if there's no hook left, to the underlying
134 * vfsop/vnodeop.
135 *
136 * fsh_callback_install(fsh_callback_t *callback)
137 * fsh_callback_remove(fsh_callback_handle_t handle)
138 * Installs/removes callbacks for vfs_t mount/free. The mount callback
139 * is executed right before domount() returns. The free callback is
140 * called right before VFS_FREEVFS() is called.
141 * The fsh_callback_install() returns a correct handle, or (-1) if
142 * hook/callback limit exceeded.
143 *
144 * b) fsh_impl.h (for vfs.c and vnode.c only)
145 * fsh_init()
146 * This call has to be done in vfsinit(). It initialises the fsh. It
147 * is absolutely necessary that this call is made before any other fsh
148 * operation.
149 *
150 * fsh_exec_mount_callbacks(vfs_t *vfsp)
151 * fsh_exec_free_callbacks(vfs_t *vfsp)
152 * Used to execute all fsh callbacks for {mount,free} of a vfs_t.
153 *
154 * fsh_fsrec_destroy(struct fsh_fsrecord *fsrecp)
155 * Destroys an fsh_fsrecord structure. All the hooks installed on this
156 * vfs_t are then destroyed. free callback is called before this function.
157 *
158 * fsh_foo(ARGUMENTS)
159 * Function used to start executing the hook chain for a given call.
160 *
161 *
162 * 5. Internals.
163 * fsh_int_t is an internal hook structure. It is reference counted.
164 * fshi_hold() and fshi_rele() should be used whenever needed.
165 * fsh_int_t entries are elements of both fsh_map (global) and fshfsr_list
166 * (local to vfs_t). All entries are unique and are identified by fshi_handle.
167 *
168 * fsh_int_t properties:
169 * - fsh_hook_install() sets the ref. counter to 1 and adds it to both
170 * fsh_map and fshfsr_list
171 * - fsh_hook_remove() decreases the ref. counter by 1, removes the hook
172 * from fsh_map and marks the hook as *doomed*
173 * - if fsh_int_t is on the fshfsr_list, it's alive and there is a thread
174 * executing it
 *	- if fsh_int_t is marked as *doomed*, the reference counter cannot be
 *	  increased and thus no thread can acquire this fsh_int_t
177 * - ref. counter can drop to 0 only after an fsh_hook_remove() call; this
178 * also means that the fsh_int_t is *doomed* and isn't a part of fsh_map
179 * - fsh_int_t could be also destroyed without fsh_hook_remove() call,
180 * that happens only inside fsh_fsrec_destroy() where it is guaranteed
181 * that there is no thread executing the hook
182 *
183 *
184 * fsh_fsrecord_t is a structure which lives inside a vfs_t.
185 * fsh_fsrecord_t contains:
186 * - an rw-lock that protects the structure
187 * - a list of hooks installed on this vfs_t
188 * - a flag which tells whether fsh is enabled on this vfs_t
189 *
190 *
191 * fsh_prepare_fsrec rule:
192 * Every function that needs vfsp->vfs_fshrecord has to call
193 * fsh_prepare_fsrec() first. If and only if the call is made, it is safe to
194 * use vfsp->vfs_fshrecord.
195 *
 * Unfortunately, because of unexpected behaviour of some filesystems (no use
 * of vfs_alloc()/vfs_init()) there's no good place to initialise the
 * fsh_fsrecord_t structure. The approach being used here is to check if it's
 * initialised in every call. Because of the fact that no lock could be used
 * here (the same problem with initialisation), a spinlock is used. This is
 * explained in more detail in a comment before fsh_prepare_fsrec(). After
 * calling fsh_prepare_fsrec() it's completely safe to keep the vfs_fshrecord
 * pointer locally, because it won't be changed until vfs_free() is called.
204 *
 * The only exception from the fsh_prepare_fsrec() rule is vfs_free(),
 * where it is expected that no other fsh calls will be made for the
 * vfs_t that's being freed. That's why vfs_fshrecord could be only NULL or a
 * valid pointer and could not be concurrently accessed.
209 *
210 * When there are no fsh functions (that use a particular fsh_fsrecord_t)
211 * executing, the vfs_fshrecord pointer won't be equal to fsh_res_ptr. It
212 * would be NULL or a pointer to an initialised fsh_fsrecord_t.
213 *
214 *
215 * Callbacks:
216 * Mount callbacks are executed by a call to fsh_exec_mount_callbacks() right
217 * before returning from domount()@vfs.c.
218 *
219 * Free callbacks are executed by a call to fsh_exec_free_callbacks() right
220 * before calling VFS_FREEVFS(), after vfs_t's reference count drops to 0.
221 *
222 *
223 * fsh_next_foo(fsh_int_t *fshi, ARGUMENTS)
224 * This function is quite simple. It takes the fsh_int_t and passes control
225 * to the next hook or to the underlying vnodeop/vfsop.
226 *
227 *
228 * 6. Locking
229 * a) public
230 * fsh does no vfs_t nor vnode_t locking. It is expected that whenever it is
231 * needed, the client does that.
232 *
233 * fsh_callback_{install,remove} must not be called inside a callback, because
234 * it will cause a deadlock.
235 *
236 * b) internal
237 * Locking diagram:
238 *
239 * fsh_hook_install() fsh_hook_remove() fsh_fsrec_destroy()
240 * | | |
241 * | | |
242 * +------------------+ | +------------+
243 * | | |
244 * V V V
245 * fsh_lock
246 * | |
247 * | +----- fshfsr_lock, RW_WRITER ---+
248 * | |
249 * V |
250 * +---------------------------------------+ |
251 * | fsh_map | |
252 * | | |
253 * +----|-> vfsp->vfs_fshrecord->fshfsr_list <--|--------------+
254 * | +------------------------------^--------+
255 * | |
256 * | |
257 * fshfsr_lock, RW_READER fshfsr_lock, RW_WRITER
258 * | |
259 * | |
260 * fsh_read(), fshi_rele()
261 * fsh_write(),
262 * ..., Might be called from:
263 * fsh_next_read(), fsh_hook_remove()
264 * fsh_next_write(), fsh_read(), fsh_write(), ...
265 * ... fsh_next_read(), fsh_next_write(), ...
266 *
 * fsh_lock is a global lock for the administrative path (fsh_hook_install,
268 * fsh_hook_remove) and fsh_fsrec_destroy() (which is semi-administrative, since
269 * it destroys the unremoved hooks). It is used only when fsh_map needs to be
270 * locked. The usage of this lock guarantees that the data in fsh_map and
271 * fshfsr_lists is consistent.
272 */
273
274
275 /* Internals */
276 struct fsh_int {
277 fsh_handle_t fshi_handle;
278 fsh_t fshi_hooks;
279 vfs_t *fshi_vfsp;
280
281 kmutex_t fshi_lock;
282 uint64_t fshi_ref;
283 uint64_t fshi_doomed; /* changed inside fsh_lock */
284
285 /* next node in fshfsr_list */
286 list_node_t fshi_next;
287
288 /* next node in fsh_map */
289 list_node_t fshi_global;
290 };
291
292 typedef struct fsh_callback_int {
293 fsh_callback_t fshci_cb;
294 fsh_callback_handle_t fshci_handle;
295 list_node_t fshci_next;
296 } fsh_callback_int_t;
297
298
299 static kmutex_t fsh_lock;
300
301 /*
 * fsh_fsrecord_t is the main internal structure. Its content is protected
303 * by fshfsr_lock. The fshfsr_list is a list of fsh_int_t hook entries for
304 * the vfs_t that contains the fsh_fsrecord_t.
305 */
306 struct fsh_fsrecord {
307 krwlock_t fshfsr_lock;
308 int fshfsr_enabled;
309 list_t fshfsr_list;
310 };
311
312 /*
313 * Global list of fsh_int_t. Protected by fsh_lock.
314 */
315 static list_t fsh_map;
316
317 /*
318 * Global list of fsh_callback_int_t.
319 */
320 static krwlock_t fsh_cblist_lock;
321 static list_t fsh_cblist;
322
323 /*
324 * A reserved pointer for fsh purposes. It is used because of the method
325 * chosen for solving concurrency issues with vfs_fshrecord. The full
326 * explanation is in the big theory statement at the beginning of this
327 * file and above fsh_fsrec_prepare(). It is initialised in fsh_init().
328 */
329 static void *fsh_res_ptr;
330
331 static fsh_fsrecord_t *fsh_fsrec_create();
332
333 int fsh_limit = INT_MAX;
334 static id_space_t *fsh_idspace;
335
336 /*
337 * fsh_prepare_fsrec()
338 *
339 * Important note:
340 * Before using this function, fsh_init() MUST be called. We do that in
341 * vfsinit()@vfs.c.
342 *
343 * One would ask, why isn't the vfsp->vfs_fshrecord initialised when the
344 * vfs_t is created. Unfortunately, some filesystems (e.g. fifofs) do not
 * call vfs_init() or even vfs_alloc(). It's possible that some unbundled
346 * filesystems could do the same thing. That's why this solution is
347 * introduced. It should be called before any code that needs access to
348 * vfs_fshrecord.
349 *
350 * Locking:
351 * There are no locks here, because there's no good place to initialise
352 * the lock. Concurrency issues are solved by using atomic instructions
353 * and a spinlock, which is spinning only once for a given vfs_t. Because
354 * of that, the usage of the spinlock isn't bad at all.
355 *
356 * How it works:
357 * a) if vfsp->vfs_fshrecord equals NULL, atomic_cas_ptr() changes it to
358 * fsh_res_ptr. That's a signal for other threads, that the structure
359 * is being initialised.
360 * b) if vfsp->vfs_fshrecord equals fsh_res_ptr, that means we have to wait,
361 * because vfs_fshrecord is being initialised by another call.
362 * c) other cases:
363 * vfs_fshrecord is already initialised, so we can use it. It won't change
364 * until vfs_free() is called. It can't happen when someone is holding
365 * the vfs_t, which is expected from the caller of fsh API.
366 */
367 static void
368 fsh_prepare_fsrec(vfs_t *vfsp)
369 {
370 fsh_fsrecord_t *fsrec;
371
372 while ((fsrec = atomic_cas_ptr(&vfsp->vfs_fshrecord, NULL,
373 fsh_res_ptr)) == fsh_res_ptr)
374 ;
375
376 if (fsrec == NULL)
377 atomic_swap_ptr(&vfsp->vfs_fshrecord, fsh_fsrec_create());
378 }
379
380 /*
381 * API for enabling/disabling fsh per vfs_t.
382 *
383 * A newly created vfs_t has fsh enabled by default. If one would want to change
384 * this behaviour, mount callbacks could be used.
385 *
386 * The caller is expected to hold the vfs_t.
387 *
388 * These functions must NOT be called in a hook.
389 */
390 void
391 fsh_fs_enable(vfs_t *vfsp)
392 {
393 fsh_prepare_fsrec(vfsp);
394
395 rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
396 vfsp->vfs_fshrecord->fshfsr_enabled = 1;
397 rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
398 }
399
400 void
401 fsh_fs_disable(vfs_t *vfsp)
402 {
403 fsh_prepare_fsrec(vfsp);
404
405 rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
406 vfsp->vfs_fshrecord->fshfsr_enabled = 0;
407 rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
408 }
409
410 /*
411 * API used for installing hooks. fsh_handle_t is returned for further
412 * actions (currently just removing) on this set of hooks.
413 *
414 * fsh_t fields:
415 * - arg - argument passed to every hook
 *	- remove_cb - remove callback, called after a hook is removed and all
 *	  the threads have stopped executing it
418 * - read, write, ... - pointers to hooks for corresponding vnodeops/vfsops;
419 * if there is no hook desired for an operation, it should be set to
420 * NULL
421 *
422 * It's important that the hooks are executed in LIFO installation order (they
423 * are added to the head of the hook list).
424 *
425 * The caller is expected to hold the vfs_t.
426 *
427 * Returns (-1) if hook/callback limit exceeded, handle otherwise.
428 */
429 fsh_handle_t
430 fsh_hook_install(vfs_t *vfsp, fsh_t *hooks)
431 {
432 fsh_handle_t handle;
433 fsh_int_t *fshi;
434
435 fsh_prepare_fsrec(vfsp);
436
437 if ((handle = id_alloc(fsh_idspace)) == -1)
438 return (-1);
439
440 fshi = kmem_alloc(sizeof (*fshi), KM_SLEEP);
441 mutex_init(&fshi->fshi_lock, NULL, MUTEX_DRIVER, NULL);
442 (void) memcpy(&fshi->fshi_hooks, hooks, sizeof (fshi->fshi_hooks));
443 fshi->fshi_handle = handle;
444 fshi->fshi_doomed = 0;
445 fshi->fshi_ref = 1;
446 fshi->fshi_vfsp = vfsp;
447
448 mutex_enter(&fsh_lock);
449 rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
450 list_insert_head(&vfsp->vfs_fshrecord->fshfsr_list, fshi);
451 rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
452
453 list_insert_head(&fsh_map, fshi);
454 mutex_exit(&fsh_lock);
455
456 return (handle);
457 }
458
459 static int
460 fshi_hold(fsh_int_t *fshi)
461 {
462 int can_hold;
463
464 mutex_enter(&fshi->fshi_lock);
465 if (fshi->fshi_doomed == 1) {
466 can_hold = 0;
467 } else {
468 fshi->fshi_ref++;
469 can_hold = 1;
470 }
471 mutex_exit(&fshi->fshi_lock);
472
473 return (can_hold);
474 }
475
476 /*
477 * This function must not be called while fshfsr_lock is held. Doing so could
478 * cause a deadlock.
479 */
480 static void
481 fshi_rele(fsh_int_t *fshi)
482 {
483 int destroy;
484
485 mutex_enter(&fshi->fshi_lock);
486 ASSERT(fshi->fshi_ref > 0);
487 fshi->fshi_ref--;
488 if (fshi->fshi_ref == 0) {
489 ASSERT(fshi->fshi_doomed == 1);
490 destroy = 1;
491 } else {
492 destroy = 0;
493 }
494 mutex_exit(&fshi->fshi_lock);
495
496 if (destroy) {
497 /*
498 * At this point, we are sure that fsh_hook_remove() has been
499 * called, that's why we don't remove the fshi from fsh_map.
500 * fsh_hook_remove() did that already.
501 */
502 fsh_fsrecord_t *fsrecp;
503
504 if (fshi->fshi_hooks.remove_cb != NULL)
505 (*fshi->fshi_hooks.remove_cb)(
506 fshi->fshi_hooks.arg, fshi->fshi_handle);
507 /*
508 * We don't have to call fsh_prepare_fsrec() here.
509 * fsh_fsrecord_t is already initialised, because we've found a
510 * mapping for the given handle.
511 */
512 fsrecp = fshi->fshi_vfsp->vfs_fshrecord;
513 ASSERT(fsrecp != NULL);
514 ASSERT(fsrecp != fsh_res_ptr);
515
516 rw_enter(&fsrecp->fshfsr_lock, RW_WRITER);
517 list_remove(&fsrecp->fshfsr_list, fshi);
518 rw_exit(&fsrecp->fshfsr_lock);
519
520 id_free(fsh_idspace, fshi->fshi_handle);
521 mutex_destroy(&fshi->fshi_lock);
522 kmem_free(fshi, sizeof (*fshi));
523 }
524 }
525
526 /*
527 * Used for removing a hook set.
528 *
529 * fsh_hook_remove() invalidates the given handle.
530 *
531 * It is guaranteed, that after successful return from fsh_hook_remove(),
532 * calls to vnodeops/vfsops, on the vfs_t on which the hook is installed, won't
533 * go through this hook.
534 *
535 * There is no guarantee that after fsh_hook_remove() returns, the hook
536 * associated with the handle won't be executing. Instead, it is guaranteed that
 * when remove_cb() is called, the hook finished its execution in all threads.
538 * It is safe to destroy all internal data associated with this hook inside
539 * remove_cb().
540 *
541 * It is possible that remove_cb() would be called before fsh_hook_remove()
542 * returns.
543 *
544 * Returns (-1) if hook wasn't found, 0 otherwise.
545 */
546 int
547 fsh_hook_remove(fsh_handle_t handle)
548 {
549 fsh_int_t *fshi;
550
551 mutex_enter(&fsh_lock);
552 for (fshi = list_head(&fsh_map); fshi != NULL;
553 fshi = list_next(&fsh_map, fshi)) {
554 if (fshi->fshi_handle == handle) {
555 list_remove(&fsh_map, fshi);
556 break;
557 }
558 }
559
560 if (fshi == NULL)
561 return (-1);
562
563 mutex_enter(&fshi->fshi_lock);
564 ASSERT(fshi->fshi_doomed == 0);
565 fshi->fshi_doomed = 1;
566 mutex_exit(&fshi->fshi_lock);
567 mutex_exit(&fsh_lock);
568
569 fshi_rele(fshi);
570
571 return (0);
572 }
573
574 /*
575 * API for installing global mount/free callbacks.
576 *
577 * fsh_callback_t fields:
578 * fshc_arg - argument passed to the callbacks
579 * fshc_free - callback fired before VFS_FREEVFS() is called, after vfs_count
580 * drops to 0
581 * fshc_mount - callback fired right before returning from domount()
582 * The first argument of these callbacks is the vfs_t that is mounted/freed.
583 * The second one is the fshc_arg.
584 *
585 * fsh_callback_handle_t is filled out by this function.
586 *
587 * This function must NOT be called in a callback, because it will cause
588 * a deadlock.
589 *
590 * Returns (-1) if hook/callback limit exceeded.
591 */
592 fsh_callback_handle_t
593 fsh_callback_install(fsh_callback_t *callback)
594 {
595 fsh_callback_int_t *fshci;
596 fsh_callback_handle_t handle;
597
598 if ((handle = id_alloc(fsh_idspace)) == -1)
599 return (-1);
600
601 fshci = (fsh_callback_int_t *)kmem_alloc(sizeof (*fshci), KM_SLEEP);
602 (void) memcpy(&fshci->fshci_cb, callback, sizeof (fshci->fshci_cb));
603 fshci->fshci_handle = handle;
604
605 /* If it is called in a {mount,free} callback, causes deadlock. */
606 rw_enter(&fsh_cblist_lock, RW_WRITER);
607 list_insert_head(&fsh_cblist, fshci);
608 rw_exit(&fsh_cblist_lock);
609
610 return (handle);
611 }
612
613 /*
614 * API for removing global mount/free callbacks.
615 *
616 * This function must NOT be called in a callback, because it will cause
617 * a deadlock.
618 *
619 * Returns (-1) if callback wasn't found, 0 otherwise.
620 */
621 int
622 fsh_callback_remove(fsh_callback_handle_t handle)
623 {
624 fsh_callback_int_t *fshci;
625
626 /* If it is called in a {mount,free} callback, causes deadlock. */
627 rw_enter(&fsh_cblist_lock, RW_WRITER);
628 for (fshci = list_head(&fsh_cblist); fshci != NULL;
629 fshci = list_next(&fsh_cblist, fshci)) {
630 if (fshci->fshci_handle == handle) {
631 list_remove(&fsh_cblist, fshci);
632 break;
633 }
634 }
635 rw_exit(&fsh_cblist_lock);
636
637 if (fshci == NULL)
638 return (-1);
639
640 kmem_free(fshci, sizeof (*fshci));
641 id_free(fsh_idspace, handle);
642
643 return (0);
644 }
645
646 /*
647 * This function is executed right before returning from domount()@vfs.c.
648 * We are sure that it's called only after fsh_init().
649 * It executes all the mount callbacks installed in the fsh.
650 *
651 * Since fsh_exec_mount_callbacks() is called only inside domount(), it is legal
652 * to call fsh_hook_{install,remove}() inside a mount callback WITHOUT holding
653 * this vfs_t. This guarantee should be preserved, because it's in the "Usage"
654 * section in the big theory statement at the top of this file.
655 */
656 void
657 fsh_exec_mount_callbacks(vfs_t *vfsp)
658 {
659 fsh_callback_int_t *fshci;
660 fsh_callback_t *cb;
661
662 rw_enter(&fsh_cblist_lock, RW_READER);
663 for (fshci = list_head(&fsh_cblist); fshci != NULL;
664 fshci = list_next(&fsh_cblist, fshci)) {
665 cb = &fshci->fshci_cb;
666 if (cb->fshc_mount != NULL)
667 (*(cb->fshc_mount))(vfsp, cb->fshc_arg);
668 }
669 rw_exit(&fsh_cblist_lock);
670 }
671
672 /*
673 * This function is executed right before VFS_FREEVFS() is called in
674 * vfs_rele()@vfs.c. We are sure that it's called only after fsh_init().
675 * It executes all the free callbacks installed in the fsh.
676 *
677 * free() callback is the point after the handles associated with the hooks
678 * installed on this vfs_t become invalid
679 */
680 void
681 fsh_exec_free_callbacks(vfs_t *vfsp)
682 {
683 fsh_callback_int_t *fshci;
684 fsh_callback_t *cb;
685
686 rw_enter(&fsh_cblist_lock, RW_READER);
687 for (fshci = list_head(&fsh_cblist); fshci != NULL;
688 fshci = list_next(&fsh_cblist, fshci)) {
689 cb = &fshci->fshci_cb;
690 if (cb->fshc_free != NULL)
691 (*(cb->fshc_free))(vfsp, cb->fshc_arg);
692 }
693 rw_exit(&fsh_cblist_lock);
694 }
695
696 /*
697 * API for vnode.c/vfs.c to start executing the fsh for a given operation.
698 *
699 * fsh_xxx() tries to find the first non-NULL xxx hook on the fshfsr_list. If it
700 * does, it executes it. If not, underlying vnodeop/vfsop is called.
701 *
702 * These interfaces are using fsh_res_ptr (in fsh_prepare_fsrec()), so it's
703 * absolutely necessary to call fsh_init() before using them. That's done in
704 * vfsinit().
705 *
 * While these functions are executing, it's expected that necessary vfs_t's
 * are held so that vfs_free() isn't called. vfs_free() expects that no one
 * accesses vfs_fshrecord of a given vfs_t.
 * It's also the caller's responsibility to keep vnode_t passed to fsh_foo()
 * alive and valid.
 * All these expectations are met because these functions are used only in
 * corresponding {fop,fsop}_foo() functions.
713 */
714 int
715 fsh_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
716 caller_context_t *ct)
717 {
718 int ret;
719 fsh_fsrecord_t *fsrecp;
720 fsh_int_t *fshi;
721
722 fsh_prepare_fsrec(vp->v_vfsp);
723 fsrecp = vp->v_vfsp->vfs_fshrecord;
724
725 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
726 if (!(fsrecp->fshfsr_enabled)) {
727 rw_exit(&fsrecp->fshfsr_lock);
728 return ((*(vp->v_op->vop_read))(vp, uiop, ioflag, cr, ct));
729 }
730
731 for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL;
732 fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
733 if (fshi->fshi_hooks.read != NULL)
734 if (fshi_hold(fshi))
735 break;
736 }
737 rw_exit(&fsrecp->fshfsr_lock);
738
739 if (fshi == NULL)
740 return ((*(vp->v_op->vop_read))(vp, uiop, ioflag, cr, ct));
741
742 ret = (*fshi->fshi_hooks.read)(fshi, fshi->fshi_hooks.arg,
743 vp, uiop, ioflag, cr, ct);
744 fshi_rele(fshi);
745 return (ret);
746 }
747
748 int
749 fsh_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
750 caller_context_t *ct)
751 {
752 fsh_int_t *fshi;
753 int ret;
754 fsh_fsrecord_t *fsrecp;
755
756 fsh_prepare_fsrec(vp->v_vfsp);
757 fsrecp = vp->v_vfsp->vfs_fshrecord;
758
759 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
760 if (!(vp->v_vfsp->vfs_fshrecord->fshfsr_enabled)) {
761 rw_exit(&fsrecp->fshfsr_lock);
762 return ((*(vp->v_op->vop_write))(vp, uiop, ioflag, cr, ct));
763 }
764
765 for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL;
766 fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
767 if (fshi->fshi_hooks.write != NULL)
768 if (fshi_hold(fshi))
769 break;
770 }
771 rw_exit(&fsrecp->fshfsr_lock);
772
773 if (fshi == NULL)
774 return ((*(vp->v_op->vop_write))(vp, uiop, ioflag, cr, ct));
775
776 ret = (*fshi->fshi_hooks.write)(fshi, fshi->fshi_hooks.arg,
777 vp, uiop, ioflag, cr, ct);
778 fshi_rele(fshi);
779 return (ret);
780 }
781
782 int
783 fsh_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
784 {
785 fsh_fsrecord_t *fsrecp;
786 fsh_int_t *fshi;
787 int ret;
788
789 fsh_prepare_fsrec(vfsp);
790 fsrecp = vfsp->vfs_fshrecord;
791
792 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
793 if (!(fsrecp->fshfsr_enabled)) {
794 rw_exit(&fsrecp->fshfsr_lock);
795 return ((*(vfsp->vfs_op->vfs_mount))(vfsp, mvp, uap, cr));
796 }
797
798 for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL;
799 fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
800 if (fshi->fshi_hooks.mount != NULL)
801 if (fshi_hold(fshi))
802 break;
803 }
804 rw_exit(&fsrecp->fshfsr_lock);
805
806 if (fshi == NULL)
807 return ((*(vfsp->vfs_op->vfs_mount))(vfsp, mvp, uap, cr));
808
809 ret = (*fshi->fshi_hooks.mount)(fshi, fshi->fshi_hooks.arg,
810 vfsp, mvp, uap, cr);
811 fshi_rele(fshi);
812 return (ret);
813 }
814
815 int
816 fsh_unmount(vfs_t *vfsp, int flag, cred_t *cr)
817 {
818 fsh_fsrecord_t *fsrecp;
819 fsh_int_t *fshi;
820 int ret;
821
822 fsh_prepare_fsrec(vfsp);
823 fsrecp = vfsp->vfs_fshrecord;
824
825 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
826 if (!(fsrecp->fshfsr_enabled)) {
827 rw_exit(&fsrecp->fshfsr_lock);
828 return ((*(vfsp->vfs_op->vfs_unmount))(vfsp, flag, cr));
829 }
830
831 for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL;
832 fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
833 if (fshi->fshi_hooks.unmount != NULL)
834 if (fshi_hold(fshi))
835 break;
836 }
837 rw_exit(&fsrecp->fshfsr_lock);
838
839 if (fshi == NULL)
840 return ((*(vfsp->vfs_op->vfs_unmount))(vfsp, flag, cr));
841
842 ret = (*fshi->fshi_hooks.unmount)(fshi, fshi->fshi_hooks.arg,
843 vfsp, flag, cr);
844 fshi_rele(fshi);
845 return (ret);
846 }
847
848 /*
 * This is the function used by fsh_prepare_fsrec() to allocate a new
 * fsh_fsrecord. This function is called by the first function which
 * accesses the vfs_fshrecord and finds out it's NULL.
852 */
853 static fsh_fsrecord_t *
854 fsh_fsrec_create()
855 {
856 fsh_fsrecord_t *fsrecp;
857
858 fsrecp = (fsh_fsrecord_t *)kmem_zalloc(sizeof (*fsrecp), KM_SLEEP);
859 list_create(&fsrecp->fshfsr_list, sizeof (fsh_int_t),
860 offsetof(fsh_int_t, fshi_next));
861 rw_init(&fsrecp->fshfsr_lock, NULL, RW_DRIVER, NULL);
862 fsrecp->fshfsr_enabled = 1;
863 return (fsrecp);
864 }
865
866
867 /*
868 * This call can be used ONLY in vfs_free(). It's assumed that no other
869 * fsh calls using the vfs_t that owns the fsh_fsrecord to be destroyed
 * are executing while a call to fsh_fsrec_destroy() is made. With these
 * assumptions, no concurrency issues occur.
872 *
873 * Before calling this function outside the fsh, it's sufficient and
874 * required to check if the passed fsh_fsrecord * is not NULL. We don't
875 * have to check if it is not equal to fsh_res_ptr, because all the fsh API
876 * calls involving this vfs_t should end before vfs_free() is called
877 * (outside the fsh, fsh_fsrecord is never equal to fsh_res_ptr). That is
878 * guaranteed by the explicit requirement that the caller of fsh API holds
879 * the vfs_t when needed.
880 *
881 * All the remaining hooks are being removed.
882 */
883 void
884 fsh_fsrec_destroy(struct fsh_fsrecord *volatile fsrecp)
885 {
886 fsh_int_t *fshi;
887
888 VERIFY(fsrecp != NULL);
889
890 _NOTE(CONSTCOND)
891 while (1) {
892 mutex_enter(&fsh_lock);
893 fshi = list_remove_head(&fsrecp->fshfsr_list);
894 if (fshi == NULL) {
895 mutex_exit(&fsh_lock);
896 break;
897 }
898 ASSERT(fshi->fshi_doomed == 0);
899 list_remove(&fsh_map, fshi);
900 mutex_exit(&fsh_lock);
901
902 if (fshi->fshi_hooks.remove_cb != NULL)
903 (*fshi->fshi_hooks.remove_cb)(fshi->fshi_hooks.arg,
904 fshi->fshi_handle);
905 id_free(fsh_idspace, fshi->fshi_handle);
906 mutex_destroy(&fshi->fshi_lock);
907 kmem_free(fshi, sizeof (*fshi));
908
909 }
910
911 list_destroy(&fsrecp->fshfsr_list);
912 rw_destroy(&fsrecp->fshfsr_lock);
913 kmem_free(fsrecp, sizeof (*fsrecp));
914 }
915
916 /*
917 * fsh_init() is called in vfsinit()@vfs.c. This function MUST be called
918 * before every other fsh call.
919 */
920 void
921 fsh_init(void)
922 {
923 rw_init(&fsh_cblist_lock, NULL, RW_DRIVER, NULL);
924 list_create(&fsh_cblist, sizeof (fsh_callback_int_t),
925 offsetof(fsh_callback_int_t, fshci_next));
926
927 mutex_init(&fsh_lock, NULL, MUTEX_DRIVER, NULL);
928
929 list_create(&fsh_map, sizeof (fsh_int_t), offsetof(fsh_int_t,
930 fshi_global));
931
932 /* See comment above fsh_prepare_fsrec() */
933 fsh_res_ptr = (void *)-1;
934
935 fsh_idspace = id_space_create("fsh", 0, fsh_limit);
936 }
937
938 /*
 * These functions are used to pass control to the next hook or underlying
 * vop or vfsop. Their callers don't have to worry about any locking.
941 */
942 int
943 fsh_next_read(fsh_int_t *fshi, vnode_t *vp, uio_t *uiop, int ioflag,
944 cred_t *cr, caller_context_t *ct)
945 {
946 int ret;
947 fsh_fsrecord_t *fsrecp = vp->v_vfsp->vfs_fshrecord;
948
949 /*
950 * The passed fshi is the previous hook (the one from which we've been
951 * called). We need to find the next one.
952 */
953 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
954 for (fshi = list_next(&fsrecp->fshfsr_list, fshi); fshi != NULL;
955 fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
956 if (fshi->fshi_hooks.read != NULL)
957 if (fshi_hold(fshi))
958 break;
959 }
960 rw_exit(&fsrecp->fshfsr_lock);
961
962 if (fshi == NULL)
963 return ((*vp->v_op->vop_read)(vp, uiop, ioflag, cr, ct));
964
965 ret = (*fshi->fshi_hooks.read)(fshi, fshi->fshi_hooks.arg,
966 vp, uiop, ioflag, cr, ct);
967 fshi_rele(fshi);
968 return (ret);
969 }
970
971 int
972 fsh_next_write(fsh_int_t *fshi, vnode_t *vp, uio_t *uiop, int ioflag,
973 cred_t *cr, caller_context_t *ct)
974 {
975 fsh_fsrecord_t *fsrecp = vp->v_vfsp->vfs_fshrecord;
976 int ret;
977
978 /*
979 * The passed fshi is the previous hook (the one from which we've been
980 * called). We need to find the next one.
981 */
982 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
983 for (fshi = list_next(&fsrecp->fshfsr_list, fshi); fshi != NULL;
984 fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
985 if (fshi->fshi_hooks.write != NULL)
986 if (fshi_hold(fshi))
987 break;
988 }
989 rw_exit(&fsrecp->fshfsr_lock);
990
991 if (fshi == NULL)
992 return ((*vp->v_op->vop_write)(vp, uiop, ioflag, cr, ct));
993
994 ret = (*fshi->fshi_hooks.write)(fshi, fshi->fshi_hooks.arg,
995 vp, uiop, ioflag, cr, ct);
996 fshi_rele(fshi);
997 return (ret);
998 }
999
1000 int
1001 fsh_next_mount(fsh_int_t *fshi, vfs_t *vfsp, vnode_t *mvp, struct mounta *uap,
1002 cred_t *cr)
1003 {
1004 fsh_fsrecord_t *fsrecp = vfsp->vfs_fshrecord;
1005 int ret;
1006
1007 /*
1008 * The passed fshi is the previous hook (the one from which we've been
1009 * called). We need to find the next one.
1010 */
1011 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
1012 for (fshi = list_next(&fsrecp->fshfsr_list, fshi); fshi != NULL;
1013 fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
1014 if (fshi->fshi_hooks.mount != NULL)
1015 if (fshi_hold(fshi))
1016 break;
1017 }
1018 rw_exit(&fsrecp->fshfsr_lock);
1019
1020 if (fshi == NULL)
1021 return ((*(vfsp->vfs_op->vfs_mount))(vfsp, mvp, uap, cr));
1022
1023 ret = (*fshi->fshi_hooks.mount)(fshi, fshi->fshi_hooks.arg,
1024 vfsp, mvp, uap, cr);
1025 fshi_rele(fshi);
1026 return (ret);
1027 }
1028
1029 int
1030 fsh_next_unmount(fsh_int_t *fshi, vfs_t *vfsp, int flag, cred_t *cr)
1031 {
1032 fsh_fsrecord_t *fsrecp = vfsp->vfs_fshrecord;
1033 int ret;
1034
1035 /*
1036 * The passed fshi is the previous hook (the one from which we've been
1037 * called). We need to find the next one.
1038 */
1039 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
1040 for (fshi = list_next(&fsrecp->fshfsr_list, fshi); fshi != NULL;
1041 fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
1042 if (fshi->fshi_hooks.unmount != NULL)
1043 if (fshi_hold(fshi))
1044 break;
1045 }
1046 rw_exit(&fsrecp->fshfsr_lock);
1047
1048 if (fshi == NULL)
1049 return ((*vfsp->vfs_op->vfs_unmount)(vfsp, flag, cr));
1050
1051 ret = (*fshi->fshi_hooks.unmount)(fshi, fshi->fshi_hooks.arg,
1052 vfsp, flag, cr);
1053 fshi_rele(fshi);
1054 return (ret);
1055 }