1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2013 Damian Bogel. All rights reserved.
14 */
15
16 #include <sys/debug.h>
17 #include <sys/errno.h>
18 #include <sys/fsh.h>
19 #include <sys/fsh_impl.h>
20 #include <sys/id_space.h>
21 #include <sys/kmem.h>
22 #include <sys/ksynch.h>
23 #include <sys/list.h>
24 #include <sys/sunddi.h>
25 #include <sys/sysmacros.h>
26 #include <sys/types.h>
27 #include <sys/vfs.h>
28 #include <sys/vnode.h>
29
30 /*
31 * Filesystem hook framework (fsh)
32 *
33 * 1. Abstract.
34 * The main goal of the filesystem hook framework is to provide an easy way to
35 * inject client-defined behaviour into vfs/vnode calls. fsh works on
36 * vfs_t granularity.
37 *
38 * Note: In this document, both an fsh_t structure and hooking function for a
39 * vnodeop/vfsop is referred to as *hook*.
40 *
41 *
42 * 2. Overview.
43 * fsh_t is the main object in the fsh. An fsh_t is a structure containing:
44 * - pointers to hooking functions
45 * - an argument to pass (this is shared for all the hooks in a given
46 * fsh_t)
47 * - a pointer to the *hook remove callback*
48 *
49 * The information from fsh_t is copied by the fsh and an fsh_handle_t
50 * is returned. It should be used for further removing.
51 *
52 *
53 * 3. Usage.
54 * It is expected that vfs_t/vnode_t passed to fsh_foo() functions are held by
55 * the caller when needed. fsh does no vfs_t/vnode_t locking.
56 *
57 * fsh_t is a structure filled out by the client. It contains:
58 * - pointers to hooking functions
59 * - the argument passed to the hooks
60 * - the *hook remove callback*
61 *
62 * If a client does not want to add a hook for function foo(), he should fill
63 * corresponding fields with NULLs. For every vfsop/vnodeop there are two
64 * fields: pre_foo() and post_foo(). These are the functions called before and
65 * after the next hook or underlying vfsop/vnodeop.
66 *
67 * Pre hooks take:
68 * - arg
69 * - pointer to a field containing void* - it should be filled whenever
70 * the client wants to have some data shared by the pre and post hooks in
71 * the same syscall execution. This is called the *instance data*.
72 * - pointers to the arguments passed to the underlying vfsop/vnodeop
73 * Pre hooks return void.
74 *
75 * Post hooks take:
76 * - value returned by the previous post hook or underlying vfsop/vnodeop
77 * - arg
78 * - pointer to the *instance data*
79 * - arguments passed to the underlying vfsop/vnodeop
80 * Post hooks return an int, which should be treated as the vfsop/vnodeop
81 * return value.
82 * Memory allocated by pre hook must be deallocated by the post hook.
83 *
84 * Execution path of hooks A, B, C is as follows:
85 * foo()
86 * preA(argA, &instancepA, ...);
87 * preB(argB, &instancepB, ...);
88 * preC(argC, &instancepC, ...);
89 * ret = VOP_FOO();
90 * ret = postC(ret, argC, instancepC, ...);
91 * ret = postB(ret, argB, instancepB, ...);
 * ret = postA(ret, argA, instancepA, ...);
93 * return (ret);
94 *
95 * After installation, an fsh_handle_t is returned to the caller.
96 *
97 * Hook remove callback - it's a function being fired after a hook is removed
98 * and no thread is going to execute it anymore. It's safe to destroy all the
99 * data associated with this hook inside it.
100 *
101 * It is guaranteed, that whenever a pre_hook() is called, there will be also
102 * post_hook() called within the same syscall.
103 *
104 * If a hook (HNew) is installed/removed on/from a vfs_t within execution of
105 * another hook (HExec) installed on this vfs_t, the syscall that executes
106 * HExec won't fire HNew.
107 *
108 * A client might want to fire callbacks when vfs_ts are being mounted
109 * or freed. There's an fsh_callback_t structure provided to install such
110 * callbacks along with the API.
111 * It is legal to call fsh_hook_{install,remove}() inside a mount callback
112 * WITHOUT holding the vfs_t.
113 *
114 * After vfs_t's free callback returns, all the handles associated with the
115 * hooks installed on this vfs_t are invalid and must not be used.
116 *
117 * 4. API
118 * None of the APIs should be called during interrupt context above lock
119 * level.
120 *
121 * a) fsh.h
122 * Any of these functions could be called in a hook or a hook remove callback.
123 * The only functions that must not be called inside a {mount,free} callback are
 * fsh_callback_{install,remove}. Using them will cause a deadlock.
125 *
126 *
127 * fsh_fs_enable(vfs_t *vfsp)
128 * fsh_fs_disable(vfs_t *vfsp)
129 * Enables/disables fsh for a given vfs_t.
130 *
131 * fsh_hook_install(vfs_t *vfsp, fsh_t *hooks)
132 * Installs hooks on vfsp filesystem.
133 * It's important that hooks are executed in LIFO installation order,
134 * which means that if there are hooks A and B installed in this order, B
135 * is going to be executed before A.
136 * It returns a correct handle, or (-1) if hook/callback limit exceeded.
137 * The handle is valid until a free callback returns or an explicit call
138 * to fsh_hook_remove().
139 *
140 * fsh_hook_remove(fsh_handle_t handle)
141 * Removes a hook and invalidates the handle.
 * It is guaranteed that after this function returns, calls to
143 * vnodeops/vfsops won't go through this hook, although there might be
144 * some threads still executing this hook. When hook remove callback is
145 * fired, it is guaranteed that the hook won't be executed anymore. It is
146 * safe to remove all the internal data associated with this hook inside
147 * the hook remove callback. The hook remove callback could be called
148 * inside fsh_hook_remove().
149 *
150 *
151 * fsh_callback_install(fsh_callback_t *callback)
152 * fsh_callback_remove(fsh_callback_handle_t handle)
153 * Installs/removes callbacks for vfs_t mount/free. The mount callback
154 * is executed right before domount() returns. The free callback is
155 * called right before VFS_FREEVFS() is called.
156 * The fsh_callback_install() returns a correct handle, or (-1) if
157 * hook/callback limit exceeded.
158 *
159 *
160 * b) fsh_impl.h (for vfs.c and vnode.c only)
161 * fsh_init()
162 * This call has to be done in vfsinit(). It initialises the fsh. It
163 * is absolutely necessary that this call is made before any other fsh
164 * operation.
165 *
166 * fsh_exec_mount_callbacks(vfs_t *vfsp)
167 * fsh_exec_free_callbacks(vfs_t *vfsp)
168 * Used to execute all fsh callbacks for {mount,free} of a vfs_t.
169 *
170 * fsh_fsrec_destroy(struct fsh_fsrecord *fsrecp)
171 * Destroys an fsh_fsrecord structure. All the hooks installed on this
172 * vfs_t are then destroyed. free callback is called before this function.
173 *
174 * fsh_foo(ARGUMENTS)
175 * Function used to execute the hook chain for a given syscall.
176 *
177 *
178 * 5. Internals.
179 * fsh_int_t is an internal hook structure. It is reference counted.
180 * fshi_hold() and fshi_rele() should be used whenever needed.
181 * fsh_int_t entries are elements of both fsh_map (global) and fshfsr_list
182 * (local to vfs_t). All entries are unique and are identified by fshi_handle.
183 *
184 * fsh_int_t properties:
185 * - fsh_hook_install() sets the ref. counter to 1 and adds it to both
186 * fsh_map and fshfsr_list
187 * - fsh_hook_remove() decreases the ref. counter by 1, removes the hook
188 * from fsh_map and marks the hook as *doomed*
189 * - if fsh_int_t is on the fshfsr_list, it's alive and there is a thread
190 * executing it
 * - if fsh_int_t is marked as *doomed*, the reference counter cannot be
 * increased and thus no thread can acquire this fsh_int_t
193 * - ref. counter can drop to 0 only after an fsh_hook_remove() call; this
194 * also means that the fsh_int_t is *doomed* and isn't a part of fsh_map
195 * - fsh_int_t could be also destroyed without fsh_hook_remove() call,
196 * that happens only inside fsh_fsrec_destroy() where it is guaranteed
197 * that there is no thread executing the hook
198 *
199 *
200 * fsh_fsrecord_t is a structure which lives inside a vfs_t.
201 * fsh_fsrecord_t contains:
202 * - an rw-lock that protects the structure
203 * - a list of hooks installed on this vfs_t
204 * - a flag which tells whether fsh is enabled on this vfs_t
205 *
206 *
207 * fsh_fsrec_prepare rule:
208 * Every function that needs vfsp->vfs_fshrecord has to call
209 * fsh_fsrec_prepare() first. If and only if the call is made, it is safe to
210 * use vfsp->vfs_fshrecord.
211 *
212 * Unfortunately, because of unexpected behaviour of some filesystems (no use
213 * of vfs_alloc()/vfs_init()) there's no good place to initialise the
 * fsh_fsrecord_t structure. The approach being used here is to check if it's
215 * initialised in every call. Because of the fact that no lock could be used
216 * here (the same problem with initialisation), a spinlock is used. This is
217 * explained in more detail in a comment before fsh_fsrec_prepare(). After
 * calling fsh_fsrec_prepare() it's completely safe to keep the vfs_fshrecord
219 * pointer locally, because it won't be changed until vfs_free() is called.
220 *
221 * Exceptions from this rule:
222 * - vfs_free() - it is expected that no other fsh calls would be made for the
223 * vfs_t that's being freed. That's why vfs_fshrecord could be only NULL or a
224 * valid pointer and could not be concurrently accessed.
225 * - fshi_rele() - fsh_hook_install() comes before first fshi_rele() call;
226 * the fsh_fsrecord_t has been initialised there
227 *
228 *
229 * When there are no fsh functions (that use a particular fsh_fsrecord_t)
230 * executing, the vfs_fshrecord pointer won't be equal to fsh_res_ptr. It
231 * would be NULL or a pointer to an initialised fsh_fsrecord_t.
232 *
233 * It is required and sufficient to check if fsh_fsrecord_t is not NULL before
234 * passing it to fsh_fsrec_destroy. We don't have to check if it is not equal
235 * to fsh_res_ptr, because all the fsh API calls involving this vfs_t should
236 * end before vfs_free() is called (outside the fsh, fsh_fsrecord is never
237 * equal to fsh_res_ptr). That is guaranteed by the explicit requirement that
238 * the caller of fsh API holds the vfs_t when needed. fsh_hook_remove() must not
239 * be called either, because the handles are invalidated after free callback has
240 * fired.
241 *
242 *
243 * Callbacks:
244 * Mount callbacks are executed by a call to fsh_exec_mount_callbacks() right
245 * before returning from domount()@vfs.c.
246 *
247 * Free callbacks are executed by a call to fsh_exec_free_callbacks() right
248 * before calling VFS_FREEVFS(), after vfs_t's reference count drops to 0.
249 *
250 *
251 * 6. Locking
252 * a) public
253 * fsh does no vfs_t nor vnode_t locking. It is expected that whenever it is
254 * needed, the client does that.
255 *
256 * No locks are held across hooks or hook remove callbacks execution. It is
257 * safe to use fsh API inside hooks and hook remove callbacks.
258 *
259 * fsh_cb_lock is held across {mount,free} callbacks. Calling
260 * fsh_callback_{install,remove} inside of a callback will cause a deadlock.
261 *
262 * b) internals
263 * Locking diagram:
264 *
265 * fsh_hook_remove() fsh_hook_install() fsh_fsrec_destroy()
266 * | | |
267 * | | |
268 * +------------------+ | +------------+
269 * | | | |
270 * | V | |
271 * V +------------|---|-+
272 * fshi_rele() | fsh_lock | | |
273 * (sometimes) +------------|---|-+
274 * | | |
275 * | +---+-- fshfsr_lock, RW_WRITER -+
276 * | |
277 * V |
278 * +---------------------------------------+ |
279 * | fsh_map | |
280 * | | |
281 * +----|-> vfsp->vfs_fshrecord->fshfsr_list <--|----------------+
282 * | +------------------------------^--------+
283 * | |
284 * | |
285 * fshfsr_lock, RW_READER fshfsr_lock, RW_WRITER
286 * | |
287 * | |
288 * fsh_read(), fshi_rele()
289 * fsh_write(),
290 * ... Might be called from:
291 * fsh_hook_remove()
292 * fsh_read(), fsh_write(), ...
293 *
294 *
 * fsh_lock is a global lock for administrative path (fsh_hook_install,
296 * fsh_hook_remove) and fsh_fsrec_destroy() (which is semi-administrative, since
297 * it destroys the unremoved hooks). It is used only when fsh_map needs to be
298 * locked. The usage of this lock guarantees that the data in fsh_map and
299 * fshfsr_lists is consistent.
300 *
301 * In order to make calling callbacks inside callbacks possible, fsh_cb_owner is
302 * set by fsh_exec_{mount,free} callbacks to the thread that owns the
303 * fsh_cb_lock. It's always checked if we are owners of the mutex before
304 * entering it.
305 *
306 */
307
308
309 /* Internals */
310 typedef struct fsh_int {
311 fsh_handle_t fshi_handle;
312 fsh_t fshi_hooks;
313 vfs_t *fshi_vfsp;
314
315 kmutex_t fshi_lock;
316 uint64_t fshi_ref;
317 uint64_t fshi_doomed; /* changed inside fsh_lock */
318
319 /* next node in fshfsr_list */
320 list_node_t fshi_node;
321
322 /* next node in fsh_map */
323 list_node_t fshi_global;
324 } fsh_int_t;
325
326 typedef struct fsh_callback_int {
327 fsh_callback_t fshci_cb;
328 fsh_callback_handle_t fshci_handle;
329 list_node_t fshci_node;
330 } fsh_callback_int_t;
331
332
333 typedef struct fsh_exec {
334 fsh_int_t *fshe_fshi;
335 void *fshe_instance;
336 list_node_t fshe_node;
337 } fsh_exec_t;
338
339
340 static kmutex_t fsh_lock;
341
342 /*
343 * fsh_fsrecord_t is the main internal structure. It's content is protected
344 * by fshfsr_lock. The fshfsr_list is a list of fsh_int_t hook entries for
345 * the vfs_t that contains the fsh_fsrecord_t.
346 */
347 struct fsh_fsrecord {
348 krwlock_t fshfsr_lock;
349 int fshfsr_enabled;
350 list_t fshfsr_list;
351 };
352
353 /*
354 * Global list of fsh_int_t. Protected by fsh_lock.
355 */
356 static list_t fsh_map;
357
358 /*
359 * Global list of fsh_callback_int_t.
360 */
361 static kmutex_t fsh_cb_lock;
362 static kmutex_t fsh_cb_owner_lock;
363 static kthread_t *fsh_cb_owner;
364 static list_t fsh_cblist;
365
366 /*
367 * A reserved pointer for fsh purposes. It is used because of the method
368 * chosen for solving concurrency issues with vfs_fshrecord. The full
369 * explanation is in the big theory statement at the beginning of this
370 * file and above fsh_fsrec_prepare(). It is initialised in fsh_init().
371 */
372 static void *fsh_res_ptr;
373
374 static fsh_fsrecord_t *fsh_fsrec_create();
375
376 int fsh_limit = INT_MAX;
377 static id_space_t *fsh_idspace;
378
379 /*
380 * fsh_fsrec_prepare()
381 *
382 * Important note:
383 * Before using this function, fsh_init() MUST be called. We do that in
384 * vfsinit()@vfs.c.
385 *
386 * One would ask, why isn't the vfsp->vfs_fshrecord initialised when the
387 * vfs_t is created. Unfortunately, some filesystems (e.g. fifofs) do not
 * call vfs_init() or even vfs_alloc(). It's possible that some unbundled
389 * filesystems could do the same thing. That's why this solution is
390 * introduced. It should be called before any code that needs access to
391 * vfs_fshrecord.
392 *
393 * Locking:
394 * There are no locks here, because there's no good place to initialise
395 * the lock. Concurrency issues are solved by using atomic instructions
396 * and a spinlock, which is spinning only once for a given vfs_t. Because
397 * of that, the usage of the spinlock isn't bad at all.
398 *
399 * How it works:
400 * a) if vfsp->vfs_fshrecord equals NULL, atomic_cas_ptr() changes it to
401 * fsh_res_ptr. That's a signal for other threads, that the structure
402 * is being initialised.
403 * b) if vfsp->vfs_fshrecord equals fsh_res_ptr, that means we have to wait,
404 * because vfs_fshrecord is being initialised by another call.
405 * c) other cases:
406 * vfs_fshrecord is already initialised, so we can use it. It won't change
407 * until vfs_free() is called. It can't happen when someone is holding
408 * the vfs_t, which is expected from the caller of fsh API.
409 */
410 static void
411 fsh_fsrec_prepare(vfs_t *vfsp)
412 {
413 fsh_fsrecord_t *fsrec;
414
415 while ((fsrec = atomic_cas_ptr(&vfsp->vfs_fshrecord, NULL,
416 fsh_res_ptr)) == fsh_res_ptr)
417 ;
418
419 if (fsrec == NULL)
420 atomic_swap_ptr(&vfsp->vfs_fshrecord, fsh_fsrec_create());
421 }
422
423 /*
424 * API for enabling/disabling fsh per vfs_t.
425 *
426 * A newly created vfs_t has fsh enabled by default. If one would want to change
427 * this behaviour, mount callbacks could be used.
428 *
429 * The caller is expected to hold the vfs_t.
430 *
431 * These functions must NOT be called in a hook.
432 */
433 void
434 fsh_fs_enable(vfs_t *vfsp)
435 {
436 fsh_fsrec_prepare(vfsp);
437
438 rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
439 vfsp->vfs_fshrecord->fshfsr_enabled = 1;
440 rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
441 }
442
443 void
444 fsh_fs_disable(vfs_t *vfsp)
445 {
446 fsh_fsrec_prepare(vfsp);
447
448 rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
449 vfsp->vfs_fshrecord->fshfsr_enabled = 0;
450 rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
451 }
452
453 /*
454 * API used for installing hooks. fsh_handle_t is returned for further
455 * actions (currently just removing) on this set of hooks.
456 *
457 * It's important that the hooks are executed in LIFO installation order (they
458 * are added to the head of the hook list).
459 *
460 * The caller is expected to hold the vfs_t.
461 *
462 * Returns (-1) if hook/callback limit exceeded, handle otherwise.
463 */
464 fsh_handle_t
465 fsh_hook_install(vfs_t *vfsp, fsh_t *hooks)
466 {
467 fsh_handle_t handle;
468 fsh_int_t *fshi;
469
470 fsh_fsrec_prepare(vfsp);
471
472 if ((handle = id_alloc(fsh_idspace)) == -1)
473 return (-1);
474
475 fshi = kmem_alloc(sizeof (*fshi), KM_SLEEP);
476 mutex_init(&fshi->fshi_lock, NULL, MUTEX_DRIVER, NULL);
477 (void) memcpy(&fshi->fshi_hooks, hooks, sizeof (fshi->fshi_hooks));
478 fshi->fshi_handle = handle;
479 fshi->fshi_doomed = 0;
480 fshi->fshi_ref = 1;
481 fshi->fshi_vfsp = vfsp;
482
483 mutex_enter(&fsh_lock);
484 rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
485 list_insert_head(&vfsp->vfs_fshrecord->fshfsr_list, fshi);
486 rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
487
488 list_insert_head(&fsh_map, fshi);
489 mutex_exit(&fsh_lock);
490
491 return (handle);
492 }
493
494 static int
495 fshi_hold(fsh_int_t *fshi)
496 {
497 int can_hold;
498
499 mutex_enter(&fshi->fshi_lock);
500 if (fshi->fshi_doomed == 1) {
501 can_hold = 0;
502 } else {
503 fshi->fshi_ref++;
504 can_hold = 1;
505 }
506 mutex_exit(&fshi->fshi_lock);
507
508 return (can_hold);
509 }
510
511 /*
512 * This function must not be called while fshfsr_lock is held. Doing so could
513 * cause a deadlock.
514 */
515 static void
516 fshi_rele(fsh_int_t *fshi)
517 {
518 int destroy;
519
520 mutex_enter(&fshi->fshi_lock);
521 ASSERT(fshi->fshi_ref > 0);
522 fshi->fshi_ref--;
523 if (fshi->fshi_ref == 0) {
524 ASSERT(fshi->fshi_doomed == 1);
525 destroy = 1;
526 } else {
527 destroy = 0;
528 }
529 mutex_exit(&fshi->fshi_lock);
530
531 if (destroy) {
532 /*
533 * At this point, we are sure that fsh_hook_remove() has been
534 * called, that's why we don't remove the fshi from fsh_map.
535 * fsh_hook_remove() did that already.
536 * There is also no need to call fsh_fsrec_prepare() here.
537 */
538 fsh_fsrecord_t *fsrecp;
539
540 /*
541 * We don't have to call fsh_fsrec_prepare() here.
542 * fsh_fsrecord_t is already initialised, because we've found a
543 * mapping for the given handle.
544 */
545 fsrecp = fshi->fshi_vfsp->vfs_fshrecord;
546 ASSERT(fsrecp != NULL);
547 ASSERT(fsrecp != fsh_res_ptr);
548
549 rw_enter(&fsrecp->fshfsr_lock, RW_WRITER);
550 list_remove(&fsrecp->fshfsr_list, fshi);
551 rw_exit(&fsrecp->fshfsr_lock);
552
553 if (fshi->fshi_hooks.remove_cb != NULL)
554 (*fshi->fshi_hooks.remove_cb)(
555 fshi->fshi_hooks.arg, fshi->fshi_handle);
556
557 id_free(fsh_idspace, fshi->fshi_handle);
558 mutex_destroy(&fshi->fshi_lock);
559 kmem_free(fshi, sizeof (*fshi));
560 }
561 }
562
563 /*
564 * Used for removing a hook set.
565 *
566 * fsh_hook_remove() invalidates the given handle.
567 *
568 * It is guaranteed, that after successful return from fsh_hook_remove(),
569 * calls to vnodeops/vfsops, on the vfs_t on which the hook is installed, won't
570 * go through this hook.
571 *
572 * There is no guarantee that after fsh_hook_remove() returns, the hook
573 * associated with the handle won't be executing. Instead, it is guaranteed that
 * when remove_cb() is called, the hook finished its execution in all threads.
575 * It is safe to destroy all internal data associated with this hook inside
576 * remove_cb().
577 *
578 * It is possible that remove_cb() would be called before fsh_hook_remove()
579 * returns.
580 *
581 * Returns (-1) if hook wasn't found, 0 otherwise.
582 */
583 int
584 fsh_hook_remove(fsh_handle_t handle)
585 {
586 fsh_int_t *fshi;
587
588 mutex_enter(&fsh_lock);
589 for (fshi = list_head(&fsh_map); fshi != NULL;
590 fshi = list_next(&fsh_map, fshi)) {
591 if (fshi->fshi_handle == handle) {
592 list_remove(&fsh_map, fshi);
593 break;
594 }
595 }
596
597 if (fshi == NULL)
598 return (-1);
599
600 mutex_enter(&fshi->fshi_lock);
601 ASSERT(fshi->fshi_doomed == 0);
602 fshi->fshi_doomed = 1;
603 mutex_exit(&fshi->fshi_lock);
604 mutex_exit(&fsh_lock);
605
606 fshi_rele(fshi);
607
608 return (0);
609 }
610
611 /*
612 * API for installing global mount/free callbacks.
613 *
614 * fsh_callback_t fields:
615 * fshc_arg - argument passed to the callbacks
616 * fshc_free - callback fired before VFS_FREEVFS() is called, after vfs_count
617 * drops to 0
618 * fshc_mount - callback fired right before returning from domount()
619 * The first argument of these callbacks is the vfs_t that is mounted/freed.
620 * The second one is the fshc_arg.
621 *
622 * fsh_callback_handle_t is filled out by this function.
623 *
624 * Returns (-1) if hook/callback limit exceeded.
625 *
626 * Calling this function in a {mount,free} callback will cause a deadlock.
627 */
628 fsh_callback_handle_t
629 fsh_callback_install(fsh_callback_t *callback)
630 {
631 fsh_callback_int_t *fshci;
632 fsh_callback_handle_t handle;
633
634 if ((handle = id_alloc(fsh_idspace)) == -1)
635 return (-1);
636
637 fshci = (fsh_callback_int_t *)kmem_alloc(sizeof (*fshci), KM_SLEEP);
638 (void) memcpy(&fshci->fshci_cb, callback, sizeof (fshci->fshci_cb));
639 fshci->fshci_handle = handle;
640
641 mutex_enter(&fsh_cb_lock);
642 list_insert_head(&fsh_cblist, fshci);
643 mutex_exit(&fsh_cb_lock);
644
645 return (handle);
646 }
647
648 /*
649 * API for removing global mount/free callbacks.
650 *
651 * Returns (-1) if callback wasn't found, 0 otherwise.
652 *
653 * Calling this function in a {mount,free} callback will cause a deadlock.
654 */
655 int
656 fsh_callback_remove(fsh_callback_handle_t handle)
657 {
658 fsh_callback_int_t *fshci;
659
660 mutex_enter(&fsh_cb_lock);
661
662 for (fshci = list_head(&fsh_cblist); fshci != NULL;
663 fshci = list_next(&fsh_cblist, fshci)) {
664 if (fshci->fshci_handle == handle) {
665 list_remove(&fsh_cblist, fshci);
666 break;
667 }
668 }
669
670 mutex_exit(&fsh_cb_lock);
671
672 if (fshci == NULL)
673 return (-1);
674
675 kmem_free(fshci, sizeof (*fshci));
676 id_free(fsh_idspace, handle);
677
678 return (0);
679 }
680
681 /*
682 * This function is executed right before returning from domount()@vfs.c.
683 * We are sure that it's called only after fsh_init().
684 * It executes all the mount callbacks installed in the fsh.
685 *
686 * Since fsh_exec_mount_callbacks() is called only inside domount(), it is legal
687 * to call fsh_hook_{install,remove}() inside a mount callback WITHOUT holding
688 * this vfs_t. This guarantee should be preserved, because it's in the "Usage"
689 * section in the big theory statement at the top of this file.
690 */
691 void
692 fsh_exec_mount_callbacks(vfs_t *vfsp)
693 {
694 fsh_callback_int_t *fshci;
695 fsh_callback_t *cb;
696 int fsh_context;
697
698 mutex_enter(&fsh_cb_owner_lock);
699 fsh_context = fsh_cb_owner == curthread;
700 mutex_exit(&fsh_cb_owner_lock);
701
702 if (!fsh_context) {
703 mutex_enter(&fsh_cb_lock);
704 mutex_enter(&fsh_cb_owner_lock);
705 fsh_cb_owner = curthread;
706 mutex_exit(&fsh_cb_owner_lock);
707 }
708
709 ASSERT(MUTEX_HELD(&fsh_cb_lock));
710
711 for (fshci = list_head(&fsh_cblist); fshci != NULL;
712 fshci = list_next(&fsh_cblist, fshci)) {
713 cb = &fshci->fshci_cb;
714 if (cb->fshc_mount != NULL)
715 (*(cb->fshc_mount))(vfsp, cb->fshc_arg);
716 }
717
718 if (!fsh_context) {
719 mutex_enter(&fsh_cb_owner_lock);
720 fsh_cb_owner = NULL;
721 mutex_exit(&fsh_cb_owner_lock);
722 mutex_exit(&fsh_cb_lock);
723 }
724 }
725
726 /*
727 * This function is executed right before VFS_FREEVFS() is called in
728 * vfs_rele()@vfs.c. We are sure that it's called only after fsh_init().
729 * It executes all the free callbacks installed in the fsh.
730 *
731 * free() callback is the point after the handles associated with the hooks
732 * installed on this vfs_t become invalid
733 */
734 void
735 fsh_exec_free_callbacks(vfs_t *vfsp)
736 {
737 fsh_callback_int_t *fshci;
738 fsh_callback_t *cb;
739 int fsh_context;
740
741 mutex_enter(&fsh_cb_owner_lock);
742 fsh_context = fsh_cb_owner == curthread;
743 mutex_exit(&fsh_cb_owner_lock);
744
745 if (!fsh_context) {
746 mutex_enter(&fsh_cb_lock);
747 mutex_enter(&fsh_cb_owner_lock);
748 fsh_cb_owner = curthread;
749 mutex_exit(&fsh_cb_owner_lock);
750 }
751
752 ASSERT(MUTEX_HELD(&fsh_cb_lock));
753
754 for (fshci = list_head(&fsh_cblist); fshci != NULL;
755 fshci = list_next(&fsh_cblist, fshci)) {
756 cb = &fshci->fshci_cb;
757 if (cb->fshc_free != NULL)
758 (*(cb->fshc_free))(vfsp, cb->fshc_arg);
759 }
760
761 if (!fsh_context) {
762 mutex_enter(&fsh_cb_owner_lock);
763 fsh_cb_owner = NULL;
764 mutex_exit(&fsh_cb_owner_lock);
765 mutex_exit(&fsh_cb_lock);
766 }
767 }
768
769 /*
770 * API for vnode.c/vfs.c to start executing the fsh for a given operation.
771 *
772 * fsh_xxx() tries to find the first non-NULL xxx hook on the fshfsr_list. If it
773 * does, it executes it. If not, underlying vnodeop/vfsop is called.
774 *
775 * These interfaces are using fsh_res_ptr (in fsh_fsrec_prepare()), so it's
776 * absolutely necessary to call fsh_init() before using them. That's done in
777 * vfsinit().
778 *
779 * While these functions are executing, it's expected that necessary vfs_t's
 * are held so that vfs_free() isn't called. vfs_free() expects that no one
781 * accesses vfs_fshrecord of a given vfs_t.
782 * It's also the caller's responsibility to keep vnode_t passed to fsh_foo()
783 * alive and valid.
784 * All these expectations are met because these functions are used only in
 * corresponding {fop,fsop}_foo() functions.
786 */
787 int
788 fsh_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
789 caller_context_t *ct)
790 {
791 int ret;
792 fsh_fsrecord_t *fsrecp;
793 fsh_int_t *fshi;
794 fsh_exec_t *fshe;
795 list_t exec_list;
796
797 fsh_fsrec_prepare(vp->v_vfsp);
798 fsrecp = vp->v_vfsp->vfs_fshrecord;
799
800 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
801 if (!(fsrecp->fshfsr_enabled)) {
802 rw_exit(&fsrecp->fshfsr_lock);
803 return ((*vp->v_op->vop_read)(vp, uiop, ioflag, cr, ct));
804 }
805
806 list_create(&exec_list, sizeof (fsh_exec_t),
807 offsetof(fsh_exec_t, fshe_node));
808
809 for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL;
810 fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
811 if (fshi->fshi_hooks.pre_read != NULL ||
812 fshi->fshi_hooks.post_read != NULL) {
813 if (fshi_hold(fshi)) {
814 fshe = kmem_alloc(sizeof (*fshe), KM_SLEEP);
815 fshe->fshe_fshi = fshi;
816 list_insert_tail(&exec_list, fshe);
817 }
818 }
819 }
820 rw_exit(&fsrecp->fshfsr_lock);
821
822 /* Execute pre hooks */
823 for (fshe = list_head(&exec_list); fshe != NULL;
824 fshe = list_next(&exec_list, fshe)) {
825 if (fshe->fshe_fshi->fshi_hooks.pre_read != NULL)
826 (*fshe->fshe_fshi->fshi_hooks.pre_read)(
827 fshe->fshe_fshi->fshi_hooks.arg,
828 &fshe->fshe_instance,
829 &vp, &uiop, &ioflag, &cr, &ct);
830 }
831
832 ret = (*vp->v_op->vop_read)(vp, uiop, ioflag, cr, ct);
833
834 /* Execute post hooks */
835 while ((fshe = list_remove_tail(&exec_list)) != NULL) {
836 if (fshe->fshe_fshi->fshi_hooks.post_read != NULL)
837 ret = (*fshe->fshe_fshi->fshi_hooks.post_read)(
838 ret, fshe->fshe_fshi->fshi_hooks.arg,
839 fshe->fshe_instance,
840 vp, uiop, ioflag, cr, ct);
841 fshi_rele(fshe->fshe_fshi);
842 kmem_free(fshe, sizeof (*fshe));
843 }
844 list_destroy(&exec_list);
845
846 return (ret);
847 }
848
849 int
850 fsh_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
851 caller_context_t *ct)
852 {
853 int ret;
854 fsh_fsrecord_t *fsrecp;
855 fsh_int_t *fshi;
856 fsh_exec_t *fshe;
857 list_t exec_list;
858
859 fsh_fsrec_prepare(vp->v_vfsp);
860 fsrecp = vp->v_vfsp->vfs_fshrecord;
861
862 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
863 if (!(fsrecp->fshfsr_enabled)) {
864 rw_exit(&fsrecp->fshfsr_lock);
865 return ((*vp->v_op->vop_write)(vp, uiop, ioflag, cr, ct));
866 }
867
868 list_create(&exec_list, sizeof (fsh_exec_t),
869 offsetof(fsh_exec_t, fshe_node));
870
871 for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL;
872 fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
873 if (fshi->fshi_hooks.pre_write != NULL ||
874 fshi->fshi_hooks.post_write != NULL) {
875 if (fshi_hold(fshi)) {
876 fshe = kmem_alloc(sizeof (*fshe), KM_SLEEP);
877 fshe->fshe_fshi = fshi;
878 list_insert_tail(&exec_list, fshe);
879 }
880 }
881 }
882 rw_exit(&fsrecp->fshfsr_lock);
883
884 /* Execute pre hooks */
885 for (fshe = list_head(&exec_list); fshe != NULL;
886 fshe = list_next(&exec_list, fshe)) {
887 if (fshe->fshe_fshi->fshi_hooks.pre_write != NULL)
888 (*fshe->fshe_fshi->fshi_hooks.pre_write)(
889 fshe->fshe_fshi->fshi_hooks.arg,
890 &fshe->fshe_instance,
891 &vp, &uiop, &ioflag, &cr, &ct);
892 }
893
894 ret = (*vp->v_op->vop_write)(vp, uiop, ioflag, cr, ct);
895
896 /* Execute post hooks */
897 while ((fshe = list_remove_tail(&exec_list)) != NULL) {
898 if (fshe->fshe_fshi->fshi_hooks.post_write != NULL)
899 ret = (*fshe->fshe_fshi->fshi_hooks.post_write)(
900 ret, fshe->fshe_fshi->fshi_hooks.arg,
901 fshe->fshe_instance,
902 vp, uiop, ioflag, cr, ct);
903 fshi_rele(fshe->fshe_fshi);
904 kmem_free(fshe, sizeof (*fshe));
905 }
906 list_destroy(&exec_list);
907
908 return (ret);
909 }
910
911 int
912 fsh_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
913 {
914 int ret;
915 fsh_fsrecord_t *fsrecp;
916 fsh_int_t *fshi;
917 fsh_exec_t *fshe;
918 list_t exec_list;
919
920 fsh_fsrec_prepare(vfsp);
921 fsrecp = vfsp->vfs_fshrecord;
922
923 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
924 if (!(fsrecp->fshfsr_enabled)) {
925 rw_exit(&fsrecp->fshfsr_lock);
926 return ((*vfsp->vfs_op->vfs_mount)(vfsp, mvp, uap, cr));
927 }
928
929 list_create(&exec_list, sizeof (fsh_exec_t),
930 offsetof(fsh_exec_t, fshe_node));
931
932 for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL;
933 fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
934 if (fshi->fshi_hooks.pre_mount != NULL ||
935 fshi->fshi_hooks.post_mount != NULL) {
936 if (fshi_hold(fshi)) {
937 fshe = kmem_alloc(sizeof (*fshe), KM_SLEEP);
938 fshe->fshe_fshi = fshi;
939 list_insert_tail(&exec_list, fshe);
940 }
941 }
942 }
943 rw_exit(&fsrecp->fshfsr_lock);
944
945 /* Execute pre hooks */
946 for (fshe = list_head(&exec_list); fshe != NULL;
947 fshe = list_next(&exec_list, fshe)) {
948 if (fshe->fshe_fshi->fshi_hooks.pre_mount != NULL)
949 (*fshe->fshe_fshi->fshi_hooks.pre_mount)(
950 &fshe->fshe_fshi->fshi_hooks.arg,
951 &fshe->fshe_instance,
952 &vfsp, &mvp, &uap, &cr);
953 }
954
955 ret = (*vfsp->vfs_op->vfs_mount)(vfsp, mvp, uap, cr);
956
957 /* Execute post hooks */
958 while ((fshe = list_remove_tail(&exec_list)) != NULL) {
959 if (fshe->fshe_fshi->fshi_hooks.post_mount != NULL)
960 ret = (*fshe->fshe_fshi->fshi_hooks.post_mount)(
961 ret, fshe->fshe_fshi->fshi_hooks.arg,
962 fshe->fshe_instance,
963 vfsp, mvp, uap, cr);
964 fshi_rele(fshe->fshe_fshi);
965 kmem_free(fshe, sizeof (*fshe));
966 }
967 list_destroy(&exec_list);
968
969 return (ret);
970 }
971
972 int
973 fsh_unmount(vfs_t *vfsp, int flag, cred_t *cr)
974 {
975 int ret;
976 fsh_fsrecord_t *fsrecp;
977 fsh_int_t *fshi;
978 fsh_exec_t *fshe;
979 list_t exec_list;
980
981 fsh_fsrec_prepare(vfsp);
982 fsrecp = vfsp->vfs_fshrecord;
983
984 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
985 if (!(fsrecp->fshfsr_enabled)) {
986 rw_exit(&fsrecp->fshfsr_lock);
987 return ((*vfsp->vfs_op->vfs_unmount)(vfsp, flag, cr));
988 }
989
990 list_create(&exec_list, sizeof (fsh_exec_t),
991 offsetof(fsh_exec_t, fshe_node));
992
993 for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL;
994 fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
995 if (fshi->fshi_hooks.pre_unmount != NULL ||
996 fshi->fshi_hooks.post_unmount != NULL) {
997 if (fshi_hold(fshi)) {
998 fshe = kmem_alloc(sizeof (*fshe), KM_SLEEP);
999 fshe->fshe_fshi = fshi;
1000 list_insert_tail(&exec_list, fshe);
1001 }
1002 }
1003 }
1004 rw_exit(&fsrecp->fshfsr_lock);
1005
1006 /* Execute pre hooks */
1007 for (fshe = list_head(&exec_list); fshe != NULL;
1008 fshe = list_next(&exec_list, fshe)) {
1009 if (fshe->fshe_fshi->fshi_hooks.pre_unmount != NULL)
1010 (*fshe->fshe_fshi->fshi_hooks.pre_unmount)(
1011 fshe->fshe_fshi->fshi_hooks.arg,
1012 &fshe->fshe_instance,
1013 &vfsp, &flag, &cr);
1014 }
1015
1016 ret = (*vfsp->vfs_op->vfs_unmount)(vfsp, flag, cr);
1017
1018 /* Execute post hooks */
1019 while ((fshe = list_remove_tail(&exec_list)) != NULL) {
1020 if (fshe->fshe_fshi->fshi_hooks.post_unmount != NULL)
1021 ret = (*fshe->fshe_fshi->fshi_hooks.post_unmount)(
1022 ret, fshe->fshe_fshi->fshi_hooks.arg,
1023 fshe->fshe_instance,
1024 vfsp, flag, cr);
1025 fshi_rele(fshe->fshe_fshi);
1026 kmem_free(fshe, sizeof (*fshe));
1027 }
1028 list_destroy(&exec_list);
1029
1030 return (ret);
1031 }
1032
1033 /*
1034 * This is the funtion used by fsh_fsrec_prepare() to allocate a new
1035 * fsh_fsrecord. This function is called by the first function which
1036 * access the vfs_fshrecord and finds out it's NULL.
1037 */
1038 static fsh_fsrecord_t *
1039 fsh_fsrec_create()
1040 {
1041 fsh_fsrecord_t *fsrecp;
1042
1043 fsrecp = (fsh_fsrecord_t *)kmem_zalloc(sizeof (*fsrecp), KM_SLEEP);
1044 list_create(&fsrecp->fshfsr_list, sizeof (fsh_int_t),
1045 offsetof(fsh_int_t, fshi_node));
1046 rw_init(&fsrecp->fshfsr_lock, NULL, RW_DRIVER, NULL);
1047 fsrecp->fshfsr_enabled = 1;
1048 return (fsrecp);
1049 }
1050
1051
1052 /*
1053 * This call must be used ONLY in vfs_free().
1054 *
1055 * It is required and sufficient to check if fsh_fsrecord_t is not NULL before
1056 * passing it to fsh_fsrec_destroy.
1057 *
1058 * All the remaining hooks are being removed here.
1059 */
1060 void
1061 fsh_fsrec_destroy(struct fsh_fsrecord *volatile fsrecp)
1062 {
1063 fsh_int_t *fshi;
1064
1065 VERIFY(fsrecp != NULL);
1066
1067 _NOTE(CONSTCOND)
1068 while (1) {
1069 mutex_enter(&fsh_lock);
1070 rw_enter(&fsrecp->fshfsr_lock, RW_WRITER);
1071 fshi = list_remove_head(&fsrecp->fshfsr_list);
1072 rw_exit(&fsrecp->fshfsr_lock);
1073 if (fshi == NULL) {
1074 mutex_exit(&fsh_lock);
1075 break;
1076 }
1077 ASSERT(fshi->fshi_doomed == 0);
1078 list_remove(&fsh_map, fshi);
1079 mutex_exit(&fsh_lock);
1080
1081 if (fshi->fshi_hooks.remove_cb != NULL)
1082 (*fshi->fshi_hooks.remove_cb)(fshi->fshi_hooks.arg,
1083 fshi->fshi_handle);
1084
1085 id_free(fsh_idspace, fshi->fshi_handle);
1086 mutex_destroy(&fshi->fshi_lock);
1087 kmem_free(fshi, sizeof (*fshi));
1088
1089 }
1090
1091 list_destroy(&fsrecp->fshfsr_list);
1092 rw_destroy(&fsrecp->fshfsr_lock);
1093 kmem_free(fsrecp, sizeof (*fsrecp));
1094 }
1095
1096 /*
1097 * fsh_init() is called in vfsinit()@vfs.c. This function MUST be called
1098 * before every other fsh call.
1099 */
1100 void
1101 fsh_init(void)
1102 {
1103 mutex_init(&fsh_cb_lock, NULL, MUTEX_DRIVER, NULL);
1104 mutex_init(&fsh_cb_owner_lock, NULL, MUTEX_DRIVER, NULL);
1105 list_create(&fsh_cblist, sizeof (fsh_callback_int_t),
1106 offsetof(fsh_callback_int_t, fshci_node));
1107
1108 mutex_init(&fsh_lock, NULL, MUTEX_DRIVER, NULL);
1109
1110 list_create(&fsh_map, sizeof (fsh_int_t), offsetof(fsh_int_t,
1111 fshi_global));
1112
1113 /* See comment above fsh_fsrec_prepare() */
1114 fsh_res_ptr = (void *)-1;
1115
1116 fsh_idspace = id_space_create("fsh", 0, fsh_limit);
1117 }