1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2013 Damian Bogel. All rights reserved.
14 */
15
16 #include <sys/debug.h>
17 #include <sys/errno.h>
18 #include <sys/fsh.h>
19 #include <sys/fsh_impl.h>
20 #include <sys/id_space.h>
21 #include <sys/kmem.h>
22 #include <sys/ksynch.h>
23 #include <sys/list.h>
24 #include <sys/sunddi.h>
25 #include <sys/sysmacros.h>
26 #include <sys/types.h>
27 #include <sys/vfs.h>
28 #include <sys/vnode.h>
29
30 /*
31 * TODO:
32 * - solve the fsh_cblist_lock deadlock potential problem (fsh_callback_install
33 * inside a callback) (that is also a problem for hooks)
34 */
35
36 /*
37 * Filesystem hook framework (fsh)
38 *
39 * 1. Abstract.
40 * The main goal of the filesystem hook framework is to provide an easy way to
41 * inject consumer-defined behaviour into vfs/vnode calls. fsh works on
42 * vfs_t granularity.
43 *
44 *
45 * 2. Overview.
46 * fsh_t is the main object in the fsh. An fsh_t is a structure containing:
47 * - pointers to hooking functions (named after corresponding
48 * vnodeops/vfsops)
49 * - a pointer to an argument to pass (this is shared for all the
50 * hooks in a given fsh_t)
51 * - pointer to a callback that should be executed after a hook is removed
52 *
53 * The information from fsh_t is copied by the fsh and an fsh_handle_t
54 * is returned. It should be used for further removing.
55 *
56 *
57 * 3. Usage.
58 * It is expected that vfs_t/vnode_t that are passed to fsh_foo() functions
59 * are held by the caller when needed. fsh does no vfs_t/vnode_t locking.
60 *
61 * fsh_t is a structure filled out by the consumer. If a consumer does not want
62 * to add/remove a hook for function foo(), he should fill the foo field of
63 * fsh_t with NULL. Every hook has a type of corresponding vfsop/vnodeop with
64 * two additional arguments:
65 * - fsh_int_t *fsh_int - this argument MUST be passed to
66 * fsh_next_foo(). fsh wouldn't know which hook to execute next
67 * without it
68 * - void *arg - this is the argument passed with fsh_t during
69 * installation
70 * fsh_t also contains two other fields:
71 * - void *arg - mentioned above, which is the argument passed to the hooks
72 * - remove_callback - it's a pointer to a callback that should execute
73 * right after the hook has been removed. It takes the arg and the handle
74 * as parameters.
75 * After installation, an fsh_handle_t is returned to the caller.
76 *
77 * When a vfs_t is freed, all remaining hooks are being removed and the
78 * remove_callbacks are being executed.
79 *
80 * Every hook function is responsible for passing the control to the next
81 * hook associated with a particular call. In order to provide an easy way to
82 * modify the behaviour of a function call both before and after the
83 * underlying vfsop/vnodeop (or next hook) execution, a hook has to call
84 * fsh_next_foo() at some point. This function does necessary internal
85 * operations and calls the next hook, until there's no hook left, then it
86 * calls the underlying vfsop/vnodeop.
87 * Example:
88 * my_freefs(fsh_int_t *fsh_int, void *arg, vfs_t *vfsp) {
89 * cmn_err(CE_NOTE, "freefs called!\n");
90 * return (fsh_next_freefs(fsh_int, vfsp));
91 * }
92 *
93 * A consumer might want to fire callbacks when vfs_t's are being mounted
94 * or freed. There's an fsh_callback_t structure provided to install such
95 * callbacks along with the API.
96 *
97 *
98 * 4. API
99 * None of the APIs should be called during interrupt context above lock
100 * level. The only exceptions are fsh_next_foo() functions, which do not use
101 * locks.
102 *
103 * a) fsh.h
104 * None of the functions listed below should be called inside of a hook.
105 * Doing so will cause a deadlock. The only exceptions are fsh_next_foo() and
106 * fsh_callback_{install,remove}().
107 *
108 * fsh_callback_{install,remove}() should not be called inside of a {mount,free}
109 * callback. Doing so will cause a deadlock.
110 *
111 * fsh_fs_enable(vfs_t *vfsp)
112 * fsh_fs_disable(vfs_t *vfsp)
113 * Enables/disables fsh for a given vfs_t.
114 *
115 * fsh_hook_install(vfs_t *vfsp, fsh_t *hooks)
116 * Installs hooks on vfsp filesystem. It's important that hooks are
117 * executed in LIFO installation order, which means that if there are
118 * hooks A and B installed in this order, B is going to be executed
119 * before A. It returns a correct handle, or (-1) if hook/callback
120 * limit exceeded.
121 *
122 * fsh_hook_remove(fsh_handle_t handle)
123 * Removes hooks.
124 *
125 * fsh_next_foo(fsh_int_t *fsh_int, ARGUMENTS)
126 * This is the function which should be called once in every hook. It
127 * does the necessary internal operations and passes control to the
128 * next hook or, if there's no hook left, to the underlying
129 * vfsop/vnodeop.
130 *
131 * fsh_callback_install(fsh_callback_t *callback)
132 * fsh_callback_remove(fsh_callback_handle_t handle)
133 * Installs/removes callbacks for vfs_t mount/free. The mount callback
134 * is executed right before domount() returns. The free callback is
135 * called right before VFS_FREEVFS() is called. The
136 * fsh_callback_install() returns a correct handle, or (-1) if
137 * hook/callback limit exceeded.
138 *
139 * b) fsh_impl.h (for vfs.c and vnode.c only)
140 * fsh_init()
141 * This call has to be done in vfsinit(). It initialises the fsh. It
142 * is absolutely necessary that this call is made before any other fsh
143 * operation.
144 *
145 * fsh_exec_mount_callbacks(vfs_t *vfsp)
146 * fsh_exec_free_callbacks(vfs_t *vfsp)
147 * Used to execute all fsh callbacks for {mount,free} of a vfs_t.
148 *
149 * fsh_fsrec_destroy(struct fsh_fsrecord *fsrecp)
150 * Destroys an fsh_fsrecord structure.
151 *
152 * fsh_foo(ARGUMENTS)
153 * Function used to start executing the hook chain for a given call.
154 *
155 * ---
156 *
157 * Because of the fact that unremoved hooks are being removed when a vfs_t
158 * is being freed, the client should not expect that remove_callback()
159 * would be called only when the vfs_t is still alive.
160 *
161 * 5. Internals.
162 * fsh_fsrecord_t is a structure which lives inside a vfs_t.
163 * fsh_fsrecord_t contains:
164 * - an rw-lock that protects the structure
165 * - a list of hooks installed on this vfs_t
166 * - a flag which tells whether fsh is enabled on this vfs_t
167 *
168 * Unfortunately, because of unexpected behaviour of some filesystems (no
169 * use of vfs_alloc()/vfs_init()) there's no good place to initialise the
170 * fsh_fsrecord_t structure. The approach being used here is to check if
171 * it's initialised in every call. Because of the fact that no lock could
172 * be used here (the same problem with initialisation), a spinlock is used.
173 * This is explained in more detail in a comment before
174 * fsh_prepare_fsrec(), a function that should be used whenever a
175 * vfsp->vfs_fshrecord needs to be accessed. After doing that, it's
176 * completely safe to keep this pointer locally, because it won't be
177 * changed until vfs_free() is called.
178 *
179 * The only exception to the fsh_prepare_fsrec() rule is vfs_free(),
180 * where it is expected that no other fsh calls would be made for the
181 * vfs_t that's being freed.
182 *
183 * When there are no fsh functions that use a particular fsh_fsrecord_t
184 * executing, the vfs_fshrecord pointer won't be equal to fsh_res_ptr. It
185 * would be NULL or a pointer to an initialised fsh_fsrecord_t.
186 *
187 *
188 * fsh_next_foo()
189 * This function is quite simple. It takes the fsh_int_t and passes control to
190 * the next hook or to the underlying vnodeop/vfsop.
191 *
192 *
193 * Mount callbacks are executed by a call to fsh_exec_mount_callbacks() right
194 * before returning from domount()@vfs.c.
195 *
196 * Free callbacks are executed by a call to fsh_exec_free_callbacks() right
197 * before calling VFS_FREEVFS(), after vfs_t's reference count drops to 0.
198 *
199 *
200 * 6. Concurrency
201 * fsh does no vfs_t nor vnode_t locking. It is expected that whenever it is
202 * needed, the consumer does that.
203 *
204 * An fsh_fsrecord_t of a vfs_t is read-locked (fshfsr_lock) by every
205 * fsh_foo() function (with the mentioned vfs_t as a parameter, of course).
206 * This means that fsh_hook_{install,remove}() must NOT be called inside of
207 * a hook, because it will cause a deadlock.
208 *
209 * The same applies to callbacks: fsh_cblist is read-locked by
210 * fsh_exec_{mount,free}_callbacks(), so fsh_callback_{install,remove}()
211 * must not be called inside a callback, because it will cause a deadlock.
212 *
213 * Solutions to concurrency issues involving vfs_fshrecord are explained
214 * both in chapter 5 ("Internals") and above fsh_prepare_fsrec().
215 */
216
217 /* Internals */
218 struct fsh_int {
219 fsh_handle_t fshi_handle;
220 fsh_t fshi_hooks;
221 list_node_t fshi_next;
222 };
223
224 typedef struct fsh_callback_int {
225 fsh_callback_t fshci_cb;
226 fsh_callback_handle_t fshci_handle;
227 list_node_t fshci_next;
228 } fsh_callback_int_t;
229
230 /* Used for mapping an fsh_handle_t to fsh_int_t. */
231 typedef struct fsh_mapping {
232 fsh_handle_t fshm_handle;
233 fsh_int_t *fshm_fshi;
234 vfs_t *fshm_vfsp;
235 list_node_t fshm_next;
236 } fsh_mapping_t;
237
238 /*
239 * fsh_fsrecord_t is the main internal structure. It's content is protected
240 * by fshfsr_lock. The fshfsr_list is a list of fsh_int_t hook entries for
241 * the vfs_t that contains the fsh_fsrecord_t.
242 *
243 * It is guaranteed by the fsh_prepare_fsrec() that outside the fsh,
244 * a pointer to fsh_fsrecord inside a vfs_t is never equal to fsh_res_ptr.
245 */
246 struct fsh_fsrecord {
247 krwlock_t fshfsr_lock;
248 int fshfsr_enabled;
249 list_t fshfsr_list; /* list of fsh_int_t */
250 };
251
252 /*
253 * It's a list of fsh_mapping_t's used to map fsh_handle_t's to
254 * fsh_int_t's. This is needed because of the fact that we'd like an opaque
255 * handle returned to the fsh API client after a hook is successfully
256 * installed. We'd like to make the handle the only thing that is needed
257 * after the hooks are installed, for futher actions on them. This means,
258 * that there is no easy way to search for the hooks matching a handle,
259 * without having the vfs_t on which they are installed.
260 * The same problem doesn't apply to callbacks, that's why fsh_map handles
261 * only fsh_handle_t to fsh_int_t translation.
262 */
263 static kmutex_t fsh_map_lock;
264 static list_t fsh_map;
265
266 /*
267 * It's a list of fsh_callback_int_t's. Unlike hooks, there is no need to
268 * keep a separate list for translating handles to fsh_callback_int_t's,
269 * because a callback list is global for all the vfs_t's.
270 */
271 static krwlock_t fsh_cblist_lock;
272 static list_t fsh_cblist;
273
/*
 * Pointer value reserved for fsh purposes. It exists because of the method
 * chosen for solving the concurrency issues with vfs_fshrecord; the full
 * explanation is in the big theory statement at the beginning of this file.
 * The value is assigned in fsh_init().
 */
static void *fsh_res_ptr;
281
282 static fsh_fsrecord_t *fsh_fsrec_create();
283
284 int fsh_limit = INT_MAX;
285 static id_space_t *fsh_idspace;
286
287 /*
288 * Important note:
289 * Before using this function, fsh_init() MUST be called. We do that in
290 * vfsinit()@vfs.c.
291 *
292 * One would ask, why isn't the vfsp->vfs_fshrecord initialised when the
293 * vfs_t is created. Unfortunately, some filesystems (e.g. fifofs) do not
294 * call vfs_init() or even vfs_alloc(), It's possible that some unbundled
295 * filesystems could do the same thing. That's why this solution is
296 * introduced. It should be called before any code that needs access to
297 * vfs_fshrecord.
298 *
299 * Locking:
300 * There are no locks here, because there's no good place to initialise
301 * the lock. Concurrency issues are solved by using atomic instructions
302 * and a spinlock, which is spinning only once for a given vfs_t. Because
303 * of that, the usage of the spinlock isn't bad at all.
304 *
305 * How it works:
306 * a) if vfsp->vfs_fshrecord equals NULL, atomic_cas_ptr() changes it to
307 * fsh_res_ptr. That's a signal for other threads, that the structure
308 * is being initialised.
309 * b) if vfsp->vfs_fshrecord equals fsh_res_ptr, that means we have to wait,
310 * because vfs_fshrecord is being initialised by another call.
311 * c) other cases:
312 * vfs_fshrecord is already initialised, so we can use it. It won't change
313 * until vfs_free() is called. It can't happen when someone is holding
314 * the vfs_t, which is expected from the caller of fsh API.
315 */
316 static void
317 fsh_prepare_fsrec(vfs_t *vfsp)
318 {
319 fsh_fsrecord_t *fsrec;
320
321 while ((fsrec = atomic_cas_ptr(&vfsp->vfs_fshrecord, NULL,
322 fsh_res_ptr)) == fsh_res_ptr)
323 ;
324
325 if (fsrec == NULL)
326 atomic_swap_ptr(&vfsp->vfs_fshrecord, fsh_fsrec_create());
327 }
328
329 /*
330 * API for enabling/disabling fsh per vfs_t.
331 *
332 * These functions must NOT be called in a hook.
333 */
334 void
335 fsh_fs_enable(vfs_t *vfsp)
336 {
337 fsh_prepare_fsrec(vfsp);
338
339 rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
340 vfsp->vfs_fshrecord->fshfsr_enabled = 1;
341 rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
342 }
343
344 void
345 fsh_fs_disable(vfs_t *vfsp)
346 {
347 fsh_prepare_fsrec(vfsp);
348
349 rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
350 vfsp->vfs_fshrecord->fshfsr_enabled = 0;
351 rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
352 }
353
354 /*
355 * API used for installing hooks. fsh_handle_t is returned for further
356 * actions (currently just removing) on this set of hooks.
357 *
358 * fsh_t fields:
359 * - arg - argument passed to every hook
360 * - remove_callback - callback executed right after a hook is removed
361 * - read, write, ... - pointers to hooks for corresponding vnodeops/vfsops;
362 * if there is no hook desired for an operation, it should be set to
363 * NULL
364 *
365 * It's important that the hooks are executed in LIFO installation order (they
366 * are added to the head of the hook list).
367 *
368 * This function must NOT be called in a hook.
369 *
370 * Returns (-1) if hook/callback limit exceeded, handle otherwise.
371 */
372 fsh_handle_t
373 fsh_hook_install(vfs_t *vfsp, fsh_t *hooks)
374 {
375 fsh_handle_t handle;
376 fsh_int_t *fshi;
377 fsh_mapping_t *mapping;
378
379 fsh_prepare_fsrec(vfsp);
380
381 if ((handle = id_alloc(fsh_idspace)) == -1)
382 return (-1);
383
384 fshi = kmem_alloc(sizeof (*fshi), KM_SLEEP);
385 (void) memcpy(&fshi->fshi_hooks, hooks, sizeof (fshi->fshi_hooks));
386 fshi->fshi_handle = handle;
387
388 /* If it is called inside of a hook, causes deadlock. */
389 rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
390 list_insert_head(&vfsp->vfs_fshrecord->fshfsr_list, fshi);
391 rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
392
393
394 mapping = kmem_alloc(sizeof (*mapping), KM_SLEEP);
395 mapping->fshm_handle = handle;
396 mapping->fshm_vfsp = vfsp;
397 mapping->fshm_fshi = fshi;
398
399 mutex_enter(&fsh_map_lock);
400 list_insert_head(&fsh_map, mapping);
401 mutex_exit(&fsh_map_lock);
402
403 return (handle);
404 }
405
406 /* Finds a mapping by handle and removes it from from fsh_map. */
407 static fsh_mapping_t *
408 fsh_remove_mapping(fsh_handle_t handle)
409 {
410 fsh_mapping_t *mapping;
411
412 mutex_enter(&fsh_map_lock);
413 for (mapping = list_head(&fsh_map); mapping != NULL;
414 mapping = list_next(&fsh_map, mapping)) {
415 if (mapping->fshm_handle == handle) {
416 list_remove(&fsh_map, mapping);
417 break;
418 }
419 }
420 mutex_exit(&fsh_map_lock);
421 return (mapping);
422 }
423
424 /*
425 * Used for removing a hook set. remove_callback() is executed right after
426 * the hook is removed from the vfs_t.
427 *
428 * This function must NOT be called in a hook.
429 *
430 * Returns (-1) if hook wasn't found, 0 otherwise.
431 */
432 int
433 fsh_hook_remove(fsh_handle_t handle)
434 {
435 fsh_fsrecord_t *fsrecp;
436 fsh_mapping_t *mapping;
437 fsh_t *hooks;
438
439 mapping = fsh_remove_mapping(handle);
440 if (mapping == NULL)
441 return (-1);
442
443 ASSERT(mapping->fshm_fshi->fshi_handle == handle);
444
445 /*
446 * We don't have to call fsh_prepare_fsrec() here. fsh_fsrecord_t
447 * is already initialised, because we've found a mapping for the given
448 * handle. We instead make two ASSERTs.
449 */
450 fsrecp = mapping->fshm_vfsp->vfs_fshrecord;
451 ASSERT(fsrecp != NULL);
452 ASSERT(fsrecp != fsh_res_ptr);
453
454 /* If it is called inside of a hook, causes deadlock. */
455 rw_enter(&fsrecp->fshfsr_lock, RW_WRITER);
456 list_remove(&fsrecp->fshfsr_list, mapping->fshm_fshi);
457 rw_exit(&fsrecp->fshfsr_lock);
458
459 hooks = &mapping->fshm_fshi->fshi_hooks;
460 if (hooks->remove_callback != NULL)
461 (*(hooks->remove_callback))(hooks->arg, mapping->fshm_handle);
462
463 id_free(fsh_idspace, handle);
464
465 kmem_free(mapping->fshm_fshi, sizeof (*mapping->fshm_fshi));
466 kmem_free(mapping, sizeof (*mapping));
467
468 return (0);
469 }
470
471 /*
472 * API for installing global mount/free callbacks.
473 *
474 * fsh_callback_t fields:
475 * fshc_arg - argument passed to the callbacks
476 * fshc_free - callback fired before VFS_FREEVFS() is called, after vfs_count
477 * drops to 0
478 * fshc_mount - callback fired right before returning from domount()
479 * The first argument of these callbacks is the vfs_t that is mounted/freed.
480 * The second one is the fshc_arg.
481 *
482 * fsh_callback_handle_t is filled out by this function.
483 *
484 * This function must NOT be called in a callback, because it will cause
485 * a deadlock.
486 *
487 * Returns (-1) if hook/callback limit exceeded.
488 */
489 fsh_callback_handle_t
490 fsh_callback_install(fsh_callback_t *callback)
491 {
492 fsh_callback_int_t *fshci;
493 fsh_callback_handle_t handle;
494
495 if ((handle = id_alloc(fsh_idspace)) == -1)
496 return (-1);
497
498 fshci = (fsh_callback_int_t *)kmem_alloc(sizeof (*fshci), KM_SLEEP);
499 (void) memcpy(&fshci->fshci_cb, callback, sizeof (fshci->fshci_cb));
500 fshci->fshci_handle = handle;
501
502 /* If it is called in a {mount,free} callback, causes deadlock. */
503 rw_enter(&fsh_cblist_lock, RW_WRITER);
504 list_insert_head(&fsh_cblist, fshci);
505 rw_exit(&fsh_cblist_lock);
506
507 return (handle);
508 }
509
510 /*
511 * API for removing global mount/free callbacks.
512 *
513 * This function must NOT be called in a callback, because it will cause
514 * a deadlock.
515 *
516 * Returns (-1) if callback wasn't found, 0 otherwise.
517 */
518 int
519 fsh_callback_remove(fsh_callback_handle_t handle)
520 {
521 fsh_callback_int_t *fshci;
522
523 /* If it is called in a {mount,free} callback, causes deadlock. */
524 rw_enter(&fsh_cblist_lock, RW_WRITER);
525 for (fshci = list_head(&fsh_cblist); fshci != NULL;
526 fshci = list_next(&fsh_cblist, fshci)) {
527 if (fshci->fshci_handle == handle) {
528 list_remove(&fsh_cblist, fshci);
529 break;
530 }
531 }
532 rw_exit(&fsh_cblist_lock);
533
534 if (fshci == NULL)
535 return (-1);
536
537 kmem_free(fshci, sizeof (*fshci));
538 id_free(fsh_idspace, handle);
539
540 return (0);
541 }
542
543 /*
544 * This function is executed right before returning from domount()@vfs.c.
545 * We are sure that it's called only after fsh_init().
546 * It executes all the mount callbacks installed in the fsh.
547 */
548 void
549 fsh_exec_mount_callbacks(vfs_t *vfsp)
550 {
551 fsh_callback_int_t *fshci;
552 fsh_callback_t *cb;
553
554 rw_enter(&fsh_cblist_lock, RW_READER);
555 for (fshci = list_head(&fsh_cblist); fshci != NULL;
556 fshci = list_next(&fsh_cblist, fshci)) {
557 cb = &fshci->fshci_cb;
558 if (cb->fshc_mount != NULL)
559 (*(cb->fshc_mount))(vfsp, cb->fshc_arg);
560 }
561 rw_exit(&fsh_cblist_lock);
562 }
563
564 /*
565 * This function is executed right before VFS_FREEVFS() is called in
566 * vfs_rele()@vfs.c. We are sure that it's called only after fsh_init().
567 * It executes all the free callbacks installed in the fsh.
568 */
569 void
570 fsh_exec_free_callbacks(vfs_t *vfsp)
571 {
572 fsh_callback_int_t *fshci;
573 fsh_callback_t *cb;
574
575 rw_enter(&fsh_cblist_lock, RW_READER);
576 for (fshci = list_head(&fsh_cblist); fshci != NULL;
577 fshci = list_next(&fsh_cblist, fshci)) {
578 cb = &fshci->fshci_cb;
579 if (cb->fshc_free != NULL)
580 (*(cb->fshc_free))(vfsp, cb->fshc_arg);
581 }
582 rw_exit(&fsh_cblist_lock);
583 }
584
585 /*
586 * API for vnode.c/vfs.c to start executing the fsh for a given operation.
587 *
588 * These interfaces are using fsh_res_ptr (in fsh_prepare_fsrec()), so it's
589 * absolutely necessary to call fsh_init() before using them. That's done in
590 * vfsinit().
591 *
592 * While these functions are executing, it's expected that necessary vfs_t's
593 * are held so that vfs_free() isn't called. vfs_free() expects that no one
594 * else accesses vfs_fshrecord of a given vfs_t.
595 * It's also the caller's responsibility to keep the vnode_t passed to
596 * fsh_foo() alive and valid.
597 * All these expectations are met because these functions are used only in
598 * the corresponding {fop,fsop}_foo() functions.
599 */
600 int
601 fsh_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
602 caller_context_t *ct)
603 {
604 int ret;
605 fsh_fsrecord_t *fsrecp;
606
607 fsh_prepare_fsrec(vp->v_vfsp);
608 fsrecp = vp->v_vfsp->vfs_fshrecord;
609
610 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
611 if (!(fsrecp->fshfsr_enabled)) {
612 rw_exit(&fsrecp->fshfsr_lock);
613 return ((*(vp->v_op->vop_read))(vp, uiop, ioflag, cr, ct));
614 }
615
616 ret = fsh_next_read(list_head(&fsrecp->fshfsr_list), vp, uiop, ioflag,
617 cr, ct);
618 rw_exit(&fsrecp->fshfsr_lock);
619
620 return (ret);
621 }
622
623 int
624 fsh_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
625 caller_context_t *ct)
626 {
627 int ret;
628 fsh_fsrecord_t *fsrecp;
629
630 fsh_prepare_fsrec(vp->v_vfsp);
631 fsrecp = vp->v_vfsp->vfs_fshrecord;
632
633 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
634 if (!(vp->v_vfsp->vfs_fshrecord->fshfsr_enabled)) {
635 rw_exit(&fsrecp->fshfsr_lock);
636 return ((*(vp->v_op->vop_write))(vp, uiop, ioflag, cr, ct));
637 }
638
639 ret = fsh_next_write(list_head(&fsrecp->fshfsr_list), vp, uiop, ioflag,
640 cr, ct);
641 rw_exit(&fsrecp->fshfsr_lock);
642
643 return (ret);
644 }
645
646 int
647 fsh_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
648 {
649 fsh_fsrecord_t *fsrecp;
650 int ret;
651
652 fsh_prepare_fsrec(vfsp);
653 fsrecp = vfsp->vfs_fshrecord;
654
655 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
656 if (!(fsrecp->fshfsr_enabled)) {
657 rw_exit(&fsrecp->fshfsr_lock);
658 return ((*(vfsp->vfs_op->vfs_mount))(vfsp, mvp, uap, cr));
659 }
660
661 ret = fsh_next_mount(list_head(&fsrecp->fshfsr_list), vfsp, mvp, uap,
662 cr);
663 rw_exit(&fsrecp->fshfsr_lock);
664
665 return (ret);
666 }
667
668 int
669 fsh_unmount(vfs_t *vfsp, int flag, cred_t *cr)
670 {
671 fsh_fsrecord_t *fsrecp;
672 int ret;
673
674 fsh_prepare_fsrec(vfsp);
675 fsrecp = vfsp->vfs_fshrecord;
676
677 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
678 if (!(fsrecp->fshfsr_enabled)) {
679 rw_exit(&fsrecp->fshfsr_lock);
680 return ((*(vfsp->vfs_op->vfs_unmount))(vfsp, flag, cr));
681 }
682
683 ret = fsh_next_unmount(list_head(&fsrecp->fshfsr_list), vfsp, flag, cr);
684 rw_exit(&fsrecp->fshfsr_lock);
685
686 return (ret);
687 }
688
689 /*
690 * This is the funtion used by fsh_prepare_fsrec() to allocate a new
691 * fsh_fsrecord. This function is called by the first function which
692 * access the vfs_fshrecord and finds out it's NULL.
693 */
694 static fsh_fsrecord_t *
695 fsh_fsrec_create()
696 {
697 fsh_fsrecord_t *fsrecp;
698
699 fsrecp = (fsh_fsrecord_t *)kmem_zalloc(sizeof (*fsrecp), KM_SLEEP);
700 list_create(&fsrecp->fshfsr_list, sizeof (fsh_int_t),
701 offsetof(fsh_int_t, fshi_next));
702 rw_init(&fsrecp->fshfsr_lock, NULL, RW_DRIVER, NULL);
703 fsrecp->fshfsr_enabled = 1;
704 return (fsrecp);
705 }
706
707 /*
708 * This call can be used ONLY in vfs_free(). It's assumed that no other
709 * fsh_foo() using the vfs_t that owns the fsh_fsrecord to be destroyed
710 * is executing while call to fsh_fsrec_destroy() is made. With this
711 * assumptions, no concurrency issues occur.
712 *
713 * Before calling this function outside the fsh, it's sufficient and
714 * requited to check if the passed fsh_fsrecord * is not NULL. We don't
715 * have to check if it is not equal to fsh_res_ptr, because all the fsh API
716 * calls involving this vfs_t should end before vfs_free() is called
717 * (outside the fsh, fsh_fsrecord is never equal to fsh_res_ptr). That is
718 * guaranteed by the explicit requirement that the caller of fsh API holds
719 * the vfs_t when needed.
720 *
721 * All the remaining hooks are being removed.
722 */
723 void
724 fsh_fsrec_destroy(struct fsh_fsrecord *volatile fsrecp)
725 {
726 fsh_int_t *fshi;
727
728 ASSERT(fsrecp != NULL);
729
730 /*
731 * TODO: We could add fshi_mapping field to fsh_int_t and make the
732 * process of removing the hooks faster. fsh_remove_mapping() works in
733 * linear time, because it has to find the element that matches the
734 * handle. It doesn't have to be this way, because the fsh_int_t
735 * could have contained the fshi_mapping field pointed to the
736 * corresponding fsh_mapping_t.
737 * Since we don't really know if it's going to be a performance
738 * issue, we leave it like this for the sake of simplicity.
739 */
740 while ((fshi = list_remove_head(&fsrecp->fshfsr_list)) != NULL) {
741 /*
742 * It's a simpler version of the code present in
743 * fsh_hook_remove(). There is no fsh_fshrecord locking here,
744 * because it is not needed anymore.
745 */
746 fsh_mapping_t *mapping;
747 fsh_t *hooks;
748
749 mapping = fsh_remove_mapping(fshi->fshi_handle);
750 /*
751 * mapping == NULL was acceptable in fsh_hook_remove()
752 * because an invalid handle could have been passed by the
753 * client. This time, we take the handle from an internal
754 * structure, so it definitely has to be present in the
755 * fsh_map.
756 */
757 ASSERT(mapping != NULL);
758 ASSERT(mapping->fshm_handle == fshi->fshi_handle);
759 ASSERT(mapping->fshm_fshi == fshi);
760
761 hooks = &mapping->fshm_fshi->fshi_hooks;
762 if (hooks->remove_callback != NULL)
763 (*(hooks->remove_callback))(hooks->arg,
764 mapping->fshm_handle);
765
766 id_free(fsh_idspace, fshi->fshi_handle);
767
768 kmem_free(fshi, sizeof (*fshi));
769 kmem_free(mapping, sizeof (*mapping));
770 }
771 list_destroy(&fsrecp->fshfsr_list);
772 rw_destroy(&fsrecp->fshfsr_lock);
773 kmem_free(fsrecp, sizeof (*fsrecp));
774 }
775
776 /*
777 * fsh_init() is called in vfsinit()@vfs.c. This function MUST be called
778 * before every other fsh call.
779 */
780 void
781 fsh_init(void)
782 {
783 rw_init(&fsh_cblist_lock, NULL, RW_DRIVER, NULL);
784 list_create(&fsh_cblist, sizeof (fsh_callback_int_t),
785 offsetof(fsh_callback_int_t, fshci_next));
786
787 mutex_init(&fsh_map_lock, NULL, MUTEX_DRIVER, NULL);
788 list_create(&fsh_map, sizeof (fsh_mapping_t),
789 offsetof(fsh_mapping_t, fshm_next));
790
791 /* See comment above fsh_prepare_fsrec() */
792 fsh_res_ptr = (void *)-1;
793
794 fsh_idspace = id_space_create("fsh", 0, fsh_limit);
795 }
796
797 /*
798 * These functions are used to pass control to the next hook or underlying
799 * vop or vfsop. Their consumer doesn't have to worry about any locking,
800 * because all the necessities are guaranteed by the fsh_foo().
801 *
802 * In fsh_next_foo() we execute the hook passed in the first argument and
803 * try to find the next one. It is guaranteed that the passed hook is still
804 * valid, because of fshfsr_lock held by fsh_foo().
805 */
806 int
807 fsh_next_read(fsh_int_t *fshi, vnode_t *vp, uio_t *uiop, int ioflag,
808 cred_t *cr, caller_context_t *ct)
809 {
810 fsh_int_t *next;
811
812 if (fshi == NULL)
813 return ((*(vp->v_op->vop_read))(vp, uiop, ioflag, cr, ct));
814
815 next = fshi;
816 do {
817 next = list_next(&vp->v_vfsp->vfs_fshrecord->fshfsr_list,
818 next);
819 } while (next != NULL && next->fshi_hooks.read == NULL);
820
821 return ((*(fshi->fshi_hooks.read))(next, fshi->fshi_hooks.arg, vp,
822 uiop, ioflag, cr, ct));
823 }
824
825 int
826 fsh_next_write(fsh_int_t *fshi, vnode_t *vp, uio_t *uiop, int ioflag,
827 cred_t *cr, caller_context_t *ct)
828 {
829 fsh_int_t *next;
830
831 if (fshi == NULL)
832 return ((*(vp->v_op->vop_write))(vp, uiop, ioflag, cr, ct));
833
834 next = fshi;
835 do {
836 next = list_next(&vp->v_vfsp->vfs_fshrecord->fshfsr_list, next);
837 } while (next != NULL && next->fshi_hooks.write == NULL);
838
839 return ((*(fshi->fshi_hooks.write))(next, fshi->fshi_hooks.arg, vp,
840 uiop, ioflag, cr, ct));
841 }
842
843 int
844 fsh_next_mount(fsh_int_t *fshi, vfs_t *vfsp, vnode_t *mvp, struct mounta *uap,
845 cred_t *cr)
846 {
847 fsh_int_t *next;
848
849 if (fshi == NULL)
850 return ((*(vfsp->vfs_op->vfs_mount))(vfsp, mvp, uap, cr));
851
852 next = fshi;
853 do {
854 next = list_next(&vfsp->vfs_fshrecord->fshfsr_list, next);
855 } while (next != NULL && next->fshi_hooks.mount == NULL);
856
857 return ((*(fshi->fshi_hooks.mount))(next, fshi->fshi_hooks.arg, vfsp,
858 mvp, uap, cr));
859 }
860
861 int
862 fsh_next_unmount(fsh_int_t *fshi, vfs_t *vfsp, int flag, cred_t *cr)
863 {
864 fsh_int_t *next;
865
866 if (fshi == NULL)
867 return ((*(vfsp->vfs_op->vfs_unmount))(vfsp, flag, cr));
868
869 next = fshi;
870 do {
871 next = list_next(&vfsp->vfs_fshrecord->fshfsr_list, next);
872 } while (next != NULL && next->fshi_hooks.unmount == NULL);
873
874 return ((*(fshi->fshi_hooks.unmount))(next, fshi->fshi_hooks.arg,
875 vfsp, flag, cr));
876 }