1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2013 Damian Bogel. All rights reserved.
14 */
15
16 #include <sys/debug.h>
17 #include <sys/errno.h>
18 #include <sys/fsh.h>
19 #include <sys/fsh_impl.h>
20 #include <sys/id_space.h>
21 #include <sys/kmem.h>
22 #include <sys/ksynch.h>
23 #include <sys/list.h>
24 #include <sys/sunddi.h>
25 #include <sys/sysmacros.h>
26 #include <sys/types.h>
27 #include <sys/vfs.h>
28 #include <sys/vnode.h>
29
30 /*
31 * Filesystem hook framework (fsh)
32 *
33 * 1. Abstract.
34 * The main goal of the filesystem hook framework is to provide an easy way to
35 * inject client-defined behaviour into vfs/vnode calls. fsh works on
36 * vfs_t granularity.
37 *
38 *
39 * 2. Overview.
40 * fsh_t is the main object in the fsh. An fsh_t is a structure containing:
41 * - pointers to hooking functions (named after corresponding
42 * vnodeops/vfsops)
43 * - a pointer to an argument to pass (this is shared for all the
44 * hooks in a given fsh_t)
45 *
46 * The information from fsh_t is copied by the fsh and an fsh_handle_t
47 * is returned. It should be used for further removing.
48 *
49 *
50 * 3. Usage.
51 * It is expected that vfs_t/vnode_t that are passed to fsh_foo() functions
52 * are held by the caller when needed. fsh does no vfs_t/vnode_t locking.
53 *
54 * fsh_t is a structure filled out by the client. If a client does not want
55 * to add/remove a hook for function foo(), he should fill the foo field of
56 * fsh_t with NULL. Every hook has a type of corresponding vfsop/vnodeop with
57 * two additional arguments:
 *	- fsh_int_t *fsh_int - this argument MUST be passed to
 *	  fsh_next_foo(). fsh wouldn't know which hook to execute next
 *	  without it
61 * - void *arg - this is the argument passed with fsh_t during
62 * installation
63 * After installation, an fsh_handle_t is returned to the caller.
64 *
65 * A client might want to fire callbacks when vfs_t's are being mounted
66 * or freed. There's an fsh_callback_t structure provided to install such
67 * callbacks along with the API.
68 * It is legal to call fsh_hook_{install,remove}() inside a mount callback
69 * WITHOUT holding the vfs_t.
70 *
71 * After vfs_t's free callback returns, all the handles associated with the
72 * hooks installed on this vfs_t are invalid and must not be used.
73 *
74 *
75 * Every hook function is responsible for passing the control to the next
76 * hook associated with a particular call. In order to provide an easy way to
77 * modify the behaviour of a function call both before and after the
78 * underlying vfsop/vnodeop (or next hook) execution, a hook has to call
79 * fsh_next_foo() at some point. This function does necessary internal
80 * operations and calls the next hook, until there's no hook left, then it
81 * calls the underlying vfsop/vnodeop.
82 * Example:
83 * my_freefs(fsh_int_t *fsh_int, void *arg, vfs_t *vfsp) {
84 * cmn_err(CE_NOTE, "freefs called!\n");
85 * return (fsh_next_freefs(fsh_int, vfsp));
86 * }
87 *
88 *
89 * 4. API
90 * None of the APIs should be called during interrupt context above lock
91 * level. The only exceptions are fsh_next_foo() functions, which do not use
92 * locks.
93 *
94 * a) fsh.h
 * None of the functions listed below should be called inside of a hook.
96 * Doing so will cause a deadlock. The only exceptions are fsh_next_foo() and
97 * fsh_callback_{install,remove}().
98 *
99 * fsh_callback_{install,remove}() should not be called inside of a {mount,free}
100 * callback. Doing so will cause a deadlock.
101 *
102 * fsh_fs_enable(vfs_t *vfsp)
103 * fsh_fs_disable(vfs_t *vfsp)
104 * Enables/disables fsh for a given vfs_t.
105 *
106 * fsh_hook_install(vfs_t *vfsp, fsh_t *hooks)
107 * Installs hooks on vfsp filesystem. It's important that hooks are
108 * executed in LIFO installation order, which means that if there are
109 * hooks A and B installed in this order, B is going to be executed
110 * before A. It returns a correct handle, or (-1) if hook/callback
111 * limit exceeded. The handle is valid until a free callback returns
112 * or an explicit call to fsh_hook_remove().
113 *
114 * fsh_hook_remove(fsh_handle_t handle)
 *	Removes a hook and invalidates the handle. It is guaranteed that after
 *	this function returns, the hook associated with this handle won't be
 *	executing.
118 *
 * fsh_next_foo(fsh_int_t *fsh_int, ARGUMENTS)
120 * This is the function which should be called once in every hook. It
121 * does the necessary internal operations and passes control to the
122 * next hook or, if there's no hook left, to the underlying
123 * vfsop/vnodeop.
124 *
125 * fsh_callback_install(fsh_callback_t *callback)
126 * fsh_callback_remove(fsh_callback_handle_t handle)
127 * Installs/removes callbacks for vfs_t mount/free. The mount callback
128 * is executed right before domount() returns. The free callback is
129 * called right before VFS_FREEVFS() is called. The
130 * fsh_callback_install() returns a correct handle, or (-1) if
131 * hook/callback limit exceeded.
132 *
133 * b) fsh_impl.h (for vfs.c and vnode.c only)
134 * fsh_init()
135 * This call has to be done in vfsinit(). It initialises the fsh. It
136 * is absolutely necessary that this call is made before any other fsh
137 * operation.
138 *
139 * fsh_exec_mount_callbacks(vfs_t *vfsp)
140 * fsh_exec_free_callbacks(vfs_t *vfsp)
141 * Used to execute all fsh callbacks for {mount,free} of a vfs_t.
142 *
143 * fsh_fsrec_destroy(struct fsh_fsrecord *fsrecp)
144 * Destroys an fsh_fsrecord structure. All the hooks installed on this
145 * vfs_t are then destroyed. free callback is called before this function.
146 *
147 * fsh_foo(ARGUMENTS)
148 * Function used to start executing the hook chain for a given call.
149 *
150 * 5. Internals.
151 * fsh_fsrecord_t is a structure which lives inside a vfs_t.
152 * fsh_fsrecord_t contains:
153 * - an rw-lock that protects the structure
154 * - a list of hooks installed on this vfs_t
155 * - a flag which tells whether fsh is enabled on this vfs_t
156 *
157 * Unfortunately, because of unexpected behaviour of some filesystems (no use of
158 * vfs_alloc()/vfs_init()) there's no good place to initialise the
 * fsh_fsrecord_t structure. The approach being used here is to check if it's
160 * initialised in every call. Because of the fact that no lock could be used
161 * here (the same problem with initialisation), a spinlock is used. This is
162 * explained in more detail in a comment before fsh_prepare_fsrec(), a function
163 * that MUST be used whenever a vfsp->vfs_fshrecord needs to be accessed. After
164 * doing that, it's completely safe to keep this pointer locally, because it
165 * won't be changed until vfs_free() is called.
166 *
 * The only exception from the fsh_prepare_fsrec() rule is vfs_free(),
 * where it is expected that no other fsh calls would be made for the
 * vfs_t that's being freed. That's why vfs_fshrecord could be only NULL or a
 * valid pointer and could not be concurrently accessed.
171 *
172 * When there are no fsh functions (that use a particular fsh_fsrecord_t)
173 * executing, the vfs_fshrecord pointer won't be equal to fsh_res_ptr. It
174 * would be NULL or a pointer to an initialised fsh_fsrecord_t.
175 *
176 *
177 * Mount callbacks are executed by a call to fsh_exec_mount_callbacks() right
178 * before returning from domount()@vfs.c.
179 *
180 * Free callbacks are executed by a call to fsh_exec_free_callbacks() right
181 * before calling VFS_FREEVFS(), after vfs_t's reference count drops to 0.
182 *
183 *
184 * fsh_next_foo(fsh_int_t *fshi, ARGUMENTS)
185 * This function is quite simple. It takes the fsh_int_t and passes control
186 * to the next hook or to the underlying vnodeop/vfsop. fshi is always the
187 * next hook to be executed.
188 *
189 *
190 * 6. Concurrency
191 * fsh does no vfs_t nor vnode_t locking. It is expected that whenever it is
192 * needed, the client does that.
193 *
194 * An fsh_fsrecord_t of a vfs_t is read-locked (fshfsr_lock) by every
195 * fsh_foo() function (with the mentioned vfs_t as a parameter, of course).
196 * This means that fsh_hook_{install,remove}() must NOT be called inside of
197 * a hook, because it will cause a deadlock.
198 *
 * The same thing applies to callbacks. fsh_cblist is read-locked by
 * fsh_exec_{mount,free}_callbacks(). This means that
 * fsh_callback_{install,remove}() must not be called inside a callback,
 * because it will cause a deadlock.
202 *
 * Solutions to concurrency issues involving vfs_fshrecord are explained
 * both in chapter 5 "Internals" and in the comment before
 * fsh_prepare_fsrec().
205 *
206 * Concurrency issues between fsh_hook_remove() and fsh_fsrec_destroy() are
207 * solved by fsh_remove_lock. For more info see: fsh_remove_lock,
208 * fsh_hook_remove(), fsh_fsrec_destroy().
209 */
210
211 /* Internals */
212 /* Used for mapping an fsh_handle_t to fsh_int_t. */
typedef struct fsh_mapping {
	fsh_handle_t fshm_handle;	/* handle handed out to the client */
	fsh_int_t *fshm_fshi;		/* hook set registered under the handle */
	vfs_t *fshm_vfsp;		/* vfs_t the hook set is installed on */
	list_node_t fshm_next;		/* linkage on the global fsh_map list */
} fsh_mapping_t;
219
/* Internal representation of one installed hook set. */
struct fsh_int {
	fsh_handle_t fshi_handle;	/* handle identifying this hook set */
	fsh_t fshi_hooks;		/* private copy of the client's fsh_t */
	fsh_mapping_t *fshi_mapping;	/* matching entry on fsh_map */
	list_node_t fshi_next;		/* linkage on fshfsr_list */
};
226
/* Internal representation of one installed mount/free callback set. */
typedef struct fsh_callback_int {
	fsh_callback_t fshci_cb;		/* private copy of the callbacks */
	fsh_callback_handle_t fshci_handle;	/* handle identifying this entry */
	list_node_t fshci_next;			/* linkage on fsh_cblist */
} fsh_callback_int_t;
232
233
234 /*
 * fsh_fsrecord_t is the main internal structure. Its content is protected
236 * by fshfsr_lock. The fshfsr_list is a list of fsh_int_t hook entries for
237 * the vfs_t that contains the fsh_fsrecord_t.
238 *
239 * It is guaranteed by the fsh_prepare_fsrec() that outside the fsh,
240 * a pointer to fsh_fsrecord inside a vfs_t is never equal to fsh_res_ptr.
241 */
struct fsh_fsrecord {
	krwlock_t fshfsr_lock;	/* guards fshfsr_enabled and fshfsr_list */
	int fshfsr_enabled;	/* non-zero: hooks are run for this vfs_t */
	list_t fshfsr_list; /* list of fsh_int_t */
};
247
248 /*
249 * It's a list of fsh_mapping_t's used to map fsh_handle_t's to
250 * fsh_int_t's. This is needed because of the fact that we'd like an opaque
251 * handle returned to the fsh API client after a hook is successfully
252 * installed. We'd like to make the handle the only thing that is needed
 * after the hooks are installed, for further actions on them. This means,
254 * that there is no easy way to search for the hooks matching a handle,
255 * without having the vfs_t on which they are installed.
256 * The same problem doesn't apply to callbacks, that's why fsh_map handles
257 * only fsh_handle_t to fsh_int_t translation.
258 */
static kmutex_t fsh_map_lock;	/* held around every fsh_map access */
static list_t fsh_map;		/* list of fsh_mapping_t */
261
262 /*
263 * The lock is used when there is a need to lock both the fsh_fsrecord_t and
264 * fsh_map.
265 * fsh_hook_remove() and fsh_fsrec_destroy() use this lock to protect both the
266 * fsh_map and the fsh_fsrecord_t associated with a vfs_t on which the removing
267 * is performed. Because fsh_hook_remove() starts with handle, through fsh_map
268 * to vfs_fshrecord and fsh_fsrec_destroy() from vfs_fshrecord to fsh_map, such
269 * lock is necessary.
270 * For more info see: fsh_fsrec_destroy() and fsh_hook_remove()
271 */
/* Taken by fsh_hook_remove() and fsh_fsrec_destroy() for their whole unlink. */
static kmutex_t fsh_remove_lock;
273
274 /*
275 * It's a list of fsh_callback_int_t's. Unlike hooks, there is no need to
276 * keep a separate list for translating handles to fsh_callback_int_t's,
277 * because a callback list is global for all the vfs_t's.
278 */
static krwlock_t fsh_cblist_lock;	/* reader: exec; writer: install/remove */
static list_t fsh_cblist;		/* list of fsh_callback_int_t */
281
282 /*
283 * A reserved pointer for fsh purposes. It is used because of the method
284 * chosen for solving concurrency issues with vfs_fshrecord. The full
285 * explanation is in the big theory statement at the beginning of this
286 * file. It is initialised in fsh_init().
287 */
288 static void *fsh_res_ptr;
289
290 static fsh_fsrecord_t *fsh_fsrec_create();
291
292 int fsh_limit = INT_MAX;
293 static id_space_t *fsh_idspace;
294
295 /*
296 * Important note:
297 * Before using this function, fsh_init() MUST be called. We do that in
298 * vfsinit()@vfs.c.
299 *
300 * One would ask, why isn't the vfsp->vfs_fshrecord initialised when the
301 * vfs_t is created. Unfortunately, some filesystems (e.g. fifofs) do not
 * call vfs_init() or even vfs_alloc(). It's possible that some unbundled
303 * filesystems could do the same thing. That's why this solution is
304 * introduced. It should be called before any code that needs access to
305 * vfs_fshrecord.
306 *
307 * Locking:
308 * There are no locks here, because there's no good place to initialise
309 * the lock. Concurrency issues are solved by using atomic instructions
310 * and a spinlock, which is spinning only once for a given vfs_t. Because
311 * of that, the usage of the spinlock isn't bad at all.
312 *
313 * How it works:
314 * a) if vfsp->vfs_fshrecord equals NULL, atomic_cas_ptr() changes it to
315 * fsh_res_ptr. That's a signal for other threads, that the structure
316 * is being initialised.
317 * b) if vfsp->vfs_fshrecord equals fsh_res_ptr, that means we have to wait,
318 * because vfs_fshrecord is being initialised by another call.
319 * c) other cases:
320 * vfs_fshrecord is already initialised, so we can use it. It won't change
321 * until vfs_free() is called. It can't happen when someone is holding
322 * the vfs_t, which is expected from the caller of fsh API.
323 */
324 static void
325 fsh_prepare_fsrec(vfs_t *vfsp)
326 {
327 fsh_fsrecord_t *fsrec;
328
329 while ((fsrec = atomic_cas_ptr(&vfsp->vfs_fshrecord, NULL,
330 fsh_res_ptr)) == fsh_res_ptr)
331 ;
332
333 if (fsrec == NULL)
334 atomic_swap_ptr(&vfsp->vfs_fshrecord, fsh_fsrec_create());
335 }
336
337 /*
338 * API for enabling/disabling fsh per vfs_t.
339 *
340 * A newly created vfs_t has fsh enabled by default. If one would want to change
341 * this behaviour, mount callbacks could be used.
342 *
343 * The caller is expected to hold the vfs_t.
344 *
345 * These functions must NOT be called in a hook.
346 */
347 void
348 fsh_fs_enable(vfs_t *vfsp)
349 {
350 fsh_prepare_fsrec(vfsp);
351
352 rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
353 vfsp->vfs_fshrecord->fshfsr_enabled = 1;
354 rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
355 }
356
357 void
358 fsh_fs_disable(vfs_t *vfsp)
359 {
360 fsh_prepare_fsrec(vfsp);
361
362 rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
363 vfsp->vfs_fshrecord->fshfsr_enabled = 0;
364 rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
365 }
366
367 /*
368 * API used for installing hooks. fsh_handle_t is returned for further
369 * actions (currently just removing) on this set of hooks.
370 *
371 * fsh_t fields:
372 * - arg - argument passed to every hook
373 * - read, write, ... - pointers to hooks for corresponding vnodeops/vfsops;
374 * if there is no hook desired for an operation, it should be set to
375 * NULL
376 *
377 * It's important that the hooks are executed in LIFO installation order (they
378 * are added to the head of the hook list).
379 *
380 * The caller is expected to hold the vfs_t.
381 *
382 * This function must NOT be called in a hook.
383 *
384 * Returns (-1) if hook/callback limit exceeded, handle otherwise.
385 */
386 fsh_handle_t
387 fsh_hook_install(vfs_t *vfsp, fsh_t *hooks)
388 {
389 fsh_handle_t handle;
390 fsh_int_t *fshi;
391 fsh_mapping_t *mapping;
392
393 fsh_prepare_fsrec(vfsp);
394
395 if ((handle = id_alloc(fsh_idspace)) == -1)
396 return (-1);
397
398 fshi = kmem_alloc(sizeof (*fshi), KM_SLEEP);
399 (void) memcpy(&fshi->fshi_hooks, hooks, sizeof (fshi->fshi_hooks));
400 fshi->fshi_handle = handle;
401
402 /*
403 * Although we lock vfs_fshrecord and fsh_map in this function,
404 * there is no need to use fsh_remove_lock. Since it is expected that a
405 * vfs_t is held across this call, fsh_fsrec_destroy() cannot be
406 * executing concurrently. fsh_hook_remove() cannot be called for this
407 * hook set, because a handle passed to that function doesn't yet exist.
408 */
409
410 /* If it is called inside of a hook, causes deadlock. */
411 rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
412 list_insert_head(&vfsp->vfs_fshrecord->fshfsr_list, fshi);
413 rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
414
415 mapping = kmem_alloc(sizeof (*mapping), KM_SLEEP);
416 mapping->fshm_handle = handle;
417 mapping->fshm_vfsp = vfsp;
418 mapping->fshm_fshi = fshi;
419 fshi->fshi_mapping = mapping;
420
421 mutex_enter(&fsh_map_lock);
422 list_insert_head(&fsh_map, mapping);
423 mutex_exit(&fsh_map_lock);
424
425 return (handle);
426 }
427
428 /*
429 * Used for removing a hook set.
430 *
431 * This function must NOT be called in a hook.
432 *
433 * Returns (-1) if hook wasn't found, 0 otherwise.
434 */
435 int
436 fsh_hook_remove(fsh_handle_t handle)
437 {
438 fsh_fsrecord_t *fsrecp;
439 fsh_mapping_t *mapping;
440
441 /* For more info about the fsh_remove_lock, see fsh_fsrec_destroy() */
442 mutex_enter(&fsh_remove_lock);
443 mutex_enter(&fsh_map_lock);
444 for (mapping = list_head(&fsh_map); mapping != NULL;
445 mapping = list_next(&fsh_map, mapping)) {
446 if (mapping->fshm_handle == handle) {
447 list_remove(&fsh_map, mapping);
448 break;
449 }
450 }
451 mutex_exit(&fsh_map_lock);
452 if (mapping == NULL) {
453 mutex_exit(&fsh_remove_lock);
454 return (-1);
455 }
456
457 ASSERT(mapping->fshm_fshi->fshi_handle == handle);
458
459 /*
460 * We don't have to call fsh_prepare_fsrec() here. fsh_fsrecord_t
461 * is already initialised, because we've found a mapping for the given
462 * handle. We instead make two ASSERTs.
463 */
464 fsrecp = mapping->fshm_vfsp->vfs_fshrecord;
465 ASSERT(fsrecp != NULL);
466 ASSERT(fsrecp != fsh_res_ptr);
467
468 /* If it is called inside of a hook, causes deadlock. */
469 rw_enter(&fsrecp->fshfsr_lock, RW_WRITER);
470 list_remove(&fsrecp->fshfsr_list, mapping->fshm_fshi);
471 rw_exit(&fsrecp->fshfsr_lock);
472
473 mutex_exit(&fsh_remove_lock);
474
475 id_free(fsh_idspace, handle);
476
477 kmem_free(mapping->fshm_fshi, sizeof (*mapping->fshm_fshi));
478 kmem_free(mapping, sizeof (*mapping));
479
480 return (0);
481 }
482
483 /*
484 * API for installing global mount/free callbacks.
485 *
486 * fsh_callback_t fields:
487 * fshc_arg - argument passed to the callbacks
488 * fshc_free - callback fired before VFS_FREEVFS() is called, after vfs_count
489 * drops to 0
490 * fshc_mount - callback fired right before returning from domount()
491 * The first argument of these callbacks is the vfs_t that is mounted/freed.
492 * The second one is the fshc_arg.
493 *
494 * fsh_callback_handle_t is filled out by this function.
495 *
496 * This function must NOT be called in a callback, because it will cause
497 * a deadlock.
498 *
499 * Returns (-1) if hook/callback limit exceeded.
500 */
501 fsh_callback_handle_t
502 fsh_callback_install(fsh_callback_t *callback)
503 {
504 fsh_callback_int_t *fshci;
505 fsh_callback_handle_t handle;
506
507 if ((handle = id_alloc(fsh_idspace)) == -1)
508 return (-1);
509
510 fshci = (fsh_callback_int_t *)kmem_alloc(sizeof (*fshci), KM_SLEEP);
511 (void) memcpy(&fshci->fshci_cb, callback, sizeof (fshci->fshci_cb));
512 fshci->fshci_handle = handle;
513
514 /* If it is called in a {mount,free} callback, causes deadlock. */
515 rw_enter(&fsh_cblist_lock, RW_WRITER);
516 list_insert_head(&fsh_cblist, fshci);
517 rw_exit(&fsh_cblist_lock);
518
519 return (handle);
520 }
521
522 /*
523 * API for removing global mount/free callbacks.
524 *
525 * This function must NOT be called in a callback, because it will cause
526 * a deadlock.
527 *
528 * Returns (-1) if callback wasn't found, 0 otherwise.
529 */
530 int
531 fsh_callback_remove(fsh_callback_handle_t handle)
532 {
533 fsh_callback_int_t *fshci;
534
535 /* If it is called in a {mount,free} callback, causes deadlock. */
536 rw_enter(&fsh_cblist_lock, RW_WRITER);
537 for (fshci = list_head(&fsh_cblist); fshci != NULL;
538 fshci = list_next(&fsh_cblist, fshci)) {
539 if (fshci->fshci_handle == handle) {
540 list_remove(&fsh_cblist, fshci);
541 break;
542 }
543 }
544 rw_exit(&fsh_cblist_lock);
545
546 if (fshci == NULL)
547 return (-1);
548
549 kmem_free(fshci, sizeof (*fshci));
550 id_free(fsh_idspace, handle);
551
552 return (0);
553 }
554
555 /*
556 * This function is executed right before returning from domount()@vfs.c.
557 * We are sure that it's called only after fsh_init().
558 * It executes all the mount callbacks installed in the fsh.
559 *
560 * Since fsh_exec_mount_callbacks() is called only inside domount(), it is legal
561 * to call fsh_hook_{install,remove}() inside a mount callback WITHOUT holding
562 * this vfs_t. This guarantee should be preserved, because it's in the "Usage"
563 * section in the big theory statement at the top of this file.
564 */
565 void
566 fsh_exec_mount_callbacks(vfs_t *vfsp)
567 {
568 fsh_callback_int_t *fshci;
569 fsh_callback_t *cb;
570
571 rw_enter(&fsh_cblist_lock, RW_READER);
572 for (fshci = list_head(&fsh_cblist); fshci != NULL;
573 fshci = list_next(&fsh_cblist, fshci)) {
574 cb = &fshci->fshci_cb;
575 if (cb->fshc_mount != NULL)
576 (*(cb->fshc_mount))(vfsp, cb->fshc_arg);
577 }
578 rw_exit(&fsh_cblist_lock);
579 }
580
581 /*
582 * This function is executed right before VFS_FREEVFS() is called in
583 * vfs_rele()@vfs.c. We are sure that it's called only after fsh_init().
584 * It executes all the free callbacks installed in the fsh.
585 *
586 * free() callback is the point where the handles associated with the hooks
587 * installed on this vfs_t become invalid
588 */
589 void
590 fsh_exec_free_callbacks(vfs_t *vfsp)
591 {
592 fsh_callback_int_t *fshci;
593 fsh_callback_t *cb;
594
595 rw_enter(&fsh_cblist_lock, RW_READER);
596 for (fshci = list_head(&fsh_cblist); fshci != NULL;
597 fshci = list_next(&fsh_cblist, fshci)) {
598 cb = &fshci->fshci_cb;
599 if (cb->fshc_free != NULL)
600 (*(cb->fshc_free))(vfsp, cb->fshc_arg);
601 }
602 rw_exit(&fsh_cblist_lock);
603 }
604
605 /*
606 * API for vnode.c/vfs.c to start executing the fsh for a given operation.
607 *
608 * These interfaces are using fsh_res_ptr (in fsh_prepare_fsrec()), so it's
609 * absolutely necessary to call fsh_init() before using them. That's done in
610 * vfsinit().
611 *
 * While these functions are executing, it's expected that necessary vfs_t's
 * are held so that vfs_free() isn't called. vfs_free() expects that no one
 * else accesses vfs_fshrecord of a given vfs_t.
 * It's also the caller's responsibility to keep vnode_t passed to fsh_foo()
 * alive and valid.
 * All these expectations are met because these functions are used only in
 * corresponding {fop,fsop}_foo() functions.
619 */
620 int
621 fsh_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
622 caller_context_t *ct)
623 {
624 int ret;
625 fsh_fsrecord_t *fsrecp;
626
627 fsh_prepare_fsrec(vp->v_vfsp);
628 fsrecp = vp->v_vfsp->vfs_fshrecord;
629
630 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
631 if (!(fsrecp->fshfsr_enabled)) {
632 rw_exit(&fsrecp->fshfsr_lock);
633 return ((*(vp->v_op->vop_read))(vp, uiop, ioflag, cr, ct));
634 }
635
636 ret = fsh_next_read(list_head(&fsrecp->fshfsr_list), vp, uiop, ioflag,
637 cr, ct);
638 rw_exit(&fsrecp->fshfsr_lock);
639
640 return (ret);
641 }
642
643 int
644 fsh_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
645 caller_context_t *ct)
646 {
647 int ret;
648 fsh_fsrecord_t *fsrecp;
649
650 fsh_prepare_fsrec(vp->v_vfsp);
651 fsrecp = vp->v_vfsp->vfs_fshrecord;
652
653 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
654 if (!(vp->v_vfsp->vfs_fshrecord->fshfsr_enabled)) {
655 rw_exit(&fsrecp->fshfsr_lock);
656 return ((*(vp->v_op->vop_write))(vp, uiop, ioflag, cr, ct));
657 }
658
659 ret = fsh_next_write(list_head(&fsrecp->fshfsr_list), vp, uiop, ioflag,
660 cr, ct);
661 rw_exit(&fsrecp->fshfsr_lock);
662
663 return (ret);
664 }
665
666 int
667 fsh_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
668 {
669 fsh_fsrecord_t *fsrecp;
670 int ret;
671
672 fsh_prepare_fsrec(vfsp);
673 fsrecp = vfsp->vfs_fshrecord;
674
675 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
676 if (!(fsrecp->fshfsr_enabled)) {
677 rw_exit(&fsrecp->fshfsr_lock);
678 return ((*(vfsp->vfs_op->vfs_mount))(vfsp, mvp, uap, cr));
679 }
680
681 ret = fsh_next_mount(list_head(&fsrecp->fshfsr_list), vfsp, mvp, uap,
682 cr);
683 rw_exit(&fsrecp->fshfsr_lock);
684
685 return (ret);
686 }
687
688 int
689 fsh_unmount(vfs_t *vfsp, int flag, cred_t *cr)
690 {
691 fsh_fsrecord_t *fsrecp;
692 int ret;
693
694 fsh_prepare_fsrec(vfsp);
695 fsrecp = vfsp->vfs_fshrecord;
696
697 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
698 if (!(fsrecp->fshfsr_enabled)) {
699 rw_exit(&fsrecp->fshfsr_lock);
700 return ((*(vfsp->vfs_op->vfs_unmount))(vfsp, flag, cr));
701 }
702
703 ret = fsh_next_unmount(list_head(&fsrecp->fshfsr_list), vfsp, flag, cr);
704 rw_exit(&fsrecp->fshfsr_lock);
705
706 return (ret);
707 }
708
709 /*
 * This is the function used by fsh_prepare_fsrec() to allocate a new
 * fsh_fsrecord. This function is called by the first function which
 * accesses the vfs_fshrecord and finds out it's NULL.
713 */
714 static fsh_fsrecord_t *
715 fsh_fsrec_create()
716 {
717 fsh_fsrecord_t *fsrecp;
718
719 fsrecp = (fsh_fsrecord_t *)kmem_zalloc(sizeof (*fsrecp), KM_SLEEP);
720 list_create(&fsrecp->fshfsr_list, sizeof (fsh_int_t),
721 offsetof(fsh_int_t, fshi_next));
722 rw_init(&fsrecp->fshfsr_lock, NULL, RW_DRIVER, NULL);
723 fsrecp->fshfsr_enabled = 1;
724 return (fsrecp);
725 }
726
727
728 /*
729 * This call can be used ONLY in vfs_free(). It's assumed that no other
730 * fsh calls using the vfs_t that owns the fsh_fsrecord to be destroyed
 * are executing while a call to fsh_fsrec_destroy() is made. With these
 * assumptions, no concurrency issues occur.
733 *
734 * Before calling this function outside the fsh, it's sufficient and
735 * required to check if the passed fsh_fsrecord * is not NULL. We don't
736 * have to check if it is not equal to fsh_res_ptr, because all the fsh API
737 * calls involving this vfs_t should end before vfs_free() is called
738 * (outside the fsh, fsh_fsrecord is never equal to fsh_res_ptr). That is
739 * guaranteed by the explicit requirement that the caller of fsh API holds
740 * the vfs_t when needed.
741 *
742 * All the remaining hooks are being removed.
743 */
744 void
745 fsh_fsrec_destroy(struct fsh_fsrecord *volatile fsrecp)
746 {
747 fsh_int_t *fshi;
748
749 VERIFY(fsrecp != NULL);
750
751 /*
752 * Although it is expected that no fsh calls using this vfs_t
753 * would be executing, we would not like to panic if that happens.
754 * That's a client's bug obviously, but we'd like to minimise the
755 * possibility of a system crash. That's why fsh_remove_lock is used
756 * here. Even if fsh_hook_remove() would be called with a handle that is
757 * invalid, it will just return (-1), because it won't find the handle
758 * in fsh_map. here is of course a possibility, that an invalid handle
759 * (id_t internally) would become valid (it would be assigned again),
760 * but this is very rare, because of the fact that id_alloc() uses next
761 * fit strategy to alloc the id_t's.
762 * For more info see fsh_hook_remove()
763 */
764 mutex_enter(&fsh_remove_lock);
765 while ((fshi = list_remove_head(&fsrecp->fshfsr_list)) != NULL) {
766 fsh_mapping_t *mapping = fshi->fshi_mapping;
767
768 ASSERT(mapping->fshm_handle == fshi->fshi_handle);
769 ASSERT(mapping->fshm_fshi == fshi);
770
771 mutex_enter(&fsh_map_lock);
772 list_remove(&fsh_map, mapping);
773 mutex_exit(&fsh_map_lock);
774
775 id_free(fsh_idspace, fshi->fshi_handle);
776
777 kmem_free(fshi, sizeof (*fshi));
778 kmem_free(mapping, sizeof (*mapping));
779 }
780 mutex_exit(&fsh_remove_lock);
781
782 list_destroy(&fsrecp->fshfsr_list);
783 rw_destroy(&fsrecp->fshfsr_lock);
784 kmem_free(fsrecp, sizeof (*fsrecp));
785 }
786
787 /*
788 * fsh_init() is called in vfsinit()@vfs.c. This function MUST be called
789 * before every other fsh call.
790 */
791 void
792 fsh_init(void)
793 {
794 rw_init(&fsh_cblist_lock, NULL, RW_DRIVER, NULL);
795 list_create(&fsh_cblist, sizeof (fsh_callback_int_t),
796 offsetof(fsh_callback_int_t, fshci_next));
797
798 mutex_init(&fsh_map_lock, NULL, MUTEX_DRIVER, NULL);
799 mutex_init(&fsh_remove_lock, NULL, MUTEX_DRIVER, NULL);
800
801 list_create(&fsh_map, sizeof (fsh_mapping_t),
802 offsetof(fsh_mapping_t, fshm_next));
803
804 /* See comment above fsh_prepare_fsrec() */
805 fsh_res_ptr = (void *)-1;
806
807 fsh_idspace = id_space_create("fsh", 0, fsh_limit);
808 }
809
810 /*
811 * These functions are used to pass control to the next hook or underlying
 * vop or vfsop. The client doesn't have to worry about any locking, because
813 * all the necessities are guaranteed by the fsh_foo().
814 *
815 * In fsh_next_foo() we execute the hook passed in the first argument and
816 * try to find the next one. It is guaranteed that the passed hook is still
817 * valid, because of fshfsr_lock held by fsh_foo().
818 */
819 int
820 fsh_next_read(fsh_int_t *fshi, vnode_t *vp, uio_t *uiop, int ioflag,
821 cred_t *cr, caller_context_t *ct)
822 {
823 while (fshi != NULL && fshi->fshi_hooks.read == NULL)
824 fshi = list_next(&vp->v_vfsp->vfs_fshrecord->fshfsr_list,
825 fshi);
826
827 if (fshi != NULL)
828 return ((*(fshi->fshi_hooks.read))(
829 list_next(&vp->v_vfsp->vfs_fshrecord->fshfsr_list, fshi),
830 fshi->fshi_hooks.arg, vp, uiop, ioflag, cr, ct));
831 else
832 return ((*(vp->v_op->vop_read))(vp, uiop, ioflag, cr, ct));
833 }
834
835 int
836 fsh_next_write(fsh_int_t *fshi, vnode_t *vp, uio_t *uiop, int ioflag,
837 cred_t *cr, caller_context_t *ct)
838 {
839 while (fshi != NULL && fshi->fshi_hooks.write == NULL)
840 fshi = list_next(&vp->v_vfsp->vfs_fshrecord->fshfsr_list,
841 fshi);
842
843 if (fshi != NULL)
844 return ((*(fshi->fshi_hooks.write))(
845 list_next(&vp->v_vfsp->vfs_fshrecord->fshfsr_list, fshi),
846 fshi->fshi_hooks.arg, vp, uiop, ioflag, cr, ct));
847 else
848 return ((*(vp->v_op->vop_write))(vp, uiop, ioflag, cr, ct));
849 }
850
851 int
852 fsh_next_mount(fsh_int_t *fshi, vfs_t *vfsp, vnode_t *mvp, struct mounta *uap,
853 cred_t *cr)
854 {
855 while (fshi != NULL && fshi->fshi_hooks.mount == NULL)
856 fshi = list_next(&vfsp->vfs_fshrecord->fshfsr_list, fshi);
857
858 if (fshi != NULL)
859 return ((*(fshi->fshi_hooks.mount))(
860 list_next(&vfsp->vfs_fshrecord->fshfsr_list, fshi),
861 fshi->fshi_hooks.arg, vfsp, mvp, uap, cr));
862 else
863 return ((*(vfsp->vfs_op->vfs_mount))(vfsp, mvp, uap, cr));
864 }
865
866 int
867 fsh_next_unmount(fsh_int_t *fshi, vfs_t *vfsp, int flag, cred_t *cr)
868 {
869
870 while (fshi != NULL && fshi->fshi_hooks.unmount == NULL)
871 fshi = list_next(&vfsp->vfs_fshrecord->fshfsr_list, fshi);
872
873 if (fshi != NULL)
874 return ((*(fshi->fshi_hooks.unmount))(
875 list_next(&vfsp->vfs_fshrecord->fshfsr_list, fshi),
876 fshi->fshi_hooks.arg, vfsp, flag, cr));
877 else
878 return ((*(vfsp->vfs_op->vfs_unmount))(vfsp, flag, cr));
879 }