Print this page
Update from fsd_sep3 webrev to fsd_sep9
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/fs/fsh.c
+++ new/usr/src/uts/common/fs/fsh.c
1 1 /*
2 2 * This file and its contents are supplied under the terms of the
3 3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 4 * You may only use this file in accordance with the terms of version
5 5 * 1.0 of the CDDL.
6 6 *
7 7 * A full copy of the text of the CDDL should have accompanied this
8 8 * source. A copy of the CDDL is also available via the Internet at
9 9 * http://www.illumos.org/license/CDDL.
10 10 */
11 11
12 12 /*
13 13 * Copyright 2013 Damian Bogel. All rights reserved.
14 14 */
15 15
16 16 #include <sys/debug.h>
17 17 #include <sys/errno.h>
18 18 #include <sys/fsh.h>
19 19 #include <sys/fsh_impl.h>
20 20 #include <sys/id_space.h>
21 21 #include <sys/kmem.h>
22 22 #include <sys/ksynch.h>
23 23 #include <sys/list.h>
24 24 #include <sys/sunddi.h>
25 25 #include <sys/sysmacros.h>
26 26 #include <sys/types.h>
27 27 #include <sys/vfs.h>
↓ open down ↓ |
27 lines elided |
↑ open up ↑ |
28 28 #include <sys/vnode.h>
29 29
30 30 /*
31 31 * Filesystem hook framework (fsh)
32 32 *
33 33 * 1. Abstract.
34 34 * The main goal of the filesystem hook framework is to provide an easy way to
35 35 * inject client-defined behaviour into vfs/vnode calls. fsh works on
36 36 * vfs_t granularity.
37 37 *
38 + * Note: In this document, both an fsh_t structure and a hooking function for
39 + * a vnodeop/vfsop are referred to as a *hook*.
38 40 *
41 + *
39 42 * 2. Overview.
40 43 * fsh_t is the main object in the fsh. An fsh_t is a structure containing:
41 - * - pointers to hooking functions (named after corresponding
42 - * vnodeops/vfsops)
43 - * - a pointer to an argument to pass (this is shared for all the
44 - * hooks in a given fsh_t)
45 - * - a pointer to the *hook remove callback* - it's being fired after a
46 - * hook is removed and the hook has stopped executing. It's safe to destroy
47 - * any data associated with this hook.
44 + * - pointers to hooking functions
45 + * - an argument to pass (this is shared for all the hooks in a given
46 + * fsh_t)
47 + * - a pointer to the *hook remove callback*
48 48 *
49 49 * The information from fsh_t is copied by the fsh and an fsh_handle_t
50 50 * is returned. It should be used for further removing.
51 51 *
52 52 *
53 53 * 3. Usage.
54 - * It is expected that vfs_t/vnode_t that are passed to fsh_foo() functions
55 - * are held by the caller when needed. fsh does no vfs_t/vnode_t locking.
54 + * It is expected that vfs_t/vnode_t passed to fsh_foo() functions are held by
55 + * the caller when needed. fsh does no vfs_t/vnode_t locking.
56 56 *
57 - * fsh_t is a structure filled out by the client. If a client does not want
58 - * to add/remove a hook for function foo(), he should fill the foo field of
59 - * fsh_t with NULL. Every hook has a type of corresponding vfsop/vnodeop with
60 - * two additional arguments:
61 - * - fsh_int_t *fsh_int - this argument MUST be passed to
62 - * hook_next_foo(). fsh wouldn't know which hook to execute next
63 - * without it
64 - * - void *arg - this is the argument passed with fsh_t during
65 - * installation
66 - * - void (*remove_cb)(void *, fsh_handle_t) - hook remove callback
67 - * (mentioned earlier); it's first argument is arg, the second is the
68 - * handle
57 + * fsh_t is a structure filled out by the client. It contains:
58 + * - pointers to hooking functions
59 + * - the argument passed to the hooks
60 + * - the *hook remove callback*
69 61 *
62 + * If a client does not want to add a hook for function foo(), he should fill
63 + * corresponding fields with NULLs. For every vfsop/vnodeop there are two
64 + * fields: pre_foo() and post_foo(). These are the functions called before and
65 + * after the next hook or underlying vfsop/vnodeop.
66 + *
67 + * Pre hooks take:
68 + * - arg
69 + * - pointer to a field containing void* - it should be filled whenever
70 + * the client wants to have some data shared by the pre and post hooks in
71 + * the same syscall execution. This is called the *instance data*.
72 + * - pointers to the arguments passed to the underlying vfsop/vnodeop
73 + * Pre hooks return void.
74 + *
75 + * Post hooks take:
76 + * - value returned by the previous post hook or underlying vfsop/vnodeop
77 + * - arg
78 + * - pointer to the *instance data*
79 + * - arguments passed to the underlying vfsop/vnodeop
80 + * Post hooks return an int, which should be treated as the vfsop/vnodeop
81 + * return value.
82 + * Memory allocated by pre hook must be deallocated by the post hook.
83 + *
84 + * Execution path of hooks A, B, C is as follows:
85 + * foo()
86 + * preA(argA, &instancepA, ...);
87 + * preB(argB, &instancepB, ...);
88 + * preC(argC, &instancepC, ...);
89 + * ret = VOP_FOO();
90 + * ret = postC(ret, argC, instancepC, ...);
91 + * ret = postB(ret, argB, instancepB, ...);
92 + * ret = postA(ret, argA, instancepA, ...);
93 + * return (ret);
94 + *
70 95 * After installation, an fsh_handle_t is returned to the caller.
71 96 *
72 - * Every hook function is responsible for passing the control to the next
73 - * hook associated with a particular call. In order to provide an easy way to
74 - * modify the behaviour of a function call both before and after the
75 - * underlying vfsop/vnodeop (or next hook) execution, a hook has to call
76 - * fsh_next_foo() at some point. This function does necessary internal
77 - * operations and calls the next hook, until there's no hook left, then it
78 - * calls the underlying vfsop/vnodeop.
79 - * Example:
80 - * my_freefs(fsh_int_t *fsh_int, void *arg, vfs_t *vfsp) {
81 - * cmn_err(CE_NOTE, "freefs called!\n");
82 - * return (fsh_next_freefs(fsh_int, vfsp));
83 - * }
97 + * Hook remove callback - it's a function being fired after a hook is removed
98 + * and no thread is going to execute it anymore. It's safe to destroy all the
99 + * data associated with this hook inside it.
84 100 *
101 + * It is guaranteed that whenever a pre_hook() is called, post_hook() will
102 + * also be called within the same syscall.
85 103 *
86 - * A client might want to fire callbacks when vfs_t's are being mounted
104 + * If a hook (HNew) is installed/removed on/from a vfs_t within execution of
105 + * another hook (HExec) installed on this vfs_t, the syscall that executes
106 + * HExec won't fire HNew.
107 + *
108 + * A client might want to fire callbacks when vfs_ts are being mounted
87 109 * or freed. There's an fsh_callback_t structure provided to install such
88 110 * callbacks along with the API.
89 111 * It is legal to call fsh_hook_{install,remove}() inside a mount callback
90 112 * WITHOUT holding the vfs_t.
91 113 *
92 114 * After vfs_t's free callback returns, all the handles associated with the
93 115 * hooks installed on this vfs_t are invalid and must not be used.
94 116 *
95 - *
96 117 * 4. API
97 118 * None of the APIs should be called during interrupt context above lock
98 - * level. The only exceptions are fsh_next_foo() functions, which do not use
99 - * locks.
119 + * level.
100 120 *
101 121 * a) fsh.h
102 - * Any of these functions could be called inside a hook or a hook remove
103 - * callback.
104 - * fsh_callback_{install,remove}() must not be called inside a {mount,free}
105 - * callback. Doing so will cause a deadlock. Other functions can be called
106 - * inside {mount,free} callbacks.
122 + * Any of these functions could be called in a hook or a hook remove callback.
123 + * The only functions that must not be called inside a {mount,free} callback are
124 + * fsh_callback_{install,remove}(). Using them will cause a deadlock.
107 125 *
126 + *
108 127 * fsh_fs_enable(vfs_t *vfsp)
109 128 * fsh_fs_disable(vfs_t *vfsp)
110 129 * Enables/disables fsh for a given vfs_t.
111 130 *
112 131 * fsh_hook_install(vfs_t *vfsp, fsh_t *hooks)
113 132 * Installs hooks on vfsp filesystem.
114 133 * It's important that hooks are executed in LIFO installation order,
115 134 * which means that if there are hooks A and B installed in this order, B
116 135 * is going to be executed before A.
117 136 * It returns a correct handle, or (-1) if hook/callback limit exceeded.
118 137 * The handle is valid until a free callback returns or an explicit call
119 138 * to fsh_hook_remove().
120 139 *
↓ open down ↓ |
3 lines elided |
↑ open up ↑ |
121 140 * fsh_hook_remove(fsh_handle_t handle)
122 141 * Removes a hook and invalidates the handle.
123 142 * It is guaranteed that after this function returns, calls to
124 143 * vnodeops/vfsops won't go through this hook, although there might be
125 144 * some threads still executing this hook. When hook remove callback is
126 145 * fired, it is guaranteed that the hook won't be executed anymore. It is
127 146 * safe to remove all the internal data associated with this hook inside
128 147 * the hook remove callback. The hook remove callback could be called
129 148 * inside fsh_hook_remove().
130 149 *
131 - * fsh_next_foo(fsh_int_t *fsh_int, void *arg, ARGUMENTS)
132 - * This is the function which should be called once in every hook. It
133 - * does the necessary internal operations and passes control to the
134 - * next hook or, if there's no hook left, to the underlying
135 - * vfsop/vnodeop.
136 150 *
137 151 * fsh_callback_install(fsh_callback_t *callback)
138 152 * fsh_callback_remove(fsh_callback_handle_t handle)
139 153 * Installs/removes callbacks for vfs_t mount/free. The mount callback
140 154 * is executed right before domount() returns. The free callback is
141 155 * called right before VFS_FREEVFS() is called.
142 156 * The fsh_callback_install() returns a correct handle, or (-1) if
143 157 * hook/callback limit exceeded.
144 158 *
159 + *
145 160 * b) fsh_impl.h (for vfs.c and vnode.c only)
146 161 * fsh_init()
147 162 * This call has to be done in vfsinit(). It initialises the fsh. It
148 163 * is absolutely necessary that this call is made before any other fsh
149 164 * operation.
150 165 *
151 166 * fsh_exec_mount_callbacks(vfs_t *vfsp)
152 167 * fsh_exec_free_callbacks(vfs_t *vfsp)
153 168 * Used to execute all fsh callbacks for {mount,free} of a vfs_t.
154 169 *
155 170 * fsh_fsrec_destroy(struct fsh_fsrecord *fsrecp)
156 171 * Destroys an fsh_fsrecord structure. All the hooks installed on this
157 172 * vfs_t are then destroyed. free callback is called before this function.
158 173 *
159 174 * fsh_foo(ARGUMENTS)
160 - * Function used to start executing the hook chain for a given call.
175 + * Function used to execute the hook chain for a given syscall.
161 176 *
162 177 *
163 178 * 5. Internals.
164 179 * fsh_int_t is an internal hook structure. It is reference counted.
165 180 * fshi_hold() and fshi_rele() should be used whenever needed.
166 181 * fsh_int_t entries are elements of both fsh_map (global) and fshfsr_list
167 182 * (local to vfs_t). All entries are unique and are identified by fshi_handle.
168 183 *
169 184 * fsh_int_t properties:
170 185 * - fsh_hook_install() sets the ref. counter to 1 and adds it to both
171 186 * fsh_map and fshfsr_list
172 187 * - fsh_hook_remove() decreases the ref. counter by 1, removes the hook
173 188 * from fsh_map and marks the hook as *doomed*
174 189 * - if fsh_int_t is on the fshfsr_list, it's alive and there is a thread
175 190 * executing it
176 191 * - if fsh_int_t is marked as *doomed*, the reference counter cannot
177 192 * be increased and thus no thread can acquire this fsh_int_t
178 193 * - ref. counter can drop to 0 only after an fsh_hook_remove() call; this
179 194 * also means that the fsh_int_t is *doomed* and isn't a part of fsh_map
180 195 * - fsh_int_t could be also destroyed without fsh_hook_remove() call,
181 196 * that happens only inside fsh_fsrec_destroy() where it is guaranteed
↓ open down ↓ |
11 lines elided |
↑ open up ↑ |
182 197 * that there is no thread executing the hook
183 198 *
184 199 *
185 200 * fsh_fsrecord_t is a structure which lives inside a vfs_t.
186 201 * fsh_fsrecord_t contains:
187 202 * - an rw-lock that protects the structure
188 203 * - a list of hooks installed on this vfs_t
189 204 * - a flag which tells whether fsh is enabled on this vfs_t
190 205 *
191 206 *
192 - * fsh_prepare_fsrec rule:
207 + * fsh_fsrec_prepare rule:
193 208 * Every function that needs vfsp->vfs_fshrecord has to call
194 - * fsh_prepare_fsrec() first. If and only if the call is made, it is safe to
209 + * fsh_fsrec_prepare() first. If and only if the call is made, it is safe to
195 210 * use vfsp->vfs_fshrecord.
196 211 *
197 212 * Unfortunately, because of unexpected behaviour of some filesystems (no use
198 213 * of vfs_alloc()/vfs_init()) there's no good place to initialise the
199 214 * fsh_fsrecord_t structure. The approach being used here is to check if it's
200 215 * initialised in every call. Because of the fact that no lock could be used
201 216 * here (the same problem with initialisation), a spinlock is used. This is
202 - * explained in more detail in a comment before fsh_prepare_fsrec(). After
217 + * explained in more detail in a comment before fsh_fsrec_prepare(). After
203 218 * calling fsh_fsrec_prepare() it's completely safe to keep the vfs_fshrecord
204 219 * pointer locally, because it won't be changed until vfs_free() is called.
205 220 *
206 - * The only exception from the fsh_prepare_fsrec() rule is vfs_free(),
207 - * where there is expected that no other fsh calls would be made for the
221 + * Exceptions from this rule:
222 + * - vfs_free() - it is expected that no other fsh calls would be made for the
208 223 * vfs_t that's being freed. That's why vfs_fshrecord could be only NULL or a
209 224 * valid pointer and could not be concurrently accessed.
225 + * - fshi_rele() - fsh_hook_install() comes before first fshi_rele() call;
226 + * the fsh_fsrecord_t has been initialised there
210 227 *
228 + *
211 229 * When there are no fsh functions (that use a particular fsh_fsrecord_t)
212 230 * executing, the vfs_fshrecord pointer won't be equal to fsh_res_ptr. It
213 231 * would be NULL or a pointer to an initialised fsh_fsrecord_t.
214 232 *
233 + * It is required and sufficient to check if fsh_fsrecord_t is not NULL before
234 + * passing it to fsh_fsrec_destroy. We don't have to check if it is not equal
235 + * to fsh_res_ptr, because all the fsh API calls involving this vfs_t should
236 + * end before vfs_free() is called (outside the fsh, fsh_fsrecord is never
237 + * equal to fsh_res_ptr). That is guaranteed by the explicit requirement that
238 + * the caller of fsh API holds the vfs_t when needed. fsh_hook_remove() must not
239 + * be called either, because the handles are invalidated after free callback has
240 + * fired.
215 241 *
242 + *
216 243 * Callbacks:
217 244 * Mount callbacks are executed by a call to fsh_exec_mount_callbacks() right
218 245 * before returning from domount()@vfs.c.
219 246 *
220 247 * Free callbacks are executed by a call to fsh_exec_free_callbacks() right
221 248 * before calling VFS_FREEVFS(), after vfs_t's reference count drops to 0.
222 249 *
223 250 *
224 - * fsh_next_foo(fsh_int_t *fshi, ARGUMENTS)
225 - * This function is quite simple. It takes the fsh_int_t and passes control
226 - * to the next hook or to the underlying vnodeop/vfsop.
227 - *
228 - *
229 251 * 6. Locking
230 252 * a) public
231 253 * fsh does no vfs_t nor vnode_t locking. It is expected that whenever it is
232 254 * needed, the client does that.
233 255 *
234 - * fsh_callback_{install,remove} must not be called inside a callback, because
235 - * it will cause a deadlock.
256 + * No locks are held across hooks or hook remove callbacks execution. It is
257 + * safe to use fsh API inside hooks and hook remove callbacks.
236 258 *
237 - * b) internal
259 + * fsh_cb_lock is held across {mount,free} callbacks. Calling
260 + * fsh_callback_{install,remove} inside of a callback will cause a deadlock.
261 + *
262 + * b) internals
238 263 * Locking diagram:
239 264 *
240 - * fsh_hook_install() fsh_hook_remove() fsh_fsrec_destroy()
241 - * | | |
242 - * | | |
243 - * +------------------+ | +------------+
244 - * | | |
245 - * V V V
246 - * fsh_lock
247 - * | |
248 - * | +----- fshfsr_lock, RW_WRITER ---+
249 - * | |
250 - * V |
251 - * +---------------------------------------+ |
252 - * | fsh_map | |
253 - * | | |
254 - * +----|-> vfsp->vfs_fshrecord->fshfsr_list <--|--------------+
265 + * fsh_hook_remove() fsh_hook_install() fsh_fsrec_destroy()
266 + * | | |
267 + * | | |
268 + * +------------------+ | +------------+
269 + * | | | |
270 + * | V | |
271 + * V +------------|---|-+
272 + * fshi_rele() | fsh_lock | | |
273 + * (sometimes) +------------|---|-+
274 + * | | |
275 + * | +---+-- fshfsr_lock, RW_WRITER -+
276 + * | |
277 + * V |
278 + * +---------------------------------------+ |
279 + * | fsh_map | |
280 + * | | |
281 + * +----|-> vfsp->vfs_fshrecord->fshfsr_list <--|----------------+
255 282 * | +------------------------------^--------+
256 283 * | |
257 284 * | |
258 285 * fshfsr_lock, RW_READER fshfsr_lock, RW_WRITER
259 286 * | |
260 287 * | |
261 288 * fsh_read(), fshi_rele()
262 289 * fsh_write(),
263 - * ..., Might be called from:
264 - * fsh_next_read(), fsh_hook_remove()
265 - * fsh_next_write(), fsh_read(), fsh_write(), ...
266 - * ... fsh_next_read(), fsh_next_write(), ...
290 + * ... Might be called from:
291 + * fsh_hook_remove()
292 + * fsh_read(), fsh_write(), ...
267 293 *
294 + *
268 295 * fsh_lock is a global lock for administrative path (fsh_hook_install,
269 296 * fsh_hook_remove) and fsh_fsrec_destroy() (which is semi-administrative, since
270 297 * it destroys the unremoved hooks). It is used only when fsh_map needs to be
271 298 * locked. The usage of this lock guarantees that the data in fsh_map and
272 299 * fshfsr_lists is consistent.
300 + *
301 + * In order to make calling callbacks inside callbacks possible, fsh_cb_owner is
302 + * set by fsh_exec_{mount,free}_callbacks() to the thread that owns
303 + * fsh_cb_lock. Before entering the mutex, these functions check whether the
304 + * current thread already owns it.
305 + *
273 306 */
274 307
275 308
276 309 /* Internals */
277 -struct fsh_int {
310 +typedef struct fsh_int {
278 311 fsh_handle_t fshi_handle;
279 312 fsh_t fshi_hooks;
280 313 vfs_t *fshi_vfsp;
281 314
282 315 kmutex_t fshi_lock;
283 316 uint64_t fshi_ref;
284 317 uint64_t fshi_doomed; /* changed inside fsh_lock */
285 318
286 319 /* next node in fshfsr_list */
287 - list_node_t fshi_next;
320 + list_node_t fshi_node;
288 321
289 322 /* next node in fsh_map */
290 323 list_node_t fshi_global;
291 -};
324 +} fsh_int_t;
292 325
293 326 typedef struct fsh_callback_int {
294 327 fsh_callback_t fshci_cb;
295 328 fsh_callback_handle_t fshci_handle;
296 - list_node_t fshci_next;
329 + list_node_t fshci_node;
297 330 } fsh_callback_int_t;
298 331
299 332
333 +typedef struct fsh_exec {
334 + fsh_int_t *fshe_fshi;
335 + void *fshe_instance;
336 + list_node_t fshe_node;
337 +} fsh_exec_t;
338 +
339 +
300 340 static kmutex_t fsh_lock;
301 341
302 342 /*
303 343 * fsh_fsrecord_t is the main internal structure. Its content is protected
304 344 * by fshfsr_lock. The fshfsr_list is a list of fsh_int_t hook entries for
305 345 * the vfs_t that contains the fsh_fsrecord_t.
306 346 */
307 347 struct fsh_fsrecord {
308 348 krwlock_t fshfsr_lock;
309 349 int fshfsr_enabled;
310 350 list_t fshfsr_list;
↓ open down ↓ |
1 lines elided |
↑ open up ↑ |
311 351 };
312 352
313 353 /*
314 354 * Global list of fsh_int_t. Protected by fsh_lock.
315 355 */
316 356 static list_t fsh_map;
317 357
318 358 /*
319 359 * Global list of fsh_callback_int_t.
320 360 */
321 -static krwlock_t fsh_cblist_lock;
361 +static kmutex_t fsh_cb_lock;
362 +static kmutex_t fsh_cb_owner_lock;
363 +static kthread_t *fsh_cb_owner;
322 364 static list_t fsh_cblist;
323 365
324 366 /*
325 367 * A reserved pointer for fsh purposes. It is used because of the method
326 368 * chosen for solving concurrency issues with vfs_fshrecord. The full
327 369 * explanation is in the big theory statement at the beginning of this
328 370 * file and above fsh_fsrec_prepare(). It is initialised in fsh_init().
329 371 */
330 372 static void *fsh_res_ptr;
331 373
332 374 static fsh_fsrecord_t *fsh_fsrec_create();
333 375
334 376 int fsh_limit = INT_MAX;
335 377 static id_space_t *fsh_idspace;
336 378
337 379 /*
338 - * fsh_prepare_fsrec()
380 + * fsh_fsrec_prepare()
339 381 *
340 382 * Important note:
341 383 * Before using this function, fsh_init() MUST be called. We do that in
342 384 * vfsinit()@vfs.c.
343 385 *
344 386 * One would ask, why isn't the vfsp->vfs_fshrecord initialised when the
345 387 * vfs_t is created. Unfortunately, some filesystems (e.g. fifofs) do not
346 388 * call vfs_init() or even vfs_alloc(). It's possible that some unbundled
347 389 * filesystems could do the same thing. That's why this solution is
348 390 * introduced. It should be called before any code that needs access to
349 391 * vfs_fshrecord.
350 392 *
351 393 * Locking:
352 394 * There are no locks here, because there's no good place to initialise
353 395 * the lock. Concurrency issues are solved by using atomic instructions
354 396 * and a spinlock, which is spinning only once for a given vfs_t. Because
355 397 * of that, the usage of the spinlock isn't bad at all.
356 398 *
357 399 * How it works:
358 400 * a) if vfsp->vfs_fshrecord equals NULL, atomic_cas_ptr() changes it to
↓ open down ↓ |
10 lines elided |
↑ open up ↑ |
359 401 * fsh_res_ptr. That's a signal for other threads, that the structure
360 402 * is being initialised.
361 403 * b) if vfsp->vfs_fshrecord equals fsh_res_ptr, that means we have to wait,
362 404 * because vfs_fshrecord is being initialised by another call.
363 405 * c) other cases:
364 406 * vfs_fshrecord is already initialised, so we can use it. It won't change
365 407 * until vfs_free() is called. It can't happen when someone is holding
366 408 * the vfs_t, which is expected from the caller of fsh API.
367 409 */
368 410 static void
369 -fsh_prepare_fsrec(vfs_t *vfsp)
411 +fsh_fsrec_prepare(vfs_t *vfsp)
370 412 {
371 413 fsh_fsrecord_t *fsrec;
372 414
373 415 while ((fsrec = atomic_cas_ptr(&vfsp->vfs_fshrecord, NULL,
374 416 fsh_res_ptr)) == fsh_res_ptr)
375 417 ;
376 418
377 419 if (fsrec == NULL)
378 420 atomic_swap_ptr(&vfsp->vfs_fshrecord, fsh_fsrec_create());
379 421 }
380 422
381 423 /*
382 424 * API for enabling/disabling fsh per vfs_t.
383 425 *
↓ open down ↓ |
4 lines elided |
↑ open up ↑ |
384 426 * A newly created vfs_t has fsh enabled by default. If one would want to change
385 427 * this behaviour, mount callbacks could be used.
386 428 *
387 429 * The caller is expected to hold the vfs_t.
388 430 *
389 431 * These functions must NOT be called in a hook.
390 432 */
391 433 void
392 434 fsh_fs_enable(vfs_t *vfsp)
393 435 {
394 - fsh_prepare_fsrec(vfsp);
436 + fsh_fsrec_prepare(vfsp);
395 437
396 438 rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
397 439 vfsp->vfs_fshrecord->fshfsr_enabled = 1;
398 440 rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
399 441 }
400 442
401 443 void
402 444 fsh_fs_disable(vfs_t *vfsp)
403 445 {
404 - fsh_prepare_fsrec(vfsp);
446 + fsh_fsrec_prepare(vfsp);
405 447
406 448 rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
407 449 vfsp->vfs_fshrecord->fshfsr_enabled = 0;
408 450 rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
409 451 }
410 452
411 453 /*
412 454 * API used for installing hooks. fsh_handle_t is returned for further
413 455 * actions (currently just removing) on this set of hooks.
414 456 *
415 - * fsh_t fields:
416 - * - arg - argument passed to every hook
417 - * - remove_cb - remove callback, called after a hook is removed and all the
418 - * threads stops executing it
419 - * - read, write, ... - pointers to hooks for corresponding vnodeops/vfsops;
420 - * if there is no hook desired for an operation, it should be set to
421 - * NULL
422 - *
423 457 * It's important that the hooks are executed in LIFO installation order (they
424 458 * are added to the head of the hook list).
425 459 *
426 460 * The caller is expected to hold the vfs_t.
427 461 *
428 462 * Returns (-1) if hook/callback limit exceeded, handle otherwise.
429 463 */
430 464 fsh_handle_t
431 465 fsh_hook_install(vfs_t *vfsp, fsh_t *hooks)
432 466 {
433 467 fsh_handle_t handle;
434 468 fsh_int_t *fshi;
435 469
436 - fsh_prepare_fsrec(vfsp);
470 + fsh_fsrec_prepare(vfsp);
437 471
438 472 if ((handle = id_alloc(fsh_idspace)) == -1)
439 473 return (-1);
440 474
441 475 fshi = kmem_alloc(sizeof (*fshi), KM_SLEEP);
442 476 mutex_init(&fshi->fshi_lock, NULL, MUTEX_DRIVER, NULL);
443 477 (void) memcpy(&fshi->fshi_hooks, hooks, sizeof (fshi->fshi_hooks));
444 478 fshi->fshi_handle = handle;
445 479 fshi->fshi_doomed = 0;
446 480 fshi->fshi_ref = 1;
447 481 fshi->fshi_vfsp = vfsp;
448 482
449 483 mutex_enter(&fsh_lock);
450 484 rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER);
451 485 list_insert_head(&vfsp->vfs_fshrecord->fshfsr_list, fshi);
452 486 rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock);
453 487
454 488 list_insert_head(&fsh_map, fshi);
455 489 mutex_exit(&fsh_lock);
456 490
457 491 return (handle);
458 492 }
459 493
460 494 static int
461 495 fshi_hold(fsh_int_t *fshi)
462 496 {
463 497 int can_hold;
464 498
465 499 mutex_enter(&fshi->fshi_lock);
466 500 if (fshi->fshi_doomed == 1) {
467 501 can_hold = 0;
468 502 } else {
469 503 fshi->fshi_ref++;
470 504 can_hold = 1;
471 505 }
472 506 mutex_exit(&fshi->fshi_lock);
473 507
474 508 return (can_hold);
475 509 }
476 510
477 511 /*
478 512 * This function must not be called while fshfsr_lock is held. Doing so could
479 513 * cause a deadlock.
480 514 */
481 515 static void
482 516 fshi_rele(fsh_int_t *fshi)
483 517 {
484 518 int destroy;
485 519
486 520 mutex_enter(&fshi->fshi_lock);
487 521 ASSERT(fshi->fshi_ref > 0);
488 522 fshi->fshi_ref--;
489 523 if (fshi->fshi_ref == 0) {
490 524 ASSERT(fshi->fshi_doomed == 1);
491 525 destroy = 1;
↓ open down ↓ |
45 lines elided |
↑ open up ↑ |
492 526 } else {
493 527 destroy = 0;
494 528 }
495 529 mutex_exit(&fshi->fshi_lock);
496 530
497 531 if (destroy) {
498 532 /*
499 533 * At this point, we are sure that fsh_hook_remove() has been
500 534 * called, that's why we don't remove the fshi from fsh_map.
501 535 * fsh_hook_remove() did that already.
536 + * There is also no need to call fsh_fsrec_prepare() here.
502 537 */
503 538 fsh_fsrecord_t *fsrecp;
504 539
505 - if (fshi->fshi_hooks.remove_cb != NULL)
506 - (*fshi->fshi_hooks.remove_cb)(
507 - fshi->fshi_hooks.arg, fshi->fshi_handle);
508 540 /*
509 - * We don't have to call fsh_prepare_fsrec() here.
541 + * We don't have to call fsh_fsrec_prepare() here.
510 542 * fsh_fsrecord_t is already initialised, because we've found a
511 543 * mapping for the given handle.
512 544 */
513 545 fsrecp = fshi->fshi_vfsp->vfs_fshrecord;
514 546 ASSERT(fsrecp != NULL);
515 547 ASSERT(fsrecp != fsh_res_ptr);
516 548
517 549 rw_enter(&fsrecp->fshfsr_lock, RW_WRITER);
518 550 list_remove(&fsrecp->fshfsr_list, fshi);
519 551 rw_exit(&fsrecp->fshfsr_lock);
520 552
553 + if (fshi->fshi_hooks.remove_cb != NULL)
554 + (*fshi->fshi_hooks.remove_cb)(
555 + fshi->fshi_hooks.arg, fshi->fshi_handle);
556 +
521 557 id_free(fsh_idspace, fshi->fshi_handle);
522 558 mutex_destroy(&fshi->fshi_lock);
523 559 kmem_free(fshi, sizeof (*fshi));
524 560 }
525 561 }
526 562
527 563 /*
528 564 * Used for removing a hook set.
529 565 *
530 566 * fsh_hook_remove() invalidates the given handle.
531 567 *
532 568 * It is guaranteed, that after successful return from fsh_hook_remove(),
533 569 * calls to vnodeops/vfsops, on the vfs_t on which the hook is installed, won't
534 570 * go through this hook.
535 571 *
536 572 * There is no guarantee that after fsh_hook_remove() returns, the hook
537 573 * associated with the handle won't be executing. Instead, it is guaranteed that
538 574 * when remove_cb() is called, the hook has finished its execution in all threads.
539 575 * It is safe to destroy all internal data associated with this hook inside
540 576 * remove_cb().
541 577 *
542 578 * It is possible that remove_cb() would be called before fsh_hook_remove()
543 579 * returns.
544 580 *
545 581 * Returns (-1) if hook wasn't found, 0 otherwise.
546 582 */
547 583 int
548 584 fsh_hook_remove(fsh_handle_t handle)
549 585 {
550 586 fsh_int_t *fshi;
551 587
552 588 mutex_enter(&fsh_lock);
553 589 for (fshi = list_head(&fsh_map); fshi != NULL;
554 590 fshi = list_next(&fsh_map, fshi)) {
555 591 if (fshi->fshi_handle == handle) {
556 592 list_remove(&fsh_map, fshi);
557 593 break;
558 594 }
559 595 }
560 596
561 597 if (fshi == NULL)
562 598 return (-1);
563 599
564 600 mutex_enter(&fshi->fshi_lock);
565 601 ASSERT(fshi->fshi_doomed == 0);
566 602 fshi->fshi_doomed = 1;
567 603 mutex_exit(&fshi->fshi_lock);
568 604 mutex_exit(&fsh_lock);
569 605
570 606 fshi_rele(fshi);
571 607
572 608 return (0);
573 609 }
574 610
575 611 /*
576 612 * API for installing global mount/free callbacks.
577 613 *
↓ open down ↓ |
47 lines elided |
↑ open up ↑ |
578 614 * fsh_callback_t fields:
579 615 * fshc_arg - argument passed to the callbacks
580 616 * fshc_free - callback fired before VFS_FREEVFS() is called, after vfs_count
581 617 * drops to 0
582 618 * fshc_mount - callback fired right before returning from domount()
583 619 * The first argument of these callbacks is the vfs_t that is mounted/freed.
584 620 * The second one is the fshc_arg.
585 621 *
586 622 * fsh_callback_handle_t is filled out by this function.
587 623 *
588 - * This function must NOT be called in a callback, because it will cause
589 - * a deadlock.
590 - *
591 624 * Returns (-1) if hook/callback limit exceeded.
625 + *
626 + * Calling this function in a {mount,free} callback will cause a deadlock.
592 627 */
593 628 fsh_callback_handle_t
594 629 fsh_callback_install(fsh_callback_t *callback)
595 630 {
596 631 fsh_callback_int_t *fshci;
597 632 fsh_callback_handle_t handle;
598 633
599 634 if ((handle = id_alloc(fsh_idspace)) == -1)
600 635 return (-1);
601 636
602 637 fshci = (fsh_callback_int_t *)kmem_alloc(sizeof (*fshci), KM_SLEEP);
603 638 (void) memcpy(&fshci->fshci_cb, callback, sizeof (fshci->fshci_cb));
604 639 fshci->fshci_handle = handle;
605 640
606 - /* If it is called in a {mount,free} callback, causes deadlock. */
607 - rw_enter(&fsh_cblist_lock, RW_WRITER);
641 + mutex_enter(&fsh_cb_lock);
608 642 list_insert_head(&fsh_cblist, fshci);
609 - rw_exit(&fsh_cblist_lock);
643 + mutex_exit(&fsh_cb_lock);
610 644
611 645 return (handle);
612 646 }
613 647
614 648 /*
615 649 * API for removing global mount/free callbacks.
616 650 *
617 - * This function must NOT be called in a callback, because it will cause
618 - * a deadlock.
619 - *
620 651 * Returns (-1) if callback wasn't found, 0 otherwise.
652 + *
653 + * Calling this function in a {mount,free} callback will cause a deadlock.
621 654 */
622 655 int
623 656 fsh_callback_remove(fsh_callback_handle_t handle)
624 657 {
625 658 fsh_callback_int_t *fshci;
626 659
627 - /* If it is called in a {mount,free} callback, causes deadlock. */
628 - rw_enter(&fsh_cblist_lock, RW_WRITER);
660 + mutex_enter(&fsh_cb_lock);
661 +
629 662 for (fshci = list_head(&fsh_cblist); fshci != NULL;
630 663 fshci = list_next(&fsh_cblist, fshci)) {
631 664 if (fshci->fshci_handle == handle) {
632 665 list_remove(&fsh_cblist, fshci);
633 666 break;
634 667 }
635 668 }
636 - rw_exit(&fsh_cblist_lock);
637 669
670 + mutex_exit(&fsh_cb_lock);
671 +
638 672 if (fshci == NULL)
639 673 return (-1);
640 674
641 675 kmem_free(fshci, sizeof (*fshci));
642 676 id_free(fsh_idspace, handle);
643 677
644 678 return (0);
645 679 }
646 680
647 681 /*
648 682 * This function is executed right before returning from domount()@vfs.c.
649 683 * We are sure that it's called only after fsh_init().
650 684 * It executes all the mount callbacks installed in the fsh.
651 685 *
↓ open down ↓ |
4 lines elided |
↑ open up ↑ |
652 686 * Since fsh_exec_mount_callbacks() is called only inside domount(), it is legal
653 687 * to call fsh_hook_{install,remove}() inside a mount callback WITHOUT holding
654 688 * this vfs_t. This guarantee should be preserved, because it's in the "Usage"
655 689 * section in the big theory statement at the top of this file.
656 690 */
657 691 void
658 692 fsh_exec_mount_callbacks(vfs_t *vfsp)
659 693 {
660 694 fsh_callback_int_t *fshci;
661 695 fsh_callback_t *cb;
696 + int fsh_context;
662 697
663 - rw_enter(&fsh_cblist_lock, RW_READER);
698 + mutex_enter(&fsh_cb_owner_lock);
699 + fsh_context = fsh_cb_owner == curthread;
700 + mutex_exit(&fsh_cb_owner_lock);
701 +
702 + if (!fsh_context) {
703 + mutex_enter(&fsh_cb_lock);
704 + mutex_enter(&fsh_cb_owner_lock);
705 + fsh_cb_owner = curthread;
706 + mutex_exit(&fsh_cb_owner_lock);
707 + }
708 +
709 + ASSERT(MUTEX_HELD(&fsh_cb_lock));
710 +
664 711 for (fshci = list_head(&fsh_cblist); fshci != NULL;
665 712 fshci = list_next(&fsh_cblist, fshci)) {
666 713 cb = &fshci->fshci_cb;
667 714 if (cb->fshc_mount != NULL)
668 715 (*(cb->fshc_mount))(vfsp, cb->fshc_arg);
669 716 }
670 - rw_exit(&fsh_cblist_lock);
717 +
718 + if (!fsh_context) {
719 + mutex_enter(&fsh_cb_owner_lock);
720 + fsh_cb_owner = NULL;
721 + mutex_exit(&fsh_cb_owner_lock);
722 + mutex_exit(&fsh_cb_lock);
723 + }
671 724 }
672 725
673 726 /*
674 727 * This function is executed right before VFS_FREEVFS() is called in
675 728 * vfs_rele()@vfs.c. We are sure that it's called only after fsh_init().
676 729 * It executes all the free callbacks installed in the fsh.
677 730 *
678 731 * free() callback is the point after which the handles associated with the
679 732 * hooks installed on this vfs_t become invalid.
680 733 */
681 734 void
682 735 fsh_exec_free_callbacks(vfs_t *vfsp)
683 736 {
684 737 fsh_callback_int_t *fshci;
685 738 fsh_callback_t *cb;
739 + int fsh_context;
686 740
687 - rw_enter(&fsh_cblist_lock, RW_READER);
741 + mutex_enter(&fsh_cb_owner_lock);
742 + fsh_context = fsh_cb_owner == curthread;
743 + mutex_exit(&fsh_cb_owner_lock);
744 +
745 + if (!fsh_context) {
746 + mutex_enter(&fsh_cb_lock);
747 + mutex_enter(&fsh_cb_owner_lock);
748 + fsh_cb_owner = curthread;
749 + mutex_exit(&fsh_cb_owner_lock);
750 + }
751 +
752 + ASSERT(MUTEX_HELD(&fsh_cb_lock));
753 +
688 754 for (fshci = list_head(&fsh_cblist); fshci != NULL;
689 755 fshci = list_next(&fsh_cblist, fshci)) {
690 756 cb = &fshci->fshci_cb;
691 757 if (cb->fshc_free != NULL)
692 758 (*(cb->fshc_free))(vfsp, cb->fshc_arg);
693 759 }
694 - rw_exit(&fsh_cblist_lock);
760 +
761 + if (!fsh_context) {
762 + mutex_enter(&fsh_cb_owner_lock);
763 + fsh_cb_owner = NULL;
764 + mutex_exit(&fsh_cb_owner_lock);
765 + mutex_exit(&fsh_cb_lock);
766 + }
695 767 }
696 768
697 769 /*
698 770 * API for vnode.c/vfs.c to start executing the fsh for a given operation.
699 771 *
700 772 * fsh_xxx() tries to find the first non-NULL xxx hook on the fshfsr_list. If it
701 773 * does, it executes it. If not, underlying vnodeop/vfsop is called.
702 774 *
703 - * These interfaces are using fsh_res_ptr (in fsh_prepare_fsrec()), so it's
775 + * These interfaces are using fsh_res_ptr (in fsh_fsrec_prepare()), so it's
704 776 * absolutely necessary to call fsh_init() before using them. That's done in
705 777 * vfsinit().
706 778 *
707 779 * While these functions are executing, it's expected that necessary vfs_t's
708 780 * are held so that vfs_free() isn't called. vfs_free() expects that no one
709 781 * accesses vfs_fshrecord of a given vfs_t.
710 782 * It's also the caller's responsibility to keep vnode_t passed to fsh_foo()
711 783 * alive and valid.
712 784 * All these expectations are met because these functions are used only in
713 785 * corresponding {fop,fsop}_foo() functions.
714 786 */
715 787 int
716 788 fsh_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
717 789 caller_context_t *ct)
718 790 {
719 791 int ret;
720 792 fsh_fsrecord_t *fsrecp;
721 793 fsh_int_t *fshi;
794 + fsh_exec_t *fshe;
795 + list_t exec_list;
722 796
723 - fsh_prepare_fsrec(vp->v_vfsp);
797 + fsh_fsrec_prepare(vp->v_vfsp);
724 798 fsrecp = vp->v_vfsp->vfs_fshrecord;
725 799
726 800 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
727 801 if (!(fsrecp->fshfsr_enabled)) {
728 802 rw_exit(&fsrecp->fshfsr_lock);
729 - return ((*(vp->v_op->vop_read))(vp, uiop, ioflag, cr, ct));
803 + return ((*vp->v_op->vop_read)(vp, uiop, ioflag, cr, ct));
730 804 }
731 805
806 + list_create(&exec_list, sizeof (fsh_exec_t),
807 + offsetof(fsh_exec_t, fshe_node));
808 +
732 809 for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL;
733 810 fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
734 - if (fshi->fshi_hooks.read != NULL)
735 - if (fshi_hold(fshi))
736 - break;
811 + if (fshi->fshi_hooks.pre_read != NULL ||
812 + fshi->fshi_hooks.post_read != NULL) {
813 + if (fshi_hold(fshi)) {
814 + fshe = kmem_alloc(sizeof (*fshe), KM_SLEEP);
815 + fshe->fshe_fshi = fshi;
816 + list_insert_tail(&exec_list, fshe);
817 + }
818 + }
737 819 }
738 820 rw_exit(&fsrecp->fshfsr_lock);
739 821
740 - if (fshi == NULL)
741 - return ((*(vp->v_op->vop_read))(vp, uiop, ioflag, cr, ct));
822 + /* Execute pre hooks */
823 + for (fshe = list_head(&exec_list); fshe != NULL;
824 + fshe = list_next(&exec_list, fshe)) {
825 + if (fshe->fshe_fshi->fshi_hooks.pre_read != NULL)
826 + (*fshe->fshe_fshi->fshi_hooks.pre_read)(
827 + fshe->fshe_fshi->fshi_hooks.arg,
828 + &fshe->fshe_instance,
829 + &vp, &uiop, &ioflag, &cr, &ct);
830 + }
742 831
743 - ret = (*fshi->fshi_hooks.read)(fshi, fshi->fshi_hooks.arg,
744 - vp, uiop, ioflag, cr, ct);
745 - fshi_rele(fshi);
832 + ret = (*vp->v_op->vop_read)(vp, uiop, ioflag, cr, ct);
833 +
834 + /* Execute post hooks */
835 + while ((fshe = list_remove_tail(&exec_list)) != NULL) {
836 + if (fshe->fshe_fshi->fshi_hooks.post_read != NULL)
837 + ret = (*fshe->fshe_fshi->fshi_hooks.post_read)(
838 + ret, fshe->fshe_fshi->fshi_hooks.arg,
839 + fshe->fshe_instance,
840 + vp, uiop, ioflag, cr, ct);
841 + fshi_rele(fshe->fshe_fshi);
842 + kmem_free(fshe, sizeof (*fshe));
843 + }
844 + list_destroy(&exec_list);
845 +
746 846 return (ret);
747 847 }
748 848
749 849 int
750 850 fsh_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
751 851 caller_context_t *ct)
752 852 {
753 - fsh_int_t *fshi;
754 853 int ret;
755 854 fsh_fsrecord_t *fsrecp;
855 + fsh_int_t *fshi;
856 + fsh_exec_t *fshe;
857 + list_t exec_list;
756 858
757 - fsh_prepare_fsrec(vp->v_vfsp);
859 + fsh_fsrec_prepare(vp->v_vfsp);
758 860 fsrecp = vp->v_vfsp->vfs_fshrecord;
759 861
760 862 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
761 - if (!(vp->v_vfsp->vfs_fshrecord->fshfsr_enabled)) {
863 + if (!(fsrecp->fshfsr_enabled)) {
762 864 rw_exit(&fsrecp->fshfsr_lock);
763 - return ((*(vp->v_op->vop_write))(vp, uiop, ioflag, cr, ct));
865 + return ((*vp->v_op->vop_write)(vp, uiop, ioflag, cr, ct));
764 866 }
765 867
868 + list_create(&exec_list, sizeof (fsh_exec_t),
869 + offsetof(fsh_exec_t, fshe_node));
870 +
766 871 for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL;
767 872 fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
768 - if (fshi->fshi_hooks.write != NULL)
769 - if (fshi_hold(fshi))
770 - break;
873 + if (fshi->fshi_hooks.pre_write != NULL ||
874 + fshi->fshi_hooks.post_write != NULL) {
875 + if (fshi_hold(fshi)) {
876 + fshe = kmem_alloc(sizeof (*fshe), KM_SLEEP);
877 + fshe->fshe_fshi = fshi;
878 + list_insert_tail(&exec_list, fshe);
879 + }
880 + }
771 881 }
772 882 rw_exit(&fsrecp->fshfsr_lock);
773 883
774 - if (fshi == NULL)
775 - return ((*(vp->v_op->vop_write))(vp, uiop, ioflag, cr, ct));
884 + /* Execute pre hooks */
885 + for (fshe = list_head(&exec_list); fshe != NULL;
886 + fshe = list_next(&exec_list, fshe)) {
887 + if (fshe->fshe_fshi->fshi_hooks.pre_write != NULL)
888 + (*fshe->fshe_fshi->fshi_hooks.pre_write)(
889 + fshe->fshe_fshi->fshi_hooks.arg,
890 + &fshe->fshe_instance,
891 + &vp, &uiop, &ioflag, &cr, &ct);
892 + }
776 893
777 - ret = (*fshi->fshi_hooks.write)(fshi, fshi->fshi_hooks.arg,
778 - vp, uiop, ioflag, cr, ct);
779 - fshi_rele(fshi);
894 + ret = (*vp->v_op->vop_write)(vp, uiop, ioflag, cr, ct);
895 +
896 + /* Execute post hooks */
897 + while ((fshe = list_remove_tail(&exec_list)) != NULL) {
898 + if (fshe->fshe_fshi->fshi_hooks.post_write != NULL)
899 + ret = (*fshe->fshe_fshi->fshi_hooks.post_write)(
900 + ret, fshe->fshe_fshi->fshi_hooks.arg,
901 + fshe->fshe_instance,
902 + vp, uiop, ioflag, cr, ct);
903 + fshi_rele(fshe->fshe_fshi);
904 + kmem_free(fshe, sizeof (*fshe));
905 + }
906 + list_destroy(&exec_list);
907 +
780 908 return (ret);
781 909 }
782 910
783 911 int
784 912 fsh_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
785 913 {
914 + int ret;
786 915 fsh_fsrecord_t *fsrecp;
787 916 fsh_int_t *fshi;
788 - int ret;
917 + fsh_exec_t *fshe;
918 + list_t exec_list;
789 919
790 - fsh_prepare_fsrec(vfsp);
920 + fsh_fsrec_prepare(vfsp);
791 921 fsrecp = vfsp->vfs_fshrecord;
792 922
793 923 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
794 924 if (!(fsrecp->fshfsr_enabled)) {
795 925 rw_exit(&fsrecp->fshfsr_lock);
796 - return ((*(vfsp->vfs_op->vfs_mount))(vfsp, mvp, uap, cr));
926 + return ((*vfsp->vfs_op->vfs_mount)(vfsp, mvp, uap, cr));
797 927 }
798 928
929 + list_create(&exec_list, sizeof (fsh_exec_t),
930 + offsetof(fsh_exec_t, fshe_node));
931 +
799 932 for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL;
800 933 fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
801 - if (fshi->fshi_hooks.mount != NULL)
802 - if (fshi_hold(fshi))
803 - break;
934 + if (fshi->fshi_hooks.pre_mount != NULL ||
935 + fshi->fshi_hooks.post_mount != NULL) {
936 + if (fshi_hold(fshi)) {
937 + fshe = kmem_alloc(sizeof (*fshe), KM_SLEEP);
938 + fshe->fshe_fshi = fshi;
939 + list_insert_tail(&exec_list, fshe);
940 + }
941 + }
804 942 }
805 943 rw_exit(&fsrecp->fshfsr_lock);
806 944
807 - if (fshi == NULL)
808 - return ((*(vfsp->vfs_op->vfs_mount))(vfsp, mvp, uap, cr));
945 + /* Execute pre hooks */
946 + for (fshe = list_head(&exec_list); fshe != NULL;
947 + fshe = list_next(&exec_list, fshe)) {
948 + if (fshe->fshe_fshi->fshi_hooks.pre_mount != NULL)
949 + (*fshe->fshe_fshi->fshi_hooks.pre_mount)(
950 + &fshe->fshe_fshi->fshi_hooks.arg,
951 + &fshe->fshe_instance,
952 + &vfsp, &mvp, &uap, &cr);
953 + }
809 954
810 - ret = (*fshi->fshi_hooks.mount)(fshi, fshi->fshi_hooks.arg,
811 - vfsp, mvp, uap, cr);
812 - fshi_rele(fshi);
955 + ret = (*vfsp->vfs_op->vfs_mount)(vfsp, mvp, uap, cr);
956 +
957 + /* Execute post hooks */
958 + while ((fshe = list_remove_tail(&exec_list)) != NULL) {
959 + if (fshe->fshe_fshi->fshi_hooks.post_mount != NULL)
960 + ret = (*fshe->fshe_fshi->fshi_hooks.post_mount)(
961 + ret, fshe->fshe_fshi->fshi_hooks.arg,
962 + fshe->fshe_instance,
963 + vfsp, mvp, uap, cr);
964 + fshi_rele(fshe->fshe_fshi);
965 + kmem_free(fshe, sizeof (*fshe));
966 + }
967 + list_destroy(&exec_list);
968 +
813 969 return (ret);
814 970 }
815 971
816 972 int
817 973 fsh_unmount(vfs_t *vfsp, int flag, cred_t *cr)
818 974 {
975 + int ret;
819 976 fsh_fsrecord_t *fsrecp;
820 977 fsh_int_t *fshi;
821 - int ret;
978 + fsh_exec_t *fshe;
979 + list_t exec_list;
822 980
823 - fsh_prepare_fsrec(vfsp);
981 + fsh_fsrec_prepare(vfsp);
824 982 fsrecp = vfsp->vfs_fshrecord;
825 983
826 984 rw_enter(&fsrecp->fshfsr_lock, RW_READER);
827 985 if (!(fsrecp->fshfsr_enabled)) {
828 986 rw_exit(&fsrecp->fshfsr_lock);
829 - return ((*(vfsp->vfs_op->vfs_unmount))(vfsp, flag, cr));
987 + return ((*vfsp->vfs_op->vfs_unmount)(vfsp, flag, cr));
830 988 }
831 989
990 + list_create(&exec_list, sizeof (fsh_exec_t),
991 + offsetof(fsh_exec_t, fshe_node));
992 +
832 993 for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL;
833 994 fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
834 - if (fshi->fshi_hooks.unmount != NULL)
835 - if (fshi_hold(fshi))
836 - break;
995 + if (fshi->fshi_hooks.pre_unmount != NULL ||
996 + fshi->fshi_hooks.post_unmount != NULL) {
997 + if (fshi_hold(fshi)) {
998 + fshe = kmem_alloc(sizeof (*fshe), KM_SLEEP);
999 + fshe->fshe_fshi = fshi;
1000 + list_insert_tail(&exec_list, fshe);
1001 + }
1002 + }
837 1003 }
838 1004 rw_exit(&fsrecp->fshfsr_lock);
839 1005
840 - if (fshi == NULL)
841 - return ((*(vfsp->vfs_op->vfs_unmount))(vfsp, flag, cr));
1006 + /* Execute pre hooks */
1007 + for (fshe = list_head(&exec_list); fshe != NULL;
1008 + fshe = list_next(&exec_list, fshe)) {
1009 + if (fshe->fshe_fshi->fshi_hooks.pre_unmount != NULL)
1010 + (*fshe->fshe_fshi->fshi_hooks.pre_unmount)(
1011 + fshe->fshe_fshi->fshi_hooks.arg,
1012 + &fshe->fshe_instance,
1013 + &vfsp, &flag, &cr);
1014 + }
842 1015
843 - ret = (*fshi->fshi_hooks.unmount)(fshi, fshi->fshi_hooks.arg,
844 - vfsp, flag, cr);
845 - fshi_rele(fshi);
1016 + ret = (*vfsp->vfs_op->vfs_unmount)(vfsp, flag, cr);
1017 +
1018 + /* Execute post hooks */
1019 + while ((fshe = list_remove_tail(&exec_list)) != NULL) {
1020 + if (fshe->fshe_fshi->fshi_hooks.post_unmount != NULL)
1021 + ret = (*fshe->fshe_fshi->fshi_hooks.post_unmount)(
1022 + ret, fshe->fshe_fshi->fshi_hooks.arg,
1023 + fshe->fshe_instance,
1024 + vfsp, flag, cr);
1025 + fshi_rele(fshe->fshe_fshi);
1026 + kmem_free(fshe, sizeof (*fshe));
1027 + }
1028 + list_destroy(&exec_list);
1029 +
846 1030 return (ret);
847 1031 }
848 1032
849 1033 /*
850 - * This is the funtion used by fsh_prepare_fsrec() to allocate a new
1034 + * This is the function used by fsh_fsrec_prepare() to allocate a new
851 1035 * fsh_fsrecord. This function is called by the first function which
852 1036 * accesses the vfs_fshrecord and finds out it's NULL.
853 1037 */
854 1038 static fsh_fsrecord_t *
855 1039 fsh_fsrec_create()
856 1040 {
857 1041 fsh_fsrecord_t *fsrecp;
858 1042
859 1043 fsrecp = (fsh_fsrecord_t *)kmem_zalloc(sizeof (*fsrecp), KM_SLEEP);
860 1044 list_create(&fsrecp->fshfsr_list, sizeof (fsh_int_t),
861 - offsetof(fsh_int_t, fshi_next));
1045 + offsetof(fsh_int_t, fshi_node));
862 1046 rw_init(&fsrecp->fshfsr_lock, NULL, RW_DRIVER, NULL);
863 1047 fsrecp->fshfsr_enabled = 1;
864 1048 return (fsrecp);
865 1049 }
866 1050
867 1051
868 1052 /*
869 - * This call can be used ONLY in vfs_free(). It's assumed that no other
870 - * fsh calls using the vfs_t that owns the fsh_fsrecord to be destroyed
871 - * are executing while a call to fsh_fsrec_destroy() is made. With this
872 - * assumptions, no concurrency issues occur.
1053 + * This call must be used ONLY in vfs_free().
873 1054 *
874 - * Before calling this function outside the fsh, it's sufficient and
875 - * required to check if the passed fsh_fsrecord * is not NULL. We don't
876 - * have to check if it is not equal to fsh_res_ptr, because all the fsh API
877 - * calls involving this vfs_t should end before vfs_free() is called
878 - * (outside the fsh, fsh_fsrecord is never equal to fsh_res_ptr). That is
879 - * guaranteed by the explicit requirement that the caller of fsh API holds
880 - * the vfs_t when needed.
1055 + * It is required and sufficient to check if fsh_fsrecord_t is not NULL before
1056 + * passing it to fsh_fsrec_destroy.
881 1057 *
882 - * All the remaining hooks are being removed.
1058 + * All the remaining hooks are being removed here.
883 1059 */
884 1060 void
885 1061 fsh_fsrec_destroy(struct fsh_fsrecord *volatile fsrecp)
886 1062 {
887 1063 fsh_int_t *fshi;
888 1064
889 1065 VERIFY(fsrecp != NULL);
890 1066
891 1067 _NOTE(CONSTCOND)
892 1068 while (1) {
893 1069 mutex_enter(&fsh_lock);
894 - /* No need here to hold fshfsr_lock */
1070 + rw_enter(&fsrecp->fshfsr_lock, RW_WRITER);
895 1071 fshi = list_remove_head(&fsrecp->fshfsr_list);
1072 + rw_exit(&fsrecp->fshfsr_lock);
896 1073 if (fshi == NULL) {
897 1074 mutex_exit(&fsh_lock);
898 1075 break;
899 1076 }
900 1077 ASSERT(fshi->fshi_doomed == 0);
901 1078 list_remove(&fsh_map, fshi);
902 1079 mutex_exit(&fsh_lock);
903 1080
904 1081 if (fshi->fshi_hooks.remove_cb != NULL)
905 1082 (*fshi->fshi_hooks.remove_cb)(fshi->fshi_hooks.arg,
906 1083 fshi->fshi_handle);
1084 +
907 1085 id_free(fsh_idspace, fshi->fshi_handle);
908 1086 mutex_destroy(&fshi->fshi_lock);
909 1087 kmem_free(fshi, sizeof (*fshi));
910 1088
911 1089 }
912 1090
913 1091 list_destroy(&fsrecp->fshfsr_list);
914 1092 rw_destroy(&fsrecp->fshfsr_lock);
915 1093 kmem_free(fsrecp, sizeof (*fsrecp));
916 1094 }
917 1095
918 1096 /*
919 1097 * fsh_init() is called in vfsinit()@vfs.c. This function MUST be called
920 1098 * before every other fsh call.
921 1099 */
922 1100 void
923 1101 fsh_init(void)
924 1102 {
925 - rw_init(&fsh_cblist_lock, NULL, RW_DRIVER, NULL);
1103 + mutex_init(&fsh_cb_lock, NULL, MUTEX_DRIVER, NULL);
1104 + mutex_init(&fsh_cb_owner_lock, NULL, MUTEX_DRIVER, NULL);
926 1105 list_create(&fsh_cblist, sizeof (fsh_callback_int_t),
927 - offsetof(fsh_callback_int_t, fshci_next));
1106 + offsetof(fsh_callback_int_t, fshci_node));
928 1107
929 1108 mutex_init(&fsh_lock, NULL, MUTEX_DRIVER, NULL);
930 1109
931 1110 list_create(&fsh_map, sizeof (fsh_int_t), offsetof(fsh_int_t,
932 1111 fshi_global));
933 1112
934 - /* See comment above fsh_prepare_fsrec() */
1113 + /* See comment above fsh_fsrec_prepare() */
935 1114 fsh_res_ptr = (void *)-1;
936 1115
937 1116 fsh_idspace = id_space_create("fsh", 0, fsh_limit);
938 -}
939 -
940 -/*
941 - * These functions are used to pass control to the next hook or underlying
942 - * vop or vfsop. It's client doesn't have to worry about any locking.
943 - */
944 -int
945 -fsh_next_read(fsh_int_t *fshi, vnode_t *vp, uio_t *uiop, int ioflag,
946 - cred_t *cr, caller_context_t *ct)
947 -{
948 - int ret;
949 - fsh_fsrecord_t *fsrecp = vp->v_vfsp->vfs_fshrecord;
950 -
951 - /*
952 - * The passed fshi is the previous hook (the one from which we've been
953 - * called). We need to find the next one.
954 - */
955 - rw_enter(&fsrecp->fshfsr_lock, RW_READER);
956 - for (fshi = list_next(&fsrecp->fshfsr_list, fshi); fshi != NULL;
957 - fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
958 - if (fshi->fshi_hooks.read != NULL)
959 - if (fshi_hold(fshi))
960 - break;
961 - }
962 - rw_exit(&fsrecp->fshfsr_lock);
963 -
964 - if (fshi == NULL)
965 - return ((*vp->v_op->vop_read)(vp, uiop, ioflag, cr, ct));
966 -
967 - ret = (*fshi->fshi_hooks.read)(fshi, fshi->fshi_hooks.arg,
968 - vp, uiop, ioflag, cr, ct);
969 - fshi_rele(fshi);
970 - return (ret);
971 -}
972 -
973 -int
974 -fsh_next_write(fsh_int_t *fshi, vnode_t *vp, uio_t *uiop, int ioflag,
975 - cred_t *cr, caller_context_t *ct)
976 -{
977 - fsh_fsrecord_t *fsrecp = vp->v_vfsp->vfs_fshrecord;
978 - int ret;
979 -
980 - /*
981 - * The passed fshi is the previous hook (the one from which we've been
982 - * called). We need to find the next one.
983 - */
984 - rw_enter(&fsrecp->fshfsr_lock, RW_READER);
985 - for (fshi = list_next(&fsrecp->fshfsr_list, fshi); fshi != NULL;
986 - fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
987 - if (fshi->fshi_hooks.write != NULL)
988 - if (fshi_hold(fshi))
989 - break;
990 - }
991 - rw_exit(&fsrecp->fshfsr_lock);
992 -
993 - if (fshi == NULL)
994 - return ((*vp->v_op->vop_write)(vp, uiop, ioflag, cr, ct));
995 -
996 - ret = (*fshi->fshi_hooks.write)(fshi, fshi->fshi_hooks.arg,
997 - vp, uiop, ioflag, cr, ct);
998 - fshi_rele(fshi);
999 - return (ret);
1000 -}
1001 -
1002 -int
1003 -fsh_next_mount(fsh_int_t *fshi, vfs_t *vfsp, vnode_t *mvp, struct mounta *uap,
1004 - cred_t *cr)
1005 -{
1006 - fsh_fsrecord_t *fsrecp = vfsp->vfs_fshrecord;
1007 - int ret;
1008 -
1009 - /*
1010 - * The passed fshi is the previous hook (the one from which we've been
1011 - * called). We need to find the next one.
1012 - */
1013 - rw_enter(&fsrecp->fshfsr_lock, RW_READER);
1014 - for (fshi = list_next(&fsrecp->fshfsr_list, fshi); fshi != NULL;
1015 - fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
1016 - if (fshi->fshi_hooks.mount != NULL)
1017 - if (fshi_hold(fshi))
1018 - break;
1019 - }
1020 - rw_exit(&fsrecp->fshfsr_lock);
1021 -
1022 - if (fshi == NULL)
1023 - return ((*(vfsp->vfs_op->vfs_mount))(vfsp, mvp, uap, cr));
1024 -
1025 - ret = (*fshi->fshi_hooks.mount)(fshi, fshi->fshi_hooks.arg,
1026 - vfsp, mvp, uap, cr);
1027 - fshi_rele(fshi);
1028 - return (ret);
1029 -}
1030 -
1031 -int
1032 -fsh_next_unmount(fsh_int_t *fshi, vfs_t *vfsp, int flag, cred_t *cr)
1033 -{
1034 - fsh_fsrecord_t *fsrecp = vfsp->vfs_fshrecord;
1035 - int ret;
1036 -
1037 - /*
1038 - * The passed fshi is the previous hook (the one from which we've been
1039 - * called). We need to find the next one.
1040 - */
1041 - rw_enter(&fsrecp->fshfsr_lock, RW_READER);
1042 - for (fshi = list_next(&fsrecp->fshfsr_list, fshi); fshi != NULL;
1043 - fshi = list_next(&fsrecp->fshfsr_list, fshi)) {
1044 - if (fshi->fshi_hooks.unmount != NULL)
1045 - if (fshi_hold(fshi))
1046 - break;
1047 - }
1048 - rw_exit(&fsrecp->fshfsr_lock);
1049 -
1050 - if (fshi == NULL)
1051 - return ((*vfsp->vfs_op->vfs_unmount)(vfsp, flag, cr));
1052 -
1053 - ret = (*fshi->fshi_hooks.unmount)(fshi, fshi->fshi_hooks.arg,
1054 - vfsp, flag, cr);
1055 - fshi_rele(fshi);
1056 - return (ret);
1057 1117 }
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX