1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2013 Damian Bogel. All rights reserved. 14 */ 15 16 #include <sys/debug.h> 17 #include <sys/errno.h> 18 #include <sys/fsh.h> 19 #include <sys/fsh_impl.h> 20 #include <sys/id_space.h> 21 #include <sys/kmem.h> 22 #include <sys/ksynch.h> 23 #include <sys/list.h> 24 #include <sys/sunddi.h> 25 #include <sys/sysmacros.h> 26 #include <sys/types.h> 27 #include <sys/vfs.h> 28 #include <sys/vnode.h> 29 30 /* 31 * Filesystem hook framework (fsh) 32 * 33 * 1. Abstract. 34 * The main goal of the filesystem hook framework is to provide an easy way to 35 * inject client-defined behaviour into vfs/vnode calls. fsh works on 36 * vfs_t granularity. 37 * 38 * 39 * 2. Overview. 40 * fsh_t is the main object in the fsh. An fsh_t is a structure containing: 41 * - pointers to hooking functions (named after corresponding 42 * vnodeops/vfsops) 43 * - a pointer to an argument to pass (this is shared for all the 44 * hooks in a given fsh_t) 45 * - a pointer to the *hook remove callback* - it's being fired after a 46 * hook is removed and the hook has stopped executing. It's safe to destroy 47 * any data associated with this hook. 48 * 49 * The information from fsh_t is copied by the fsh and an fsh_handle_t 50 * is returned. It should be used for further removing. 51 * 52 * 53 * 3. Usage. 54 * It is expected that vfs_t/vnode_t that are passed to fsh_foo() functions 55 * are held by the caller when needed. fsh does no vfs_t/vnode_t locking. 56 * 57 * fsh_t is a structure filled out by the client. 
 * If a client does not want
 * to add/remove a hook for function foo(), he should fill the foo field of
 * fsh_t with NULL. Every hook has a type of corresponding vfsop/vnodeop with
 * two additional arguments:
 *	- fsh_int_t *fsh_int - this argument MUST be passed to
 *	fsh_next_foo(). fsh wouldn't know which hook to execute next
 *	without it
 *	- void *arg - this is the argument passed with fsh_t during
 *	installation
 * Apart from the hooks and arg, fsh_t also contains:
 *	- void (*remove_cb)(void *, fsh_handle_t) - hook remove callback
 *	(mentioned earlier); its first argument is arg, the second is the
 *	handle
 *
 * After installation, an fsh_handle_t is returned to the caller.
 *
 * Every hook function is responsible for passing the control to the next
 * hook associated with a particular call. In order to provide an easy way to
 * modify the behaviour of a function call both before and after the
 * underlying vfsop/vnodeop (or next hook) execution, a hook has to call
 * fsh_next_foo() at some point. This function does necessary internal
 * operations and calls the next hook, until there's no hook left, then it
 * calls the underlying vfsop/vnodeop.
 * Example:
 * my_freefs(fsh_int_t *fsh_int, void *arg, vfs_t *vfsp) {
 *	cmn_err(CE_NOTE, "freefs called!\n");
 *	return (fsh_next_freefs(fsh_int, vfsp));
 * }
 *
 *
 * A client might want to fire callbacks when vfs_t's are being mounted
 * or freed. There's an fsh_callback_t structure provided to install such
 * callbacks along with the API.
 * It is legal to call fsh_hook_{install,remove}() inside a mount callback
 * WITHOUT holding the vfs_t.
 *
 * After vfs_t's free callback returns, all the handles associated with the
 * hooks installed on this vfs_t are invalid and must not be used.
 *
 *
 * 4. API
 * None of the APIs should be called during interrupt context above lock
 * level. The only exceptions are fsh_next_foo() functions, which do not use
 * locks.
 *
 * a) fsh.h
 * Any of these functions could be called inside a hook or a hook remove
 * callback.
 * fsh_callback_{install,remove}() must not be called inside a {mount,free}
 * callback. Doing so will cause a deadlock. Other functions can be called
 * inside {mount,free} callbacks.
 *
 * fsh_fs_enable(vfs_t *vfsp)
 * fsh_fs_disable(vfs_t *vfsp)
 *	Enables/disables fsh for a given vfs_t.
 *
 * fsh_hook_install(vfs_t *vfsp, fsh_t *hooks)
 *	Installs hooks on vfsp filesystem.
 *	It's important that hooks are executed in LIFO installation order,
 *	which means that if there are hooks A and B installed in this order, B
 *	is going to be executed before A.
 *	It returns a correct handle, or (-1) if hook/callback limit exceeded.
 *	The handle is valid until a free callback returns or an explicit call
 *	to fsh_hook_remove().
 *
 * fsh_hook_remove(fsh_handle_t handle)
 *	Removes a hook and invalidates the handle.
 *	It is guaranteed that after this function returns, calls to
 *	vnodeops/vfsops won't go through this hook, although there might be
 *	some threads still executing this hook. When hook remove callback is
 *	fired, it is guaranteed that the hook won't be executed anymore. It is
 *	safe to remove all the internal data associated with this hook inside
 *	the hook remove callback. The hook remove callback could be called
 *	inside fsh_hook_remove().
 *
 * fsh_next_foo(fsh_int_t *fsh_int, ARGUMENTS)
 *	This is the function which should be called once in every hook. It
 *	does the necessary internal operations and passes control to the
 *	next hook or, if there's no hook left, to the underlying
 *	vfsop/vnodeop.
 *
 * fsh_callback_install(fsh_callback_t *callback)
 * fsh_callback_remove(fsh_callback_handle_t handle)
 *	Installs/removes callbacks for vfs_t mount/free. The mount callback
 *	is executed right before domount() returns.
 *	The free callback is
 *	called right before VFS_FREEVFS() is called.
 *	The fsh_callback_install() returns a correct handle, or (-1) if
 *	hook/callback limit exceeded.
 *
 * b) fsh_impl.h (for vfs.c and vnode.c only)
 * fsh_init()
 *	This call has to be done in vfsinit(). It initialises the fsh. It
 *	is absolutely necessary that this call is made before any other fsh
 *	operation.
 *
 * fsh_exec_mount_callbacks(vfs_t *vfsp)
 * fsh_exec_free_callbacks(vfs_t *vfsp)
 *	Used to execute all fsh callbacks for {mount,free} of a vfs_t.
 *
 * fsh_fsrec_destroy(struct fsh_fsrecord *fsrecp)
 *	Destroys an fsh_fsrecord structure. All the hooks installed on this
 *	vfs_t are then destroyed. free callback is called before this function.
 *
 * fsh_foo(ARGUMENTS)
 *	Function used to start executing the hook chain for a given call.
 *
 *
 * 5. Internals.
 * fsh_int_t is an internal hook structure. It is reference counted.
 * fshi_hold() and fshi_rele() should be used whenever needed.
 * fsh_int_t entries are elements of both fsh_map (global) and fshfsr_list
 * (local to vfs_t). All entries are unique and are identified by fshi_handle.
 *
 * fsh_int_t properties:
 *	- fsh_hook_install() sets the ref. counter to 1 and adds it to both
 *	fsh_map and fshfsr_list
 *	- fsh_hook_remove() decreases the ref. counter by 1, removes the hook
 *	from fsh_map and marks the hook as *doomed*
 *	- if fsh_int_t is on the fshfsr_list, it's alive and there is a thread
 *	executing it
 *	- if fsh_int_t is marked as *doomed*, the reference counter is not
 *	increased any more and thus no thread can acquire this fsh_int_t
 *	- ref.
 *	counter can drop to 0 only after an fsh_hook_remove() call; this
 *	also means that the fsh_int_t is *doomed* and isn't a part of fsh_map
 *	- fsh_int_t could be also destroyed without fsh_hook_remove() call,
 *	that happens only inside fsh_fsrec_destroy() where it is guaranteed
 *	that there is no thread executing the hook
 *
 *
 * fsh_fsrecord_t is a structure which lives inside a vfs_t.
 * fsh_fsrecord_t contains:
 *	- an rw-lock that protects the structure
 *	- a list of hooks installed on this vfs_t
 *	- a flag which tells whether fsh is enabled on this vfs_t
 *
 *
 * fsh_prepare_fsrec rule:
 * Every function that needs vfsp->vfs_fshrecord has to call
 * fsh_prepare_fsrec() first. If and only if the call is made, it is safe to
 * use vfsp->vfs_fshrecord.
 *
 * Unfortunately, because of unexpected behaviour of some filesystems (no use
 * of vfs_alloc()/vfs_init()) there's no good place to initialise the
 * fsh_fsrecord_t structure. The approach being used here is to check if it's
 * initialised in every call. Because of the fact that no lock could be used
 * here (the same problem with initialisation), a spinlock is used. This is
 * explained in more detail in a comment before fsh_prepare_fsrec(). After
 * calling fsh_prepare_fsrec() it's completely safe to keep the vfs_fshrecord
 * pointer locally, because it won't be changed until vfs_free() is called.
 *
 * The only exception from the fsh_prepare_fsrec() rule is vfs_free(),
 * where it is expected that no other fsh calls would be made for the
 * vfs_t that's being freed. That's why vfs_fshrecord could be only NULL or a
 * valid pointer and could not be concurrently accessed.
 *
 * When there are no fsh functions (that use a particular fsh_fsrecord_t)
 * executing, the vfs_fshrecord pointer won't be equal to fsh_res_ptr. It
 * would be NULL or a pointer to an initialised fsh_fsrecord_t.
 *
 *
 * Callbacks:
 * Mount callbacks are executed by a call to fsh_exec_mount_callbacks() right
 * before returning from domount()@vfs.c.
 *
 * Free callbacks are executed by a call to fsh_exec_free_callbacks() right
 * before calling VFS_FREEVFS(), after vfs_t's reference count drops to 0.
 *
 *
 * fsh_next_foo(fsh_int_t *fshi, ARGUMENTS)
 * This function is quite simple. It takes the fsh_int_t and passes control
 * to the next hook or to the underlying vnodeop/vfsop.
 *
 *
 * 6. Locking
 * a) public
 * fsh does no vfs_t nor vnode_t locking. It is expected that whenever it is
 * needed, the client does that.
 *
 * fsh_callback_{install,remove} must not be called inside a callback, because
 * it will cause a deadlock.
 *
 * b) internal
 * Locking diagram:
 *
 * fsh_hook_install() fsh_hook_remove() fsh_fsrec_destroy()
 *    |                     |                |
 *    |                     |                |
 *    +------------------+  |   +------------+
 *                       |  |   |
 *                       V  V   V
 *                       fsh_lock
 *                        |   |
 *                        |   +----- fshfsr_lock, RW_WRITER ---+
 *                        |                                    |
 *                        V                                    |
 *      +---------------------------------------+              |
 *      |              fsh_map                  |              |
 *      |                                       |              |
 * +----|-> vfsp->vfs_fshrecord->fshfsr_list <--|--------------+
 * |    +------------------------------^--------+
 * |                                   |
 * |                                   |
 * fshfsr_lock, RW_READER   fshfsr_lock, RW_WRITER
 * |                                   |
 * |                                   |
 * fsh_read(),                    fshi_rele()
 * fsh_write(),
 * ...,                        Might be called from:
 * fsh_next_read(),            fsh_hook_remove()
 * fsh_next_write(),           fsh_read(), fsh_write(), ...
 * ...                         fsh_next_read(), fsh_next_write(), ...
 *
 * fsh_lock is a global lock for administrative path (fsh_hook_install,
 * fsh_hook_remove) and fsh_fsrec_destroy() (which is semi-administrative, since
 * it destroys the unremoved hooks). It is used only when fsh_map needs to be
 * locked. The usage of this lock guarantees that the data in fsh_map and
 * fshfsr_lists is consistent.
273 */ 274 275 276 /* Internals */ 277 struct fsh_int { 278 fsh_handle_t fshi_handle; 279 fsh_t fshi_hooks; 280 vfs_t *fshi_vfsp; 281 282 kmutex_t fshi_lock; 283 uint64_t fshi_ref; 284 uint64_t fshi_doomed; /* changed inside fsh_lock */ 285 286 /* next node in fshfsr_list */ 287 list_node_t fshi_next; 288 289 /* next node in fsh_map */ 290 list_node_t fshi_global; 291 }; 292 293 typedef struct fsh_callback_int { 294 fsh_callback_t fshci_cb; 295 fsh_callback_handle_t fshci_handle; 296 list_node_t fshci_next; 297 } fsh_callback_int_t; 298 299 300 static kmutex_t fsh_lock; 301 302 /* 303 * fsh_fsrecord_t is the main internal structure. It's content is protected 304 * by fshfsr_lock. The fshfsr_list is a list of fsh_int_t hook entries for 305 * the vfs_t that contains the fsh_fsrecord_t. 306 */ 307 struct fsh_fsrecord { 308 krwlock_t fshfsr_lock; 309 int fshfsr_enabled; 310 list_t fshfsr_list; 311 }; 312 313 /* 314 * Global list of fsh_int_t. Protected by fsh_lock. 315 */ 316 static list_t fsh_map; 317 318 /* 319 * Global list of fsh_callback_int_t. 320 */ 321 static krwlock_t fsh_cblist_lock; 322 static list_t fsh_cblist; 323 324 /* 325 * A reserved pointer for fsh purposes. It is used because of the method 326 * chosen for solving concurrency issues with vfs_fshrecord. The full 327 * explanation is in the big theory statement at the beginning of this 328 * file and above fsh_fsrec_prepare(). It is initialised in fsh_init(). 329 */ 330 static void *fsh_res_ptr; 331 332 static fsh_fsrecord_t *fsh_fsrec_create(); 333 334 int fsh_limit = INT_MAX; 335 static id_space_t *fsh_idspace; 336 337 /* 338 * fsh_prepare_fsrec() 339 * 340 * Important note: 341 * Before using this function, fsh_init() MUST be called. We do that in 342 * vfsinit()@vfs.c. 343 * 344 * One would ask, why isn't the vfsp->vfs_fshrecord initialised when the 345 * vfs_t is created. Unfortunately, some filesystems (e.g. 
fifofs) do not 346 * call vfs_init() or even vfs_alloc(), It's possible that some unbundled 347 * filesystems could do the same thing. That's why this solution is 348 * introduced. It should be called before any code that needs access to 349 * vfs_fshrecord. 350 * 351 * Locking: 352 * There are no locks here, because there's no good place to initialise 353 * the lock. Concurrency issues are solved by using atomic instructions 354 * and a spinlock, which is spinning only once for a given vfs_t. Because 355 * of that, the usage of the spinlock isn't bad at all. 356 * 357 * How it works: 358 * a) if vfsp->vfs_fshrecord equals NULL, atomic_cas_ptr() changes it to 359 * fsh_res_ptr. That's a signal for other threads, that the structure 360 * is being initialised. 361 * b) if vfsp->vfs_fshrecord equals fsh_res_ptr, that means we have to wait, 362 * because vfs_fshrecord is being initialised by another call. 363 * c) other cases: 364 * vfs_fshrecord is already initialised, so we can use it. It won't change 365 * until vfs_free() is called. It can't happen when someone is holding 366 * the vfs_t, which is expected from the caller of fsh API. 367 */ 368 static void 369 fsh_prepare_fsrec(vfs_t *vfsp) 370 { 371 fsh_fsrecord_t *fsrec; 372 373 while ((fsrec = atomic_cas_ptr(&vfsp->vfs_fshrecord, NULL, 374 fsh_res_ptr)) == fsh_res_ptr) 375 ; 376 377 if (fsrec == NULL) 378 atomic_swap_ptr(&vfsp->vfs_fshrecord, fsh_fsrec_create()); 379 } 380 381 /* 382 * API for enabling/disabling fsh per vfs_t. 383 * 384 * A newly created vfs_t has fsh enabled by default. If one would want to change 385 * this behaviour, mount callbacks could be used. 386 * 387 * The caller is expected to hold the vfs_t. 388 * 389 * These functions must NOT be called in a hook. 
390 */ 391 void 392 fsh_fs_enable(vfs_t *vfsp) 393 { 394 fsh_prepare_fsrec(vfsp); 395 396 rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER); 397 vfsp->vfs_fshrecord->fshfsr_enabled = 1; 398 rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock); 399 } 400 401 void 402 fsh_fs_disable(vfs_t *vfsp) 403 { 404 fsh_prepare_fsrec(vfsp); 405 406 rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER); 407 vfsp->vfs_fshrecord->fshfsr_enabled = 0; 408 rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock); 409 } 410 411 /* 412 * API used for installing hooks. fsh_handle_t is returned for further 413 * actions (currently just removing) on this set of hooks. 414 * 415 * fsh_t fields: 416 * - arg - argument passed to every hook 417 * - remove_cb - remove callback, called after a hook is removed and all the 418 * threads stops executing it 419 * - read, write, ... - pointers to hooks for corresponding vnodeops/vfsops; 420 * if there is no hook desired for an operation, it should be set to 421 * NULL 422 * 423 * It's important that the hooks are executed in LIFO installation order (they 424 * are added to the head of the hook list). 425 * 426 * The caller is expected to hold the vfs_t. 427 * 428 * Returns (-1) if hook/callback limit exceeded, handle otherwise. 
429 */ 430 fsh_handle_t 431 fsh_hook_install(vfs_t *vfsp, fsh_t *hooks) 432 { 433 fsh_handle_t handle; 434 fsh_int_t *fshi; 435 436 fsh_prepare_fsrec(vfsp); 437 438 if ((handle = id_alloc(fsh_idspace)) == -1) 439 return (-1); 440 441 fshi = kmem_alloc(sizeof (*fshi), KM_SLEEP); 442 mutex_init(&fshi->fshi_lock, NULL, MUTEX_DRIVER, NULL); 443 (void) memcpy(&fshi->fshi_hooks, hooks, sizeof (fshi->fshi_hooks)); 444 fshi->fshi_handle = handle; 445 fshi->fshi_doomed = 0; 446 fshi->fshi_ref = 1; 447 fshi->fshi_vfsp = vfsp; 448 449 mutex_enter(&fsh_lock); 450 rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER); 451 list_insert_head(&vfsp->vfs_fshrecord->fshfsr_list, fshi); 452 rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock); 453 454 list_insert_head(&fsh_map, fshi); 455 mutex_exit(&fsh_lock); 456 457 return (handle); 458 } 459 460 static int 461 fshi_hold(fsh_int_t *fshi) 462 { 463 int can_hold; 464 465 mutex_enter(&fshi->fshi_lock); 466 if (fshi->fshi_doomed == 1) { 467 can_hold = 0; 468 } else { 469 fshi->fshi_ref++; 470 can_hold = 1; 471 } 472 mutex_exit(&fshi->fshi_lock); 473 474 return (can_hold); 475 } 476 477 /* 478 * This function must not be called while fshfsr_lock is held. Doing so could 479 * cause a deadlock. 480 */ 481 static void 482 fshi_rele(fsh_int_t *fshi) 483 { 484 int destroy; 485 486 mutex_enter(&fshi->fshi_lock); 487 ASSERT(fshi->fshi_ref > 0); 488 fshi->fshi_ref--; 489 if (fshi->fshi_ref == 0) { 490 ASSERT(fshi->fshi_doomed == 1); 491 destroy = 1; 492 } else { 493 destroy = 0; 494 } 495 mutex_exit(&fshi->fshi_lock); 496 497 if (destroy) { 498 /* 499 * At this point, we are sure that fsh_hook_remove() has been 500 * called, that's why we don't remove the fshi from fsh_map. 501 * fsh_hook_remove() did that already. 502 */ 503 fsh_fsrecord_t *fsrecp; 504 505 if (fshi->fshi_hooks.remove_cb != NULL) 506 (*fshi->fshi_hooks.remove_cb)( 507 fshi->fshi_hooks.arg, fshi->fshi_handle); 508 /* 509 * We don't have to call fsh_prepare_fsrec() here. 
510 * fsh_fsrecord_t is already initialised, because we've found a 511 * mapping for the given handle. 512 */ 513 fsrecp = fshi->fshi_vfsp->vfs_fshrecord; 514 ASSERT(fsrecp != NULL); 515 ASSERT(fsrecp != fsh_res_ptr); 516 517 rw_enter(&fsrecp->fshfsr_lock, RW_WRITER); 518 list_remove(&fsrecp->fshfsr_list, fshi); 519 rw_exit(&fsrecp->fshfsr_lock); 520 521 id_free(fsh_idspace, fshi->fshi_handle); 522 mutex_destroy(&fshi->fshi_lock); 523 kmem_free(fshi, sizeof (*fshi)); 524 } 525 } 526 527 /* 528 * Used for removing a hook set. 529 * 530 * fsh_hook_remove() invalidates the given handle. 531 * 532 * It is guaranteed, that after successful return from fsh_hook_remove(), 533 * calls to vnodeops/vfsops, on the vfs_t on which the hook is installed, won't 534 * go through this hook. 535 * 536 * There is no guarantee that after fsh_hook_remove() returns, the hook 537 * associated with the handle won't be executing. Instead, it is guaranteed that 538 * when remove_cb() is called, the hook finished it's execution in all threads. 539 * It is safe to destroy all internal data associated with this hook inside 540 * remove_cb(). 541 * 542 * It is possible that remove_cb() would be called before fsh_hook_remove() 543 * returns. 544 * 545 * Returns (-1) if hook wasn't found, 0 otherwise. 546 */ 547 int 548 fsh_hook_remove(fsh_handle_t handle) 549 { 550 fsh_int_t *fshi; 551 552 mutex_enter(&fsh_lock); 553 for (fshi = list_head(&fsh_map); fshi != NULL; 554 fshi = list_next(&fsh_map, fshi)) { 555 if (fshi->fshi_handle == handle) { 556 list_remove(&fsh_map, fshi); 557 break; 558 } 559 } 560 561 if (fshi == NULL) 562 return (-1); 563 564 mutex_enter(&fshi->fshi_lock); 565 ASSERT(fshi->fshi_doomed == 0); 566 fshi->fshi_doomed = 1; 567 mutex_exit(&fshi->fshi_lock); 568 mutex_exit(&fsh_lock); 569 570 fshi_rele(fshi); 571 572 return (0); 573 } 574 575 /* 576 * API for installing global mount/free callbacks. 
577 * 578 * fsh_callback_t fields: 579 * fshc_arg - argument passed to the callbacks 580 * fshc_free - callback fired before VFS_FREEVFS() is called, after vfs_count 581 * drops to 0 582 * fshc_mount - callback fired right before returning from domount() 583 * The first argument of these callbacks is the vfs_t that is mounted/freed. 584 * The second one is the fshc_arg. 585 * 586 * fsh_callback_handle_t is filled out by this function. 587 * 588 * This function must NOT be called in a callback, because it will cause 589 * a deadlock. 590 * 591 * Returns (-1) if hook/callback limit exceeded. 592 */ 593 fsh_callback_handle_t 594 fsh_callback_install(fsh_callback_t *callback) 595 { 596 fsh_callback_int_t *fshci; 597 fsh_callback_handle_t handle; 598 599 if ((handle = id_alloc(fsh_idspace)) == -1) 600 return (-1); 601 602 fshci = (fsh_callback_int_t *)kmem_alloc(sizeof (*fshci), KM_SLEEP); 603 (void) memcpy(&fshci->fshci_cb, callback, sizeof (fshci->fshci_cb)); 604 fshci->fshci_handle = handle; 605 606 /* If it is called in a {mount,free} callback, causes deadlock. */ 607 rw_enter(&fsh_cblist_lock, RW_WRITER); 608 list_insert_head(&fsh_cblist, fshci); 609 rw_exit(&fsh_cblist_lock); 610 611 return (handle); 612 } 613 614 /* 615 * API for removing global mount/free callbacks. 616 * 617 * This function must NOT be called in a callback, because it will cause 618 * a deadlock. 619 * 620 * Returns (-1) if callback wasn't found, 0 otherwise. 621 */ 622 int 623 fsh_callback_remove(fsh_callback_handle_t handle) 624 { 625 fsh_callback_int_t *fshci; 626 627 /* If it is called in a {mount,free} callback, causes deadlock. 
*/ 628 rw_enter(&fsh_cblist_lock, RW_WRITER); 629 for (fshci = list_head(&fsh_cblist); fshci != NULL; 630 fshci = list_next(&fsh_cblist, fshci)) { 631 if (fshci->fshci_handle == handle) { 632 list_remove(&fsh_cblist, fshci); 633 break; 634 } 635 } 636 rw_exit(&fsh_cblist_lock); 637 638 if (fshci == NULL) 639 return (-1); 640 641 kmem_free(fshci, sizeof (*fshci)); 642 id_free(fsh_idspace, handle); 643 644 return (0); 645 } 646 647 /* 648 * This function is executed right before returning from domount()@vfs.c. 649 * We are sure that it's called only after fsh_init(). 650 * It executes all the mount callbacks installed in the fsh. 651 * 652 * Since fsh_exec_mount_callbacks() is called only inside domount(), it is legal 653 * to call fsh_hook_{install,remove}() inside a mount callback WITHOUT holding 654 * this vfs_t. This guarantee should be preserved, because it's in the "Usage" 655 * section in the big theory statement at the top of this file. 656 */ 657 void 658 fsh_exec_mount_callbacks(vfs_t *vfsp) 659 { 660 fsh_callback_int_t *fshci; 661 fsh_callback_t *cb; 662 663 rw_enter(&fsh_cblist_lock, RW_READER); 664 for (fshci = list_head(&fsh_cblist); fshci != NULL; 665 fshci = list_next(&fsh_cblist, fshci)) { 666 cb = &fshci->fshci_cb; 667 if (cb->fshc_mount != NULL) 668 (*(cb->fshc_mount))(vfsp, cb->fshc_arg); 669 } 670 rw_exit(&fsh_cblist_lock); 671 } 672 673 /* 674 * This function is executed right before VFS_FREEVFS() is called in 675 * vfs_rele()@vfs.c. We are sure that it's called only after fsh_init(). 676 * It executes all the free callbacks installed in the fsh. 
677 * 678 * free() callback is the point after the handles associated with the hooks 679 * installed on this vfs_t become invalid 680 */ 681 void 682 fsh_exec_free_callbacks(vfs_t *vfsp) 683 { 684 fsh_callback_int_t *fshci; 685 fsh_callback_t *cb; 686 687 rw_enter(&fsh_cblist_lock, RW_READER); 688 for (fshci = list_head(&fsh_cblist); fshci != NULL; 689 fshci = list_next(&fsh_cblist, fshci)) { 690 cb = &fshci->fshci_cb; 691 if (cb->fshc_free != NULL) 692 (*(cb->fshc_free))(vfsp, cb->fshc_arg); 693 } 694 rw_exit(&fsh_cblist_lock); 695 } 696 697 /* 698 * API for vnode.c/vfs.c to start executing the fsh for a given operation. 699 * 700 * fsh_xxx() tries to find the first non-NULL xxx hook on the fshfsr_list. If it 701 * does, it executes it. If not, underlying vnodeop/vfsop is called. 702 * 703 * These interfaces are using fsh_res_ptr (in fsh_prepare_fsrec()), so it's 704 * absolutely necessary to call fsh_init() before using them. That's done in 705 * vfsinit(). 706 * 707 * While these functions are executing, it's expected that necessary vfs_t's 708 * are held so that vfs_free() isn't called. vfs_free() expects that noone 709 * accesses vfs_fshrecord of a given vfs_t. 710 * It's also the caller's responsibility to keep vnode_t passed to fsh_foo() 711 * alive and valid. 712 * All these expectations are met because these functions are used only in 713 * correspondng {fop,fsop}_foo() functions. 
714 */ 715 int 716 fsh_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, 717 caller_context_t *ct) 718 { 719 int ret; 720 fsh_fsrecord_t *fsrecp; 721 fsh_int_t *fshi; 722 723 fsh_prepare_fsrec(vp->v_vfsp); 724 fsrecp = vp->v_vfsp->vfs_fshrecord; 725 726 rw_enter(&fsrecp->fshfsr_lock, RW_READER); 727 if (!(fsrecp->fshfsr_enabled)) { 728 rw_exit(&fsrecp->fshfsr_lock); 729 return ((*(vp->v_op->vop_read))(vp, uiop, ioflag, cr, ct)); 730 } 731 732 for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL; 733 fshi = list_next(&fsrecp->fshfsr_list, fshi)) { 734 if (fshi->fshi_hooks.read != NULL) 735 if (fshi_hold(fshi)) 736 break; 737 } 738 rw_exit(&fsrecp->fshfsr_lock); 739 740 if (fshi == NULL) 741 return ((*(vp->v_op->vop_read))(vp, uiop, ioflag, cr, ct)); 742 743 ret = (*fshi->fshi_hooks.read)(fshi, fshi->fshi_hooks.arg, 744 vp, uiop, ioflag, cr, ct); 745 fshi_rele(fshi); 746 return (ret); 747 } 748 749 int 750 fsh_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, 751 caller_context_t *ct) 752 { 753 fsh_int_t *fshi; 754 int ret; 755 fsh_fsrecord_t *fsrecp; 756 757 fsh_prepare_fsrec(vp->v_vfsp); 758 fsrecp = vp->v_vfsp->vfs_fshrecord; 759 760 rw_enter(&fsrecp->fshfsr_lock, RW_READER); 761 if (!(vp->v_vfsp->vfs_fshrecord->fshfsr_enabled)) { 762 rw_exit(&fsrecp->fshfsr_lock); 763 return ((*(vp->v_op->vop_write))(vp, uiop, ioflag, cr, ct)); 764 } 765 766 for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL; 767 fshi = list_next(&fsrecp->fshfsr_list, fshi)) { 768 if (fshi->fshi_hooks.write != NULL) 769 if (fshi_hold(fshi)) 770 break; 771 } 772 rw_exit(&fsrecp->fshfsr_lock); 773 774 if (fshi == NULL) 775 return ((*(vp->v_op->vop_write))(vp, uiop, ioflag, cr, ct)); 776 777 ret = (*fshi->fshi_hooks.write)(fshi, fshi->fshi_hooks.arg, 778 vp, uiop, ioflag, cr, ct); 779 fshi_rele(fshi); 780 return (ret); 781 } 782 783 int 784 fsh_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 785 { 786 fsh_fsrecord_t *fsrecp; 787 fsh_int_t *fshi; 788 int ret; 
789 790 fsh_prepare_fsrec(vfsp); 791 fsrecp = vfsp->vfs_fshrecord; 792 793 rw_enter(&fsrecp->fshfsr_lock, RW_READER); 794 if (!(fsrecp->fshfsr_enabled)) { 795 rw_exit(&fsrecp->fshfsr_lock); 796 return ((*(vfsp->vfs_op->vfs_mount))(vfsp, mvp, uap, cr)); 797 } 798 799 for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL; 800 fshi = list_next(&fsrecp->fshfsr_list, fshi)) { 801 if (fshi->fshi_hooks.mount != NULL) 802 if (fshi_hold(fshi)) 803 break; 804 } 805 rw_exit(&fsrecp->fshfsr_lock); 806 807 if (fshi == NULL) 808 return ((*(vfsp->vfs_op->vfs_mount))(vfsp, mvp, uap, cr)); 809 810 ret = (*fshi->fshi_hooks.mount)(fshi, fshi->fshi_hooks.arg, 811 vfsp, mvp, uap, cr); 812 fshi_rele(fshi); 813 return (ret); 814 } 815 816 int 817 fsh_unmount(vfs_t *vfsp, int flag, cred_t *cr) 818 { 819 fsh_fsrecord_t *fsrecp; 820 fsh_int_t *fshi; 821 int ret; 822 823 fsh_prepare_fsrec(vfsp); 824 fsrecp = vfsp->vfs_fshrecord; 825 826 rw_enter(&fsrecp->fshfsr_lock, RW_READER); 827 if (!(fsrecp->fshfsr_enabled)) { 828 rw_exit(&fsrecp->fshfsr_lock); 829 return ((*(vfsp->vfs_op->vfs_unmount))(vfsp, flag, cr)); 830 } 831 832 for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL; 833 fshi = list_next(&fsrecp->fshfsr_list, fshi)) { 834 if (fshi->fshi_hooks.unmount != NULL) 835 if (fshi_hold(fshi)) 836 break; 837 } 838 rw_exit(&fsrecp->fshfsr_lock); 839 840 if (fshi == NULL) 841 return ((*(vfsp->vfs_op->vfs_unmount))(vfsp, flag, cr)); 842 843 ret = (*fshi->fshi_hooks.unmount)(fshi, fshi->fshi_hooks.arg, 844 vfsp, flag, cr); 845 fshi_rele(fshi); 846 return (ret); 847 } 848 849 /* 850 * This is the funtion used by fsh_prepare_fsrec() to allocate a new 851 * fsh_fsrecord. This function is called by the first function which 852 * access the vfs_fshrecord and finds out it's NULL. 
853 */ 854 static fsh_fsrecord_t * 855 fsh_fsrec_create() 856 { 857 fsh_fsrecord_t *fsrecp; 858 859 fsrecp = (fsh_fsrecord_t *)kmem_zalloc(sizeof (*fsrecp), KM_SLEEP); 860 list_create(&fsrecp->fshfsr_list, sizeof (fsh_int_t), 861 offsetof(fsh_int_t, fshi_next)); 862 rw_init(&fsrecp->fshfsr_lock, NULL, RW_DRIVER, NULL); 863 fsrecp->fshfsr_enabled = 1; 864 return (fsrecp); 865 } 866 867 868 /* 869 * This call can be used ONLY in vfs_free(). It's assumed that no other 870 * fsh calls using the vfs_t that owns the fsh_fsrecord to be destroyed 871 * are executing while a call to fsh_fsrec_destroy() is made. With this 872 * assumptions, no concurrency issues occur. 873 * 874 * Before calling this function outside the fsh, it's sufficient and 875 * required to check if the passed fsh_fsrecord * is not NULL. We don't 876 * have to check if it is not equal to fsh_res_ptr, because all the fsh API 877 * calls involving this vfs_t should end before vfs_free() is called 878 * (outside the fsh, fsh_fsrecord is never equal to fsh_res_ptr). That is 879 * guaranteed by the explicit requirement that the caller of fsh API holds 880 * the vfs_t when needed. 881 * 882 * All the remaining hooks are being removed. 
883 */ 884 void 885 fsh_fsrec_destroy(struct fsh_fsrecord *volatile fsrecp) 886 { 887 fsh_int_t *fshi; 888 889 VERIFY(fsrecp != NULL); 890 891 _NOTE(CONSTCOND) 892 while (1) { 893 mutex_enter(&fsh_lock); 894 /* No need here to hold fshfsr_lock */ 895 fshi = list_remove_head(&fsrecp->fshfsr_list); 896 if (fshi == NULL) { 897 mutex_exit(&fsh_lock); 898 break; 899 } 900 ASSERT(fshi->fshi_doomed == 0); 901 list_remove(&fsh_map, fshi); 902 mutex_exit(&fsh_lock); 903 904 if (fshi->fshi_hooks.remove_cb != NULL) 905 (*fshi->fshi_hooks.remove_cb)(fshi->fshi_hooks.arg, 906 fshi->fshi_handle); 907 id_free(fsh_idspace, fshi->fshi_handle); 908 mutex_destroy(&fshi->fshi_lock); 909 kmem_free(fshi, sizeof (*fshi)); 910 911 } 912 913 list_destroy(&fsrecp->fshfsr_list); 914 rw_destroy(&fsrecp->fshfsr_lock); 915 kmem_free(fsrecp, sizeof (*fsrecp)); 916 } 917 918 /* 919 * fsh_init() is called in vfsinit()@vfs.c. This function MUST be called 920 * before every other fsh call. 921 */ 922 void 923 fsh_init(void) 924 { 925 rw_init(&fsh_cblist_lock, NULL, RW_DRIVER, NULL); 926 list_create(&fsh_cblist, sizeof (fsh_callback_int_t), 927 offsetof(fsh_callback_int_t, fshci_next)); 928 929 mutex_init(&fsh_lock, NULL, MUTEX_DRIVER, NULL); 930 931 list_create(&fsh_map, sizeof (fsh_int_t), offsetof(fsh_int_t, 932 fshi_global)); 933 934 /* See comment above fsh_prepare_fsrec() */ 935 fsh_res_ptr = (void *)-1; 936 937 fsh_idspace = id_space_create("fsh", 0, fsh_limit); 938 } 939 940 /* 941 * These functions are used to pass control to the next hook or underlying 942 * vop or vfsop. It's client doesn't have to worry about any locking. 943 */ 944 int 945 fsh_next_read(fsh_int_t *fshi, vnode_t *vp, uio_t *uiop, int ioflag, 946 cred_t *cr, caller_context_t *ct) 947 { 948 int ret; 949 fsh_fsrecord_t *fsrecp = vp->v_vfsp->vfs_fshrecord; 950 951 /* 952 * The passed fshi is the previous hook (the one from which we've been 953 * called). We need to find the next one. 
954 */ 955 rw_enter(&fsrecp->fshfsr_lock, RW_READER); 956 for (fshi = list_next(&fsrecp->fshfsr_list, fshi); fshi != NULL; 957 fshi = list_next(&fsrecp->fshfsr_list, fshi)) { 958 if (fshi->fshi_hooks.read != NULL) 959 if (fshi_hold(fshi)) 960 break; 961 } 962 rw_exit(&fsrecp->fshfsr_lock); 963 964 if (fshi == NULL) 965 return ((*vp->v_op->vop_read)(vp, uiop, ioflag, cr, ct)); 966 967 ret = (*fshi->fshi_hooks.read)(fshi, fshi->fshi_hooks.arg, 968 vp, uiop, ioflag, cr, ct); 969 fshi_rele(fshi); 970 return (ret); 971 } 972 973 int 974 fsh_next_write(fsh_int_t *fshi, vnode_t *vp, uio_t *uiop, int ioflag, 975 cred_t *cr, caller_context_t *ct) 976 { 977 fsh_fsrecord_t *fsrecp = vp->v_vfsp->vfs_fshrecord; 978 int ret; 979 980 /* 981 * The passed fshi is the previous hook (the one from which we've been 982 * called). We need to find the next one. 983 */ 984 rw_enter(&fsrecp->fshfsr_lock, RW_READER); 985 for (fshi = list_next(&fsrecp->fshfsr_list, fshi); fshi != NULL; 986 fshi = list_next(&fsrecp->fshfsr_list, fshi)) { 987 if (fshi->fshi_hooks.write != NULL) 988 if (fshi_hold(fshi)) 989 break; 990 } 991 rw_exit(&fsrecp->fshfsr_lock); 992 993 if (fshi == NULL) 994 return ((*vp->v_op->vop_write)(vp, uiop, ioflag, cr, ct)); 995 996 ret = (*fshi->fshi_hooks.write)(fshi, fshi->fshi_hooks.arg, 997 vp, uiop, ioflag, cr, ct); 998 fshi_rele(fshi); 999 return (ret); 1000 } 1001 1002 int 1003 fsh_next_mount(fsh_int_t *fshi, vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, 1004 cred_t *cr) 1005 { 1006 fsh_fsrecord_t *fsrecp = vfsp->vfs_fshrecord; 1007 int ret; 1008 1009 /* 1010 * The passed fshi is the previous hook (the one from which we've been 1011 * called). We need to find the next one. 
1012 */ 1013 rw_enter(&fsrecp->fshfsr_lock, RW_READER); 1014 for (fshi = list_next(&fsrecp->fshfsr_list, fshi); fshi != NULL; 1015 fshi = list_next(&fsrecp->fshfsr_list, fshi)) { 1016 if (fshi->fshi_hooks.mount != NULL) 1017 if (fshi_hold(fshi)) 1018 break; 1019 } 1020 rw_exit(&fsrecp->fshfsr_lock); 1021 1022 if (fshi == NULL) 1023 return ((*(vfsp->vfs_op->vfs_mount))(vfsp, mvp, uap, cr)); 1024 1025 ret = (*fshi->fshi_hooks.mount)(fshi, fshi->fshi_hooks.arg, 1026 vfsp, mvp, uap, cr); 1027 fshi_rele(fshi); 1028 return (ret); 1029 } 1030 1031 int 1032 fsh_next_unmount(fsh_int_t *fshi, vfs_t *vfsp, int flag, cred_t *cr) 1033 { 1034 fsh_fsrecord_t *fsrecp = vfsp->vfs_fshrecord; 1035 int ret; 1036 1037 /* 1038 * The passed fshi is the previous hook (the one from which we've been 1039 * called). We need to find the next one. 1040 */ 1041 rw_enter(&fsrecp->fshfsr_lock, RW_READER); 1042 for (fshi = list_next(&fsrecp->fshfsr_list, fshi); fshi != NULL; 1043 fshi = list_next(&fsrecp->fshfsr_list, fshi)) { 1044 if (fshi->fshi_hooks.unmount != NULL) 1045 if (fshi_hold(fshi)) 1046 break; 1047 } 1048 rw_exit(&fsrecp->fshfsr_lock); 1049 1050 if (fshi == NULL) 1051 return ((*vfsp->vfs_op->vfs_unmount)(vfsp, flag, cr)); 1052 1053 ret = (*fshi->fshi_hooks.unmount)(fshi, fshi->fshi_hooks.arg, 1054 vfsp, flag, cr); 1055 fshi_rele(fshi); 1056 return (ret); 1057 }