1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2013 Damian Bogel. All rights reserved. 14 */ 15 16 #include <sys/debug.h> 17 #include <sys/errno.h> 18 #include <sys/fsh.h> 19 #include <sys/fsh_impl.h> 20 #include <sys/id_space.h> 21 #include <sys/kmem.h> 22 #include <sys/ksynch.h> 23 #include <sys/list.h> 24 #include <sys/sunddi.h> 25 #include <sys/sysmacros.h> 26 #include <sys/types.h> 27 #include <sys/vfs.h> 28 #include <sys/vnode.h> 29 30 /* 31 * Filesystem hook framework (fsh) 32 * 33 * 1. Abstract. 34 * The main goal of the filesystem hook framework is to provide an easy way to 35 * inject client-defined behaviour into vfs/vnode calls. fsh works on 36 * vfs_t granularity. 37 * 38 * 39 * 2. Overview. 40 * fsh_t is the main object in the fsh. An fsh_t is a structure containing: 41 * - pointers to hooking functions (named after corresponding 42 * vnodeops/vfsops) 43 * - a pointer to an argument to pass (this is shared for all the 44 * hooks in a given fsh_t) 45 * - a pointer to the *hook remove callback* - it's being fired after a 46 * hook is removed and the hook has stopped executing. It's safe to destroy 47 * any data associated with this hook. 48 * 49 * The information from fsh_t is copied by the fsh and an fsh_handle_t 50 * is returned. It should be used for further removing. 51 * 52 * 53 * 3. Usage. 54 * It is expected that vfs_t/vnode_t that are passed to fsh_foo() functions 55 * are held by the caller when needed. fsh does no vfs_t/vnode_t locking. 56 * 57 * fsh_t is a structure filled out by the client. 
 * If a client does not want to add/remove a hook for function foo(), it
 * should fill the foo field of fsh_t with NULL. Every hook has the type of
 * the corresponding vfsop/vnodeop with two additional leading arguments:
 *	- fsh_int_t *fsh_int - this argument MUST be passed to
 *	  fsh_next_foo(). fsh wouldn't know which hook to execute next
 *	  without it
 *	- void *arg - this is the argument passed with fsh_t during
 *	  installation
 * Apart from the hooks and arg, fsh_t also contains:
 *	- void (*remove_cb)(void *, fsh_handle_t) - hook remove callback
 *	  (mentioned earlier); its first argument is arg, the second is the
 *	  handle
 *
 * After installation, an fsh_handle_t is returned to the caller.
 *
 * Every hook function is responsible for passing the control to the next
 * hook associated with a particular call. In order to provide an easy way to
 * modify the behaviour of a function call both before and after the
 * underlying vfsop/vnodeop (or next hook) execution, a hook has to call
 * fsh_next_foo() at some point. This function does necessary internal
 * operations and calls the next hook, until there's no hook left, then it
 * calls the underlying vfsop/vnodeop.
 * Example:
 * my_freefs(fsh_int_t *fsh_int, void *arg, vfs_t *vfsp) {
 *	cmn_err(CE_NOTE, "freefs called!\n");
 *	return (fsh_next_freefs(fsh_int, vfsp));
 * }
 *
 *
 * A client might want to fire callbacks when vfs_t's are being mounted
 * or freed. There's an fsh_callback_t structure provided to install such
 * callbacks along with the API.
 * It is legal to call fsh_hook_{install,remove}() inside a mount callback
 * WITHOUT holding the vfs_t.
 *
 * After vfs_t's free callback returns, all the handles associated with the
 * hooks installed on this vfs_t are invalid and must not be used.
 *
 *
 * 4. API
 * None of the APIs should be called during interrupt context above lock
 * level. The only exceptions are fsh_next_foo() functions, which do not use
 * locks.
 *
 * a) fsh.h
 *	Any of these functions could be called inside a hook or a hook
 *	remove callback.
 *	fsh_callback_{install,remove}() must not be called inside a
 *	{mount,free} callback. Doing so will cause a deadlock. Other
 *	functions can be called inside {mount,free} callbacks.
 *
 *	fsh_fs_enable(vfs_t *vfsp)
 *	fsh_fs_disable(vfs_t *vfsp)
 *		Enables/disables fsh for a given vfs_t.
 *
 *	fsh_hook_install(vfs_t *vfsp, fsh_t *hooks)
 *		Installs hooks on vfsp filesystem.
 *		It's important that hooks are executed in LIFO installation
 *		order, which means that if there are hooks A and B installed
 *		in this order, B is going to be executed before A.
 *		It returns a correct handle, or (-1) if hook/callback limit
 *		exceeded. The handle is valid until a free callback returns
 *		or an explicit call to fsh_hook_remove().
 *
 *	fsh_hook_remove(fsh_handle_t handle)
 *		Removes a hook and invalidates the handle.
 *		It is guaranteed that after this function returns, calls to
 *		vnodeops/vfsops won't go through this hook, although there
 *		might be some threads still executing this hook. When hook
 *		remove callback is fired, it is guaranteed that the hook
 *		won't be executed anymore. It is safe to remove all the
 *		internal data associated with this hook inside the hook
 *		remove callback.
 *
 *	fsh_next_foo(fsh_int_t *fsh_int, ARGUMENTS)
 *		This is the function which should be called once in every
 *		hook. It does the necessary internal operations and passes
 *		control to the next hook or, if there's no hook left, to the
 *		underlying vfsop/vnodeop.
 *
 *	fsh_callback_install(fsh_callback_t *callback)
 *	fsh_callback_remove(fsh_callback_handle_t handle)
 *		Installs/removes callbacks for vfs_t mount/free. The mount
 *		callback is executed right before domount() returns. The
 *		free callback is called right before VFS_FREEVFS() is called.
 *		The fsh_callback_install() returns a correct handle, or (-1)
 *		if hook/callback limit exceeded.
 *
 * b) fsh_impl.h (for vfs.c and vnode.c only)
 *	fsh_init()
 *		This call has to be done in vfsinit(). It initialises the
 *		fsh. It is absolutely necessary that this call is made before
 *		any other fsh operation.
 *
 *	fsh_exec_mount_callbacks(vfs_t *vfsp)
 *	fsh_exec_free_callbacks(vfs_t *vfsp)
 *		Used to execute all fsh callbacks for {mount,free} of a
 *		vfs_t.
 *
 *	fsh_fsrec_destroy(struct fsh_fsrecord *fsrecp)
 *		Destroys an fsh_fsrecord structure. All the hooks installed
 *		on this vfs_t are then destroyed. free callback is called
 *		before this function.
 *
 *	fsh_foo(ARGUMENTS)
 *		Function used to start executing the hook chain for a given
 *		call.
 *
 *
 * 5. Internals.
 * fsh_int_t is an internal hook structure. It is reference counted.
 * fshi_hold() and fshi_rele() should be used whenever needed.
 * fsh_int_t entries are elements of both fsh_map (global) and fshfsr_list
 * (local to vfs_t). All entries are unique and are identified by fshi_handle.
 *
 * fsh_int_t properties:
 *	- fsh_hook_install() sets the ref. counter to 1 and adds it to both
 *	  fsh_map and fshfsr_list
 *	- fsh_hook_remove() decreases the ref. counter by 1, removes the hook
 *	  from fsh_map and marks the hook as *doomed*
 *	- if fsh_int_t is on the fshfsr_list, it's alive and there is a thread
 *	  executing it
 *	- if fsh_int_t is marked as *doomed*, the reference counter cannot
 *	  be increased and thus no thread can acquire this fsh_int_t
 *	- ref.
 *	  counter can drop to 0 only after an fsh_hook_remove() call; this
 *	  also means that the fsh_int_t is *doomed* and isn't a part of fsh_map
 *	- fsh_int_t could be also destroyed without fsh_hook_remove() call,
 *	  that happens only inside fsh_fsrec_destroy() where it is guaranteed
 *	  that there is no thread executing the hook
 *
 *
 * fsh_fsrecord_t is a structure which lives inside a vfs_t.
 * fsh_fsrecord_t contains:
 *	- an rw-lock that protects the structure
 *	- a list of hooks installed on this vfs_t
 *	- a flag which tells whether fsh is enabled on this vfs_t
 *
 *
 * fsh_prepare_fsrec rule:
 * Every function that needs vfsp->vfs_fshrecord has to call
 * fsh_prepare_fsrec() first. If and only if the call is made, it is safe to
 * use vfsp->vfs_fshrecord.
 *
 * Unfortunately, because of unexpected behaviour of some filesystems (no use
 * of vfs_alloc()/vfs_init()) there's no good place to initialise the
 * fsh_fsrecord_t structure. The approach being used here is to check if it's
 * initialised in every call. Because of the fact that no lock could be used
 * here (the same problem with initialisation), a spinlock is used. This is
 * explained in more detail in a comment before fsh_prepare_fsrec(). After
 * calling fsh_prepare_fsrec() it's completely safe to keep the vfs_fshrecord
 * pointer locally, because it won't be changed until vfs_free() is called.
 *
 * The only exception from the fsh_prepare_fsrec() rule is vfs_free(),
 * where it is expected that no other fsh calls would be made for the
 * vfs_t that's being freed. That's why vfs_fshrecord could be only NULL or a
 * valid pointer and could not be concurrently accessed.
 *
 * When there are no fsh functions (that use a particular fsh_fsrecord_t)
 * executing, the vfs_fshrecord pointer won't be equal to fsh_res_ptr. It
 * would be NULL or a pointer to an initialised fsh_fsrecord_t.
213 * 214 * 215 * Callbacks: 216 * Mount callbacks are executed by a call to fsh_exec_mount_callbacks() right 217 * before returning from domount()@vfs.c. 218 * 219 * Free callbacks are executed by a call to fsh_exec_free_callbacks() right 220 * before calling VFS_FREEVFS(), after vfs_t's reference count drops to 0. 221 * 222 * 223 * fsh_next_foo(fsh_int_t *fshi, ARGUMENTS) 224 * This function is quite simple. It takes the fsh_int_t and passes control 225 * to the next hook or to the underlying vnodeop/vfsop. 226 * 227 * 228 * 6. Locking 229 * a) public 230 * fsh does no vfs_t nor vnode_t locking. It is expected that whenever it is 231 * needed, the client does that. 232 * 233 * fsh_callback_{install,remove} must not be called inside a callback, because 234 * it will cause a deadlock. 235 * 236 * b) internal 237 * Locking diagram: 238 * 239 * fsh_hook_install() fsh_hook_remove() fsh_fsrec_destroy() 240 * | | | 241 * | | | 242 * +------------------+ | +------------+ 243 * | | | 244 * V V V 245 * fsh_lock 246 * | | 247 * | +----- fshfsr_lock, RW_WRITER ---+ 248 * | | 249 * V | 250 * +---------------------------------------+ | 251 * | fsh_map | | 252 * | | | 253 * +----|-> vfsp->vfs_fshrecord->fshfsr_list <--|--------------+ 254 * | +------------------------------^--------+ 255 * | | 256 * | | 257 * fshfsr_lock, RW_READER fshfsr_lock, RW_WRITER 258 * | | 259 * | | 260 * fsh_read(), fshi_rele() 261 * fsh_write(), 262 * ..., Might be called from: 263 * fsh_next_read(), fsh_hook_remove() 264 * fsh_next_write(), fsh_read(), fsh_write(), ... 265 * ... fsh_next_read(), fsh_next_write(), ... 266 * 267 * fsh_lock is a global lock for adminsitrative path (fsh_hook_install, 268 * fsh_hook_remove) and fsh_fsrec_destroy() (which is semi-administrative, since 269 * it destroys the unremoved hooks). It is used only when fsh_map needs to be 270 * locked. The usage of this lock guarantees that the data in fsh_map and 271 * fshfsr_lists is consistent. 
272 */ 273 274 275 /* Internals */ 276 struct fsh_int { 277 fsh_handle_t fshi_handle; 278 fsh_t fshi_hooks; 279 vfs_t *fshi_vfsp; 280 281 kmutex_t fshi_lock; 282 uint64_t fshi_ref; 283 uint64_t fshi_doomed; /* changed inside fsh_lock */ 284 285 /* next node in fshfsr_list */ 286 list_node_t fshi_next; 287 288 /* next node in fsh_map */ 289 list_node_t fshi_global; 290 }; 291 292 typedef struct fsh_callback_int { 293 fsh_callback_t fshci_cb; 294 fsh_callback_handle_t fshci_handle; 295 list_node_t fshci_next; 296 } fsh_callback_int_t; 297 298 299 static kmutex_t fsh_lock; 300 301 /* 302 * fsh_fsrecord_t is the main internal structure. It's content is protected 303 * by fshfsr_lock. The fshfsr_list is a list of fsh_int_t hook entries for 304 * the vfs_t that contains the fsh_fsrecord_t. 305 */ 306 struct fsh_fsrecord { 307 krwlock_t fshfsr_lock; 308 int fshfsr_enabled; 309 list_t fshfsr_list; 310 }; 311 312 /* 313 * Global list of fsh_int_t. Protected by fsh_lock. 314 */ 315 static list_t fsh_map; 316 317 /* 318 * Global list of fsh_callback_int_t. 319 */ 320 static krwlock_t fsh_cblist_lock; 321 static list_t fsh_cblist; 322 323 /* 324 * A reserved pointer for fsh purposes. It is used because of the method 325 * chosen for solving concurrency issues with vfs_fshrecord. The full 326 * explanation is in the big theory statement at the beginning of this 327 * file and above fsh_fsrec_prepare(). It is initialised in fsh_init(). 328 */ 329 static void *fsh_res_ptr; 330 331 static fsh_fsrecord_t *fsh_fsrec_create(); 332 333 int fsh_limit = INT_MAX; 334 static id_space_t *fsh_idspace; 335 336 /* 337 * fsh_prepare_fsrec() 338 * 339 * Important note: 340 * Before using this function, fsh_init() MUST be called. We do that in 341 * vfsinit()@vfs.c. 342 * 343 * One would ask, why isn't the vfsp->vfs_fshrecord initialised when the 344 * vfs_t is created. Unfortunately, some filesystems (e.g. 
fifofs) do not 345 * call vfs_init() or even vfs_alloc(), It's possible that some unbundled 346 * filesystems could do the same thing. That's why this solution is 347 * introduced. It should be called before any code that needs access to 348 * vfs_fshrecord. 349 * 350 * Locking: 351 * There are no locks here, because there's no good place to initialise 352 * the lock. Concurrency issues are solved by using atomic instructions 353 * and a spinlock, which is spinning only once for a given vfs_t. Because 354 * of that, the usage of the spinlock isn't bad at all. 355 * 356 * How it works: 357 * a) if vfsp->vfs_fshrecord equals NULL, atomic_cas_ptr() changes it to 358 * fsh_res_ptr. That's a signal for other threads, that the structure 359 * is being initialised. 360 * b) if vfsp->vfs_fshrecord equals fsh_res_ptr, that means we have to wait, 361 * because vfs_fshrecord is being initialised by another call. 362 * c) other cases: 363 * vfs_fshrecord is already initialised, so we can use it. It won't change 364 * until vfs_free() is called. It can't happen when someone is holding 365 * the vfs_t, which is expected from the caller of fsh API. 366 */ 367 static void 368 fsh_prepare_fsrec(vfs_t *vfsp) 369 { 370 fsh_fsrecord_t *fsrec; 371 372 while ((fsrec = atomic_cas_ptr(&vfsp->vfs_fshrecord, NULL, 373 fsh_res_ptr)) == fsh_res_ptr) 374 ; 375 376 if (fsrec == NULL) 377 atomic_swap_ptr(&vfsp->vfs_fshrecord, fsh_fsrec_create()); 378 } 379 380 /* 381 * API for enabling/disabling fsh per vfs_t. 382 * 383 * A newly created vfs_t has fsh enabled by default. If one would want to change 384 * this behaviour, mount callbacks could be used. 385 * 386 * The caller is expected to hold the vfs_t. 387 * 388 * These functions must NOT be called in a hook. 
389 */ 390 void 391 fsh_fs_enable(vfs_t *vfsp) 392 { 393 fsh_prepare_fsrec(vfsp); 394 395 rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER); 396 vfsp->vfs_fshrecord->fshfsr_enabled = 1; 397 rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock); 398 } 399 400 void 401 fsh_fs_disable(vfs_t *vfsp) 402 { 403 fsh_prepare_fsrec(vfsp); 404 405 rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER); 406 vfsp->vfs_fshrecord->fshfsr_enabled = 0; 407 rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock); 408 } 409 410 /* 411 * API used for installing hooks. fsh_handle_t is returned for further 412 * actions (currently just removing) on this set of hooks. 413 * 414 * fsh_t fields: 415 * - arg - argument passed to every hook 416 * - remove_cb - remove callback, called after a hook is removed and all the 417 * threads stops executing it 418 * - read, write, ... - pointers to hooks for corresponding vnodeops/vfsops; 419 * if there is no hook desired for an operation, it should be set to 420 * NULL 421 * 422 * It's important that the hooks are executed in LIFO installation order (they 423 * are added to the head of the hook list). 424 * 425 * The caller is expected to hold the vfs_t. 426 * 427 * Returns (-1) if hook/callback limit exceeded, handle otherwise. 
428 */ 429 fsh_handle_t 430 fsh_hook_install(vfs_t *vfsp, fsh_t *hooks) 431 { 432 fsh_handle_t handle; 433 fsh_int_t *fshi; 434 435 fsh_prepare_fsrec(vfsp); 436 437 if ((handle = id_alloc(fsh_idspace)) == -1) 438 return (-1); 439 440 fshi = kmem_alloc(sizeof (*fshi), KM_SLEEP); 441 mutex_init(&fshi->fshi_lock, NULL, MUTEX_DRIVER, NULL); 442 (void) memcpy(&fshi->fshi_hooks, hooks, sizeof (fshi->fshi_hooks)); 443 fshi->fshi_handle = handle; 444 fshi->fshi_doomed = 0; 445 fshi->fshi_ref = 1; 446 fshi->fshi_vfsp = vfsp; 447 448 mutex_enter(&fsh_lock); 449 rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER); 450 list_insert_head(&vfsp->vfs_fshrecord->fshfsr_list, fshi); 451 rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock); 452 453 list_insert_head(&fsh_map, fshi); 454 mutex_exit(&fsh_lock); 455 456 return (handle); 457 } 458 459 static int 460 fshi_hold(fsh_int_t *fshi) 461 { 462 int can_hold; 463 464 mutex_enter(&fshi->fshi_lock); 465 if (fshi->fshi_doomed == 1) { 466 can_hold = 0; 467 } else { 468 fshi->fshi_ref++; 469 can_hold = 1; 470 } 471 mutex_exit(&fshi->fshi_lock); 472 473 return (can_hold); 474 } 475 476 /* 477 * This function must not be called while fshfsr_lock is held. Doing so could 478 * cause a deadlock. 479 */ 480 static void 481 fshi_rele(fsh_int_t *fshi) 482 { 483 int destroy; 484 485 mutex_enter(&fshi->fshi_lock); 486 ASSERT(fshi->fshi_ref > 0); 487 fshi->fshi_ref--; 488 if (fshi->fshi_ref == 0) { 489 ASSERT(fshi->fshi_doomed == 1); 490 destroy = 1; 491 } else { 492 destroy = 0; 493 } 494 mutex_exit(&fshi->fshi_lock); 495 496 if (destroy) { 497 /* 498 * At this point, we are sure that fsh_hook_remove() has been 499 * called, that's why we don't remove the fshi from fsh_map. 500 * fsh_hook_remove() did that already. 501 */ 502 fsh_fsrecord_t *fsrecp; 503 504 if (fshi->fshi_hooks.remove_cb != NULL) 505 (*fshi->fshi_hooks.remove_cb)( 506 fshi->fshi_hooks.arg, fshi->fshi_handle); 507 /* 508 * We don't have to call fsh_prepare_fsrec() here. 
509 * fsh_fsrecord_t is already initialised, because we've found a 510 * mapping for the given handle. 511 */ 512 fsrecp = fshi->fshi_vfsp->vfs_fshrecord; 513 ASSERT(fsrecp != NULL); 514 ASSERT(fsrecp != fsh_res_ptr); 515 516 rw_enter(&fsrecp->fshfsr_lock, RW_WRITER); 517 list_remove(&fsrecp->fshfsr_list, fshi); 518 rw_exit(&fsrecp->fshfsr_lock); 519 520 id_free(fsh_idspace, fshi->fshi_handle); 521 mutex_destroy(&fshi->fshi_lock); 522 kmem_free(fshi, sizeof (*fshi)); 523 } 524 } 525 526 /* 527 * Used for removing a hook set. 528 * 529 * fsh_hook_remove() invalidates the given handle. 530 * 531 * It is guaranteed, that after successful return from fsh_hook_remove(), 532 * calls to vnodeops/vfsops, on the vfs_t on which the hook is installed, won't 533 * go through this hook. 534 * 535 * There is no guarantee that after fsh_hook_remove() returns, the hook 536 * associated with the handle won't be executing. Instead, it is guaranteed that 537 * when remove_cb() is called, the hook finished it's execution in all threads. 538 * It is safe to destroy all internal data associated with this hook inside 539 * remove_cb(). 540 * 541 * It is possible that remove_cb() would be called before fsh_hook_remove() 542 * returns. 543 * 544 * Returns (-1) if hook wasn't found, 0 otherwise. 545 */ 546 int 547 fsh_hook_remove(fsh_handle_t handle) 548 { 549 fsh_int_t *fshi; 550 551 mutex_enter(&fsh_lock); 552 for (fshi = list_head(&fsh_map); fshi != NULL; 553 fshi = list_next(&fsh_map, fshi)) { 554 if (fshi->fshi_handle == handle) { 555 list_remove(&fsh_map, fshi); 556 break; 557 } 558 } 559 560 if (fshi == NULL) 561 return (-1); 562 563 mutex_enter(&fshi->fshi_lock); 564 ASSERT(fshi->fshi_doomed == 0); 565 fshi->fshi_doomed = 1; 566 mutex_exit(&fshi->fshi_lock); 567 mutex_exit(&fsh_lock); 568 569 fshi_rele(fshi); 570 571 return (0); 572 } 573 574 /* 575 * API for installing global mount/free callbacks. 
576 * 577 * fsh_callback_t fields: 578 * fshc_arg - argument passed to the callbacks 579 * fshc_free - callback fired before VFS_FREEVFS() is called, after vfs_count 580 * drops to 0 581 * fshc_mount - callback fired right before returning from domount() 582 * The first argument of these callbacks is the vfs_t that is mounted/freed. 583 * The second one is the fshc_arg. 584 * 585 * fsh_callback_handle_t is filled out by this function. 586 * 587 * This function must NOT be called in a callback, because it will cause 588 * a deadlock. 589 * 590 * Returns (-1) if hook/callback limit exceeded. 591 */ 592 fsh_callback_handle_t 593 fsh_callback_install(fsh_callback_t *callback) 594 { 595 fsh_callback_int_t *fshci; 596 fsh_callback_handle_t handle; 597 598 if ((handle = id_alloc(fsh_idspace)) == -1) 599 return (-1); 600 601 fshci = (fsh_callback_int_t *)kmem_alloc(sizeof (*fshci), KM_SLEEP); 602 (void) memcpy(&fshci->fshci_cb, callback, sizeof (fshci->fshci_cb)); 603 fshci->fshci_handle = handle; 604 605 /* If it is called in a {mount,free} callback, causes deadlock. */ 606 rw_enter(&fsh_cblist_lock, RW_WRITER); 607 list_insert_head(&fsh_cblist, fshci); 608 rw_exit(&fsh_cblist_lock); 609 610 return (handle); 611 } 612 613 /* 614 * API for removing global mount/free callbacks. 615 * 616 * This function must NOT be called in a callback, because it will cause 617 * a deadlock. 618 * 619 * Returns (-1) if callback wasn't found, 0 otherwise. 620 */ 621 int 622 fsh_callback_remove(fsh_callback_handle_t handle) 623 { 624 fsh_callback_int_t *fshci; 625 626 /* If it is called in a {mount,free} callback, causes deadlock. 
*/ 627 rw_enter(&fsh_cblist_lock, RW_WRITER); 628 for (fshci = list_head(&fsh_cblist); fshci != NULL; 629 fshci = list_next(&fsh_cblist, fshci)) { 630 if (fshci->fshci_handle == handle) { 631 list_remove(&fsh_cblist, fshci); 632 break; 633 } 634 } 635 rw_exit(&fsh_cblist_lock); 636 637 if (fshci == NULL) 638 return (-1); 639 640 kmem_free(fshci, sizeof (*fshci)); 641 id_free(fsh_idspace, handle); 642 643 return (0); 644 } 645 646 /* 647 * This function is executed right before returning from domount()@vfs.c. 648 * We are sure that it's called only after fsh_init(). 649 * It executes all the mount callbacks installed in the fsh. 650 * 651 * Since fsh_exec_mount_callbacks() is called only inside domount(), it is legal 652 * to call fsh_hook_{install,remove}() inside a mount callback WITHOUT holding 653 * this vfs_t. This guarantee should be preserved, because it's in the "Usage" 654 * section in the big theory statement at the top of this file. 655 */ 656 void 657 fsh_exec_mount_callbacks(vfs_t *vfsp) 658 { 659 fsh_callback_int_t *fshci; 660 fsh_callback_t *cb; 661 662 rw_enter(&fsh_cblist_lock, RW_READER); 663 for (fshci = list_head(&fsh_cblist); fshci != NULL; 664 fshci = list_next(&fsh_cblist, fshci)) { 665 cb = &fshci->fshci_cb; 666 if (cb->fshc_mount != NULL) 667 (*(cb->fshc_mount))(vfsp, cb->fshc_arg); 668 } 669 rw_exit(&fsh_cblist_lock); 670 } 671 672 /* 673 * This function is executed right before VFS_FREEVFS() is called in 674 * vfs_rele()@vfs.c. We are sure that it's called only after fsh_init(). 675 * It executes all the free callbacks installed in the fsh. 
676 * 677 * free() callback is the point after the handles associated with the hooks 678 * installed on this vfs_t become invalid 679 */ 680 void 681 fsh_exec_free_callbacks(vfs_t *vfsp) 682 { 683 fsh_callback_int_t *fshci; 684 fsh_callback_t *cb; 685 686 rw_enter(&fsh_cblist_lock, RW_READER); 687 for (fshci = list_head(&fsh_cblist); fshci != NULL; 688 fshci = list_next(&fsh_cblist, fshci)) { 689 cb = &fshci->fshci_cb; 690 if (cb->fshc_free != NULL) 691 (*(cb->fshc_free))(vfsp, cb->fshc_arg); 692 } 693 rw_exit(&fsh_cblist_lock); 694 } 695 696 /* 697 * API for vnode.c/vfs.c to start executing the fsh for a given operation. 698 * 699 * fsh_xxx() tries to find the first non-NULL xxx hook on the fshfsr_list. If it 700 * does, it executes it. If not, underlying vnodeop/vfsop is called. 701 * 702 * These interfaces are using fsh_res_ptr (in fsh_prepare_fsrec()), so it's 703 * absolutely necessary to call fsh_init() before using them. That's done in 704 * vfsinit(). 705 * 706 * While these functions are executing, it's expected that necessary vfs_t's 707 * are held so that vfs_free() isn't called. vfs_free() expects that noone 708 * accesses vfs_fshrecord of a given vfs_t. 709 * It's also the caller's responsibility to keep vnode_t passed to fsh_foo() 710 * alive and valid. 711 * All these expectations are met because these functions are used only in 712 * correspondng {fop,fsop}_foo() functions. 
713 */ 714 int 715 fsh_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, 716 caller_context_t *ct) 717 { 718 int ret; 719 fsh_fsrecord_t *fsrecp; 720 fsh_int_t *fshi; 721 722 fsh_prepare_fsrec(vp->v_vfsp); 723 fsrecp = vp->v_vfsp->vfs_fshrecord; 724 725 rw_enter(&fsrecp->fshfsr_lock, RW_READER); 726 if (!(fsrecp->fshfsr_enabled)) { 727 rw_exit(&fsrecp->fshfsr_lock); 728 return ((*(vp->v_op->vop_read))(vp, uiop, ioflag, cr, ct)); 729 } 730 731 for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL; 732 fshi = list_next(&fsrecp->fshfsr_list, fshi)) { 733 if (fshi->fshi_hooks.read != NULL) 734 if (fshi_hold(fshi)) 735 break; 736 } 737 rw_exit(&fsrecp->fshfsr_lock); 738 739 if (fshi == NULL) 740 return ((*(vp->v_op->vop_read))(vp, uiop, ioflag, cr, ct)); 741 742 ret = (*fshi->fshi_hooks.read)(fshi, fshi->fshi_hooks.arg, 743 vp, uiop, ioflag, cr, ct); 744 fshi_rele(fshi); 745 return (ret); 746 } 747 748 int 749 fsh_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, 750 caller_context_t *ct) 751 { 752 fsh_int_t *fshi; 753 int ret; 754 fsh_fsrecord_t *fsrecp; 755 756 fsh_prepare_fsrec(vp->v_vfsp); 757 fsrecp = vp->v_vfsp->vfs_fshrecord; 758 759 rw_enter(&fsrecp->fshfsr_lock, RW_READER); 760 if (!(vp->v_vfsp->vfs_fshrecord->fshfsr_enabled)) { 761 rw_exit(&fsrecp->fshfsr_lock); 762 return ((*(vp->v_op->vop_write))(vp, uiop, ioflag, cr, ct)); 763 } 764 765 for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL; 766 fshi = list_next(&fsrecp->fshfsr_list, fshi)) { 767 if (fshi->fshi_hooks.write != NULL) 768 if (fshi_hold(fshi)) 769 break; 770 } 771 rw_exit(&fsrecp->fshfsr_lock); 772 773 if (fshi == NULL) 774 return ((*(vp->v_op->vop_write))(vp, uiop, ioflag, cr, ct)); 775 776 ret = (*fshi->fshi_hooks.write)(fshi, fshi->fshi_hooks.arg, 777 vp, uiop, ioflag, cr, ct); 778 fshi_rele(fshi); 779 return (ret); 780 } 781 782 int 783 fsh_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 784 { 785 fsh_fsrecord_t *fsrecp; 786 fsh_int_t *fshi; 787 int ret; 
788 789 fsh_prepare_fsrec(vfsp); 790 fsrecp = vfsp->vfs_fshrecord; 791 792 rw_enter(&fsrecp->fshfsr_lock, RW_READER); 793 if (!(fsrecp->fshfsr_enabled)) { 794 rw_exit(&fsrecp->fshfsr_lock); 795 return ((*(vfsp->vfs_op->vfs_mount))(vfsp, mvp, uap, cr)); 796 } 797 798 for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL; 799 fshi = list_next(&fsrecp->fshfsr_list, fshi)) { 800 if (fshi->fshi_hooks.mount != NULL) 801 if (fshi_hold(fshi)) 802 break; 803 } 804 rw_exit(&fsrecp->fshfsr_lock); 805 806 if (fshi == NULL) 807 return ((*(vfsp->vfs_op->vfs_mount))(vfsp, mvp, uap, cr)); 808 809 ret = (*fshi->fshi_hooks.mount)(fshi, fshi->fshi_hooks.arg, 810 vfsp, mvp, uap, cr); 811 fshi_rele(fshi); 812 return (ret); 813 } 814 815 int 816 fsh_unmount(vfs_t *vfsp, int flag, cred_t *cr) 817 { 818 fsh_fsrecord_t *fsrecp; 819 fsh_int_t *fshi; 820 int ret; 821 822 fsh_prepare_fsrec(vfsp); 823 fsrecp = vfsp->vfs_fshrecord; 824 825 rw_enter(&fsrecp->fshfsr_lock, RW_READER); 826 if (!(fsrecp->fshfsr_enabled)) { 827 rw_exit(&fsrecp->fshfsr_lock); 828 return ((*(vfsp->vfs_op->vfs_unmount))(vfsp, flag, cr)); 829 } 830 831 for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL; 832 fshi = list_next(&fsrecp->fshfsr_list, fshi)) { 833 if (fshi->fshi_hooks.unmount != NULL) 834 if (fshi_hold(fshi)) 835 break; 836 } 837 rw_exit(&fsrecp->fshfsr_lock); 838 839 if (fshi == NULL) 840 return ((*(vfsp->vfs_op->vfs_unmount))(vfsp, flag, cr)); 841 842 ret = (*fshi->fshi_hooks.unmount)(fshi, fshi->fshi_hooks.arg, 843 vfsp, flag, cr); 844 fshi_rele(fshi); 845 return (ret); 846 } 847 848 /* 849 * This is the funtion used by fsh_prepare_fsrec() to allocate a new 850 * fsh_fsrecord. This function is called by the first function which 851 * access the vfs_fshrecord and finds out it's NULL. 
852 */ 853 static fsh_fsrecord_t * 854 fsh_fsrec_create() 855 { 856 fsh_fsrecord_t *fsrecp; 857 858 fsrecp = (fsh_fsrecord_t *)kmem_zalloc(sizeof (*fsrecp), KM_SLEEP); 859 list_create(&fsrecp->fshfsr_list, sizeof (fsh_int_t), 860 offsetof(fsh_int_t, fshi_next)); 861 rw_init(&fsrecp->fshfsr_lock, NULL, RW_DRIVER, NULL); 862 fsrecp->fshfsr_enabled = 1; 863 return (fsrecp); 864 } 865 866 867 /* 868 * This call can be used ONLY in vfs_free(). It's assumed that no other 869 * fsh calls using the vfs_t that owns the fsh_fsrecord to be destroyed 870 * are executing while a call to fsh_fsrec_destroy() is made. With this 871 * assumptions, no concurrency issues occur. 872 * 873 * Before calling this function outside the fsh, it's sufficient and 874 * required to check if the passed fsh_fsrecord * is not NULL. We don't 875 * have to check if it is not equal to fsh_res_ptr, because all the fsh API 876 * calls involving this vfs_t should end before vfs_free() is called 877 * (outside the fsh, fsh_fsrecord is never equal to fsh_res_ptr). That is 878 * guaranteed by the explicit requirement that the caller of fsh API holds 879 * the vfs_t when needed. 880 * 881 * All the remaining hooks are being removed. 
882 */ 883 void 884 fsh_fsrec_destroy(struct fsh_fsrecord *volatile fsrecp) 885 { 886 fsh_int_t *fshi; 887 888 VERIFY(fsrecp != NULL); 889 890 _NOTE(CONSTCOND) 891 while (1) { 892 mutex_enter(&fsh_lock); 893 fshi = list_remove_head(&fsrecp->fshfsr_list); 894 if (fshi == NULL) { 895 mutex_exit(&fsh_lock); 896 break; 897 } 898 ASSERT(fshi->fshi_doomed == 0); 899 list_remove(&fsh_map, fshi); 900 mutex_exit(&fsh_lock); 901 902 if (fshi->fshi_hooks.remove_cb != NULL) 903 (*fshi->fshi_hooks.remove_cb)(fshi->fshi_hooks.arg, 904 fshi->fshi_handle); 905 id_free(fsh_idspace, fshi->fshi_handle); 906 mutex_destroy(&fshi->fshi_lock); 907 kmem_free(fshi, sizeof (*fshi)); 908 909 } 910 911 list_destroy(&fsrecp->fshfsr_list); 912 rw_destroy(&fsrecp->fshfsr_lock); 913 kmem_free(fsrecp, sizeof (*fsrecp)); 914 } 915 916 /* 917 * fsh_init() is called in vfsinit()@vfs.c. This function MUST be called 918 * before every other fsh call. 919 */ 920 void 921 fsh_init(void) 922 { 923 rw_init(&fsh_cblist_lock, NULL, RW_DRIVER, NULL); 924 list_create(&fsh_cblist, sizeof (fsh_callback_int_t), 925 offsetof(fsh_callback_int_t, fshci_next)); 926 927 mutex_init(&fsh_lock, NULL, MUTEX_DRIVER, NULL); 928 929 list_create(&fsh_map, sizeof (fsh_int_t), offsetof(fsh_int_t, 930 fshi_global)); 931 932 /* See comment above fsh_prepare_fsrec() */ 933 fsh_res_ptr = (void *)-1; 934 935 fsh_idspace = id_space_create("fsh", 0, fsh_limit); 936 } 937 938 /* 939 * These functions are used to pass control to the next hook or underlying 940 * vop or vfsop. It's client doesn't have to worry about any locking. 941 */ 942 int 943 fsh_next_read(fsh_int_t *fshi, vnode_t *vp, uio_t *uiop, int ioflag, 944 cred_t *cr, caller_context_t *ct) 945 { 946 int ret; 947 fsh_fsrecord_t *fsrecp = vp->v_vfsp->vfs_fshrecord; 948 949 /* 950 * The passed fshi is the previous hook (the one from which we've been 951 * called). We need to find the next one. 
952 */ 953 rw_enter(&fsrecp->fshfsr_lock, RW_READER); 954 for (fshi = list_next(&fsrecp->fshfsr_list, fshi); fshi != NULL; 955 fshi = list_next(&fsrecp->fshfsr_list, fshi)) { 956 if (fshi->fshi_hooks.read != NULL) 957 if (fshi_hold(fshi)) 958 break; 959 } 960 rw_exit(&fsrecp->fshfsr_lock); 961 962 if (fshi == NULL) 963 return ((*vp->v_op->vop_read)(vp, uiop, ioflag, cr, ct)); 964 965 ret = (*fshi->fshi_hooks.read)(fshi, fshi->fshi_hooks.arg, 966 vp, uiop, ioflag, cr, ct); 967 fshi_rele(fshi); 968 return (ret); 969 } 970 971 int 972 fsh_next_write(fsh_int_t *fshi, vnode_t *vp, uio_t *uiop, int ioflag, 973 cred_t *cr, caller_context_t *ct) 974 { 975 fsh_fsrecord_t *fsrecp = vp->v_vfsp->vfs_fshrecord; 976 int ret; 977 978 /* 979 * The passed fshi is the previous hook (the one from which we've been 980 * called). We need to find the next one. 981 */ 982 rw_enter(&fsrecp->fshfsr_lock, RW_READER); 983 for (fshi = list_next(&fsrecp->fshfsr_list, fshi); fshi != NULL; 984 fshi = list_next(&fsrecp->fshfsr_list, fshi)) { 985 if (fshi->fshi_hooks.write != NULL) 986 if (fshi_hold(fshi)) 987 break; 988 } 989 rw_exit(&fsrecp->fshfsr_lock); 990 991 if (fshi == NULL) 992 return ((*vp->v_op->vop_write)(vp, uiop, ioflag, cr, ct)); 993 994 ret = (*fshi->fshi_hooks.write)(fshi, fshi->fshi_hooks.arg, 995 vp, uiop, ioflag, cr, ct); 996 fshi_rele(fshi); 997 return (ret); 998 } 999 1000 int 1001 fsh_next_mount(fsh_int_t *fshi, vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, 1002 cred_t *cr) 1003 { 1004 fsh_fsrecord_t *fsrecp = vfsp->vfs_fshrecord; 1005 int ret; 1006 1007 /* 1008 * The passed fshi is the previous hook (the one from which we've been 1009 * called). We need to find the next one. 
1010 */ 1011 rw_enter(&fsrecp->fshfsr_lock, RW_READER); 1012 for (fshi = list_next(&fsrecp->fshfsr_list, fshi); fshi != NULL; 1013 fshi = list_next(&fsrecp->fshfsr_list, fshi)) { 1014 if (fshi->fshi_hooks.mount != NULL) 1015 if (fshi_hold(fshi)) 1016 break; 1017 } 1018 rw_exit(&fsrecp->fshfsr_lock); 1019 1020 if (fshi == NULL) 1021 return ((*(vfsp->vfs_op->vfs_mount))(vfsp, mvp, uap, cr)); 1022 1023 ret = (*fshi->fshi_hooks.mount)(fshi, fshi->fshi_hooks.arg, 1024 vfsp, mvp, uap, cr); 1025 fshi_rele(fshi); 1026 return (ret); 1027 } 1028 1029 int 1030 fsh_next_unmount(fsh_int_t *fshi, vfs_t *vfsp, int flag, cred_t *cr) 1031 { 1032 fsh_fsrecord_t *fsrecp = vfsp->vfs_fshrecord; 1033 int ret; 1034 1035 /* 1036 * The passed fshi is the previous hook (the one from which we've been 1037 * called). We need to find the next one. 1038 */ 1039 rw_enter(&fsrecp->fshfsr_lock, RW_READER); 1040 for (fshi = list_next(&fsrecp->fshfsr_list, fshi); fshi != NULL; 1041 fshi = list_next(&fsrecp->fshfsr_list, fshi)) { 1042 if (fshi->fshi_hooks.unmount != NULL) 1043 if (fshi_hold(fshi)) 1044 break; 1045 } 1046 rw_exit(&fsrecp->fshfsr_lock); 1047 1048 if (fshi == NULL) 1049 return ((*vfsp->vfs_op->vfs_unmount)(vfsp, flag, cr)); 1050 1051 ret = (*fshi->fshi_hooks.unmount)(fshi, fshi->fshi_hooks.arg, 1052 vfsp, flag, cr); 1053 fshi_rele(fshi); 1054 return (ret); 1055 }