1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2013 Damian Bogel. All rights reserved. 14 */ 15 16 #include <sys/debug.h> 17 #include <sys/errno.h> 18 #include <sys/fsh.h> 19 #include <sys/fsh_impl.h> 20 #include <sys/id_space.h> 21 #include <sys/kmem.h> 22 #include <sys/ksynch.h> 23 #include <sys/list.h> 24 #include <sys/sunddi.h> 25 #include <sys/sysmacros.h> 26 #include <sys/types.h> 27 #include <sys/vfs.h> 28 #include <sys/vnode.h> 29 30 /* 31 * Filesystem hook framework (fsh) 32 * 33 * 1. Abstract. 34 * The main goal of the filesystem hook framework is to provide an easy way to 35 * inject client-defined behaviour into vfs/vnode calls. fsh works on 36 * vfs_t granularity. 37 * 38 * Note: In this document, both an fsh_t structure and hooking function for a 39 * vnodeop/vfsop is referred to as *hook*. 40 * 41 * 42 * 2. Overview. 43 * fsh_t is the main object in the fsh. An fsh_t is a structure containing: 44 * - pointers to hooking functions 45 * - an argument to pass (this is shared for all the hooks in a given 46 * fsh_t) 47 * - a pointer to the *hook remove callback* 48 * 49 * The information from fsh_t is copied by the fsh and an fsh_handle_t 50 * is returned. It should be used for further removing. 51 * 52 * 53 * 3. Usage. 54 * It is expected that vfs_t/vnode_t passed to fsh_foo() functions are held by 55 * the caller when needed. fsh does no vfs_t/vnode_t locking. 56 * 57 * fsh_t is a structure filled out by the client. 
It contains: 58 * - pointers to hooking functions 59 * - the argument passed to the hooks 60 * - the *hook remove callback* 61 * 62 * If a client does not want to add a hook for function foo(), he should fill 63 * corresponding fields with NULLs. For every vfsop/vnodeop there are two 64 * fields: pre_foo() and post_foo(). These are the functions called before and 65 * after the next hook or underlying vfsop/vnodeop. 66 * 67 * Pre hooks take: 68 * - arg 69 * - pointer to a field containing void* - it should be filled whenever 70 * the client wants to have some data shared by the pre and post hooks in 71 * the same syscall execution. This is called the *instance data*. 72 * - pointers to the arguments passed to the underlying vfsop/vnodeop 73 * Pre hooks return void. 74 * 75 * Post hooks take: 76 * - value returned by the previous post hook or underlying vfsop/vnodeop 77 * - arg 78 * - pointer to the *instance data* 79 * - arguments passed to the underlying vfsop/vnodeop 80 * Post hooks return an int, which should be treated as the vfsop/vnodeop 81 * return value. 82 * Memory allocated by pre hook must be deallocated by the post hook. 83 * 84 * Execution path of hooks A, B, C is as follows: 85 * foo() 86 * preA(argA, &instancepA, ...); 87 * preB(argB, &instancepB, ...); 88 * preC(argC, &instancepC, ...); 89 * ret = VOP_FOO(); 90 * ret = postC(ret, argC, instancepC, ...); 91 * ret = postB(ret, argB, instancepB, ...); 92 * ret = postA(ret, argA, instancepA, ...); 93 * return (ret); 94 * 95 * After installation, an fsh_handle_t is returned to the caller. 96 * 97 * Hook remove callback - it's a function being fired after a hook is removed 98 * and no thread is going to execute it anymore. It's safe to destroy all the 99 * data associated with this hook inside it. 100 * 101 * It is guaranteed, that whenever a pre_hook() is called, there will be also 102 * post_hook() called within the same syscall. 
103 * 104 * If a hook (HNew) is installed/removed on/from a vfs_t within execution of 105 * another hook (HExec) installed on this vfs_t, the syscall that executes 106 * HExec won't fire HNew. 107 * 108 * A client might want to fire callbacks when vfs_ts are being mounted 109 * or freed. There's an fsh_callback_t structure provided to install such 110 * callbacks along with the API. 111 * It is legal to call fsh_hook_{install,remove}() inside a mount callback 112 * WITHOUT holding the vfs_t. 113 * 114 * After vfs_t's free callback returns, all the handles associated with the 115 * hooks installed on this vfs_t are invalid and must not be used. 116 * 117 * 4. API 118 * None of the APIs should be called during interrupt context above lock 119 * level. 120 * 121 * a) fsh.h 122 * Any of these functions could be called in a hook or a hook remove callback. 123 * The only functions that must not be called inside a {mount,free} callback are 124 * fsh_callback_{install,remove}. Using them will cause a deadlock. 125 * 126 * 127 * fsh_fs_enable(vfs_t *vfsp) 128 * fsh_fs_disable(vfs_t *vfsp) 129 * Enables/disables fsh for a given vfs_t. 130 * 131 * fsh_hook_install(vfs_t *vfsp, fsh_t *hooks) 132 * Installs hooks on vfsp filesystem. 133 * It's important that hooks are executed in LIFO installation order, 134 * which means that if there are hooks A and B installed in this order, B 135 * is going to be executed before A. 136 * It returns a correct handle, or (-1) if hook/callback limit exceeded. 137 * The handle is valid until a free callback returns or an explicit call 138 * to fsh_hook_remove(). 139 * 140 * fsh_hook_remove(fsh_handle_t handle) 141 * Removes a hook and invalidates the handle. 142 * It is guaranteed that after this function returns, calls to 143 * vnodeops/vfsops won't go through this hook, although there might be 144 * some threads still executing this hook. When hook remove callback is 145 * fired, it is guaranteed that the hook won't be executed anymore. 
It is 146 * safe to remove all the internal data associated with this hook inside 147 * the hook remove callback. The hook remove callback could be called 148 * inside fsh_hook_remove(). 149 * 150 * 151 * fsh_callback_install(fsh_callback_t *callback) 152 * fsh_callback_remove(fsh_callback_handle_t handle) 153 * Installs/removes callbacks for vfs_t mount/free. The mount callback 154 * is executed right before domount() returns. The free callback is 155 * called right before VFS_FREEVFS() is called. 156 * The fsh_callback_install() returns a correct handle, or (-1) if 157 * hook/callback limit exceeded. 158 * 159 * 160 * b) fsh_impl.h (for vfs.c and vnode.c only) 161 * fsh_init() 162 * This call has to be done in vfsinit(). It initialises the fsh. It 163 * is absolutely necessary that this call is made before any other fsh 164 * operation. 165 * 166 * fsh_exec_mount_callbacks(vfs_t *vfsp) 167 * fsh_exec_free_callbacks(vfs_t *vfsp) 168 * Used to execute all fsh callbacks for {mount,free} of a vfs_t. 169 * 170 * fsh_fsrec_destroy(struct fsh_fsrecord *fsrecp) 171 * Destroys an fsh_fsrecord structure. All the hooks installed on this 172 * vfs_t are then destroyed. free callback is called before this function. 173 * 174 * fsh_foo(ARGUMENTS) 175 * Function used to execute the hook chain for a given syscall. 176 * 177 * 178 * 5. Internals. 179 * fsh_int_t is an internal hook structure. It is reference counted. 180 * fshi_hold() and fshi_rele() should be used whenever needed. 181 * fsh_int_t entries are elements of both fsh_map (global) and fshfsr_list 182 * (local to vfs_t). All entries are unique and are identified by fshi_handle. 183 * 184 * fsh_int_t properties: 185 * - fsh_hook_install() sets the ref. counter to 1 and adds it to both 186 * fsh_map and fshfsr_list 187 * - fsh_hook_remove() decreases the ref. 
counter by 1, removes the hook 188 * from fsh_map and marks the hook as *doomed* 189 * - if fsh_int_t is on the fshfsr_list, it's alive and there is a thread 190 * executing it 191 * - if fsh_int_t is marked as *doomed*, the reference counter will not 192 * be increased and thus no thread can acquire this fsh_int_t 193 * - ref. counter can drop to 0 only after an fsh_hook_remove() call; this 194 * also means that the fsh_int_t is *doomed* and isn't a part of fsh_map 195 * - fsh_int_t could be also destroyed without fsh_hook_remove() call, 196 * that happens only inside fsh_fsrec_destroy() where it is guaranteed 197 * that there is no thread executing the hook 198 * 199 * 200 * fsh_fsrecord_t is a structure which lives inside a vfs_t. 201 * fsh_fsrecord_t contains: 202 * - an rw-lock that protects the structure 203 * - a list of hooks installed on this vfs_t 204 * - a flag which tells whether fsh is enabled on this vfs_t 205 * 206 * 207 * fsh_fsrec_prepare rule: 208 * Every function that needs vfsp->vfs_fshrecord has to call 209 * fsh_fsrec_prepare() first. If and only if the call is made, it is safe to 210 * use vfsp->vfs_fshrecord. 211 * 212 * Unfortunately, because of unexpected behaviour of some filesystems (no use 213 * of vfs_alloc()/vfs_init()) there's no good place to initialise the 214 * fsh_fsrecord_t structure. The approach being used here is to check if it's 215 * initialised in every call. Because of the fact that no lock could be used 216 * here (the same problem with initialisation), a spinlock is used. This is 217 * explained in more detail in a comment before fsh_fsrec_prepare(). After 218 * calling fsh_fsrec_prepare() it's completely safe to keep the vfs_fshrecord 219 * pointer locally, because it won't be changed until vfs_free() is called. 220 * 221 * Exceptions from this rule: 222 * - vfs_free() - it is expected that no other fsh calls would be made for the 223 * vfs_t that's being freed. 
That's why vfs_fshrecord could be only NULL or a 224 * valid pointer and could not be concurrently accessed. 225 * - fshi_rele() - fsh_hook_install() comes before first fshi_rele() call; 226 * the fsh_fsrecord_t has been initialised there 227 * 228 * 229 * When there are no fsh functions (that use a particular fsh_fsrecord_t) 230 * executing, the vfs_fshrecord pointer won't be equal to fsh_res_ptr. It 231 * would be NULL or a pointer to an initialised fsh_fsrecord_t. 232 * 233 * It is required and sufficient to check if fsh_fsrecord_t is not NULL before 234 * passing it to fsh_fsrec_destroy. We don't have to check if it is not equal 235 * to fsh_res_ptr, because all the fsh API calls involving this vfs_t should 236 * end before vfs_free() is called (outside the fsh, fsh_fsrecord is never 237 * equal to fsh_res_ptr). That is guaranteed by the explicit requirement that 238 * the caller of fsh API holds the vfs_t when needed. fsh_hook_remove() must not 239 * be called either, because the handles are invalidated after free callback has 240 * fired. 241 * 242 * 243 * Callbacks: 244 * Mount callbacks are executed by a call to fsh_exec_mount_callbacks() right 245 * before returning from domount()@vfs.c. 246 * 247 * Free callbacks are executed by a call to fsh_exec_free_callbacks() right 248 * before calling VFS_FREEVFS(), after vfs_t's reference count drops to 0. 249 * 250 * 251 * 6. Locking 252 * a) public 253 * fsh does no vfs_t nor vnode_t locking. It is expected that whenever it is 254 * needed, the client does that. 255 * 256 * No locks are held across hooks or hook remove callbacks execution. It is 257 * safe to use fsh API inside hooks and hook remove callbacks. 258 * 259 * fsh_cb_lock is held across {mount,free} callbacks. Calling 260 * fsh_callback_{install,remove} inside of a callback will cause a deadlock. 
261 * 262 * b) internals 263 * Locking diagram: 264 * 265 * fsh_hook_remove() fsh_hook_install() fsh_fsrec_destroy() 266 * | | | 267 * | | | 268 * +------------------+ | +------------+ 269 * | | | | 270 * | V | | 271 * V +------------|---|-+ 272 * fshi_rele() | fsh_lock | | | 273 * (sometimes) +------------|---|-+ 274 * | | | 275 * | +---+-- fshfsr_lock, RW_WRITER -+ 276 * | | 277 * V | 278 * +---------------------------------------+ | 279 * | fsh_map | | 280 * | | | 281 * +----|-> vfsp->vfs_fshrecord->fshfsr_list <--|----------------+ 282 * | +------------------------------^--------+ 283 * | | 284 * | | 285 * fshfsr_lock, RW_READER fshfsr_lock, RW_WRITER 286 * | | 287 * | | 288 * fsh_read(), fshi_rele() 289 * fsh_write(), 290 * ... Might be called from: 291 * fsh_hook_remove() 292 * fsh_read(), fsh_write(), ... 293 * 294 * 295 * fsh_lock is a global lock for adminsitrative path (fsh_hook_install, 296 * fsh_hook_remove) and fsh_fsrec_destroy() (which is semi-administrative, since 297 * it destroys the unremoved hooks). It is used only when fsh_map needs to be 298 * locked. The usage of this lock guarantees that the data in fsh_map and 299 * fshfsr_lists is consistent. 300 * 301 * In order to make calling callbacks inside callbacks possible, fsh_cb_owner is 302 * set by fsh_exec_{mount,free} callbacks to the thread that owns the 303 * fsh_cb_lock. It's always checked if we are owners of the mutex before 304 * entering it. 
305 * 306 */ 307 308 309 /* Internals */ 310 typedef struct fsh_int { 311 fsh_handle_t fshi_handle; 312 fsh_t fshi_hooks; 313 vfs_t *fshi_vfsp; 314 315 kmutex_t fshi_lock; 316 uint64_t fshi_ref; 317 uint64_t fshi_doomed; /* changed inside fsh_lock */ 318 319 /* next node in fshfsr_list */ 320 list_node_t fshi_node; 321 322 /* next node in fsh_map */ 323 list_node_t fshi_global; 324 } fsh_int_t; 325 326 typedef struct fsh_callback_int { 327 fsh_callback_t fshci_cb; 328 fsh_callback_handle_t fshci_handle; 329 list_node_t fshci_node; 330 } fsh_callback_int_t; 331 332 333 typedef struct fsh_exec { 334 fsh_int_t *fshe_fshi; 335 void *fshe_instance; 336 list_node_t fshe_node; 337 } fsh_exec_t; 338 339 340 static kmutex_t fsh_lock; 341 342 /* 343 * fsh_fsrecord_t is the main internal structure. It's content is protected 344 * by fshfsr_lock. The fshfsr_list is a list of fsh_int_t hook entries for 345 * the vfs_t that contains the fsh_fsrecord_t. 346 */ 347 struct fsh_fsrecord { 348 krwlock_t fshfsr_lock; 349 int fshfsr_enabled; 350 list_t fshfsr_list; 351 }; 352 353 /* 354 * Global list of fsh_int_t. Protected by fsh_lock. 355 */ 356 static list_t fsh_map; 357 358 /* 359 * Global list of fsh_callback_int_t. 360 */ 361 static kmutex_t fsh_cb_lock; 362 static kmutex_t fsh_cb_owner_lock; 363 static kthread_t *fsh_cb_owner; 364 static list_t fsh_cblist; 365 366 /* 367 * A reserved pointer for fsh purposes. It is used because of the method 368 * chosen for solving concurrency issues with vfs_fshrecord. The full 369 * explanation is in the big theory statement at the beginning of this 370 * file and above fsh_fsrec_prepare(). It is initialised in fsh_init(). 371 */ 372 static void *fsh_res_ptr; 373 374 static fsh_fsrecord_t *fsh_fsrec_create(); 375 376 int fsh_limit = INT_MAX; 377 static id_space_t *fsh_idspace; 378 379 /* 380 * fsh_fsrec_prepare() 381 * 382 * Important note: 383 * Before using this function, fsh_init() MUST be called. We do that in 384 * vfsinit()@vfs.c. 
385 * 386 * One would ask, why isn't the vfsp->vfs_fshrecord initialised when the 387 * vfs_t is created. Unfortunately, some filesystems (e.g. fifofs) do not 388 * call vfs_init() or even vfs_alloc(), It's possible that some unbundled 389 * filesystems could do the same thing. That's why this solution is 390 * introduced. It should be called before any code that needs access to 391 * vfs_fshrecord. 392 * 393 * Locking: 394 * There are no locks here, because there's no good place to initialise 395 * the lock. Concurrency issues are solved by using atomic instructions 396 * and a spinlock, which is spinning only once for a given vfs_t. Because 397 * of that, the usage of the spinlock isn't bad at all. 398 * 399 * How it works: 400 * a) if vfsp->vfs_fshrecord equals NULL, atomic_cas_ptr() changes it to 401 * fsh_res_ptr. That's a signal for other threads, that the structure 402 * is being initialised. 403 * b) if vfsp->vfs_fshrecord equals fsh_res_ptr, that means we have to wait, 404 * because vfs_fshrecord is being initialised by another call. 405 * c) other cases: 406 * vfs_fshrecord is already initialised, so we can use it. It won't change 407 * until vfs_free() is called. It can't happen when someone is holding 408 * the vfs_t, which is expected from the caller of fsh API. 409 */ 410 static void 411 fsh_fsrec_prepare(vfs_t *vfsp) 412 { 413 fsh_fsrecord_t *fsrec; 414 415 while ((fsrec = atomic_cas_ptr(&vfsp->vfs_fshrecord, NULL, 416 fsh_res_ptr)) == fsh_res_ptr) 417 ; 418 419 if (fsrec == NULL) 420 atomic_swap_ptr(&vfsp->vfs_fshrecord, fsh_fsrec_create()); 421 } 422 423 /* 424 * API for enabling/disabling fsh per vfs_t. 425 * 426 * A newly created vfs_t has fsh enabled by default. If one would want to change 427 * this behaviour, mount callbacks could be used. 428 * 429 * The caller is expected to hold the vfs_t. 430 * 431 * These functions must NOT be called in a hook. 
432 */ 433 void 434 fsh_fs_enable(vfs_t *vfsp) 435 { 436 fsh_fsrec_prepare(vfsp); 437 438 rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER); 439 vfsp->vfs_fshrecord->fshfsr_enabled = 1; 440 rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock); 441 } 442 443 void 444 fsh_fs_disable(vfs_t *vfsp) 445 { 446 fsh_fsrec_prepare(vfsp); 447 448 rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER); 449 vfsp->vfs_fshrecord->fshfsr_enabled = 0; 450 rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock); 451 } 452 453 /* 454 * API used for installing hooks. fsh_handle_t is returned for further 455 * actions (currently just removing) on this set of hooks. 456 * 457 * It's important that the hooks are executed in LIFO installation order (they 458 * are added to the head of the hook list). 459 * 460 * The caller is expected to hold the vfs_t. 461 * 462 * Returns (-1) if hook/callback limit exceeded, handle otherwise. 463 */ 464 fsh_handle_t 465 fsh_hook_install(vfs_t *vfsp, fsh_t *hooks) 466 { 467 fsh_handle_t handle; 468 fsh_int_t *fshi; 469 470 fsh_fsrec_prepare(vfsp); 471 472 if ((handle = id_alloc(fsh_idspace)) == -1) 473 return (-1); 474 475 fshi = kmem_alloc(sizeof (*fshi), KM_SLEEP); 476 mutex_init(&fshi->fshi_lock, NULL, MUTEX_DRIVER, NULL); 477 (void) memcpy(&fshi->fshi_hooks, hooks, sizeof (fshi->fshi_hooks)); 478 fshi->fshi_handle = handle; 479 fshi->fshi_doomed = 0; 480 fshi->fshi_ref = 1; 481 fshi->fshi_vfsp = vfsp; 482 483 mutex_enter(&fsh_lock); 484 rw_enter(&vfsp->vfs_fshrecord->fshfsr_lock, RW_WRITER); 485 list_insert_head(&vfsp->vfs_fshrecord->fshfsr_list, fshi); 486 rw_exit(&vfsp->vfs_fshrecord->fshfsr_lock); 487 488 list_insert_head(&fsh_map, fshi); 489 mutex_exit(&fsh_lock); 490 491 return (handle); 492 } 493 494 static int 495 fshi_hold(fsh_int_t *fshi) 496 { 497 int can_hold; 498 499 mutex_enter(&fshi->fshi_lock); 500 if (fshi->fshi_doomed == 1) { 501 can_hold = 0; 502 } else { 503 fshi->fshi_ref++; 504 can_hold = 1; 505 } 506 mutex_exit(&fshi->fshi_lock); 507 508 return 
(can_hold); 509 } 510 511 /* 512 * This function must not be called while fshfsr_lock is held. Doing so could 513 * cause a deadlock. 514 */ 515 static void 516 fshi_rele(fsh_int_t *fshi) 517 { 518 int destroy; 519 520 mutex_enter(&fshi->fshi_lock); 521 ASSERT(fshi->fshi_ref > 0); 522 fshi->fshi_ref--; 523 if (fshi->fshi_ref == 0) { 524 ASSERT(fshi->fshi_doomed == 1); 525 destroy = 1; 526 } else { 527 destroy = 0; 528 } 529 mutex_exit(&fshi->fshi_lock); 530 531 if (destroy) { 532 /* 533 * At this point, we are sure that fsh_hook_remove() has been 534 * called, that's why we don't remove the fshi from fsh_map. 535 * fsh_hook_remove() did that already. 536 * There is also no need to call fsh_fsrec_prepare() here. 537 */ 538 fsh_fsrecord_t *fsrecp; 539 540 /* 541 * We don't have to call fsh_fsrec_prepare() here. 542 * fsh_fsrecord_t is already initialised, because we've found a 543 * mapping for the given handle. 544 */ 545 fsrecp = fshi->fshi_vfsp->vfs_fshrecord; 546 ASSERT(fsrecp != NULL); 547 ASSERT(fsrecp != fsh_res_ptr); 548 549 rw_enter(&fsrecp->fshfsr_lock, RW_WRITER); 550 list_remove(&fsrecp->fshfsr_list, fshi); 551 rw_exit(&fsrecp->fshfsr_lock); 552 553 if (fshi->fshi_hooks.remove_cb != NULL) 554 (*fshi->fshi_hooks.remove_cb)( 555 fshi->fshi_hooks.arg, fshi->fshi_handle); 556 557 id_free(fsh_idspace, fshi->fshi_handle); 558 mutex_destroy(&fshi->fshi_lock); 559 kmem_free(fshi, sizeof (*fshi)); 560 } 561 } 562 563 /* 564 * Used for removing a hook set. 565 * 566 * fsh_hook_remove() invalidates the given handle. 567 * 568 * It is guaranteed, that after successful return from fsh_hook_remove(), 569 * calls to vnodeops/vfsops, on the vfs_t on which the hook is installed, won't 570 * go through this hook. 571 * 572 * There is no guarantee that after fsh_hook_remove() returns, the hook 573 * associated with the handle won't be executing. Instead, it is guaranteed that 574 * when remove_cb() is called, the hook finished it's execution in all threads. 
575 * It is safe to destroy all internal data associated with this hook inside 576 * remove_cb(). 577 * 578 * It is possible that remove_cb() would be called before fsh_hook_remove() 579 * returns. 580 * 581 * Returns (-1) if hook wasn't found, 0 otherwise. 582 */ 583 int 584 fsh_hook_remove(fsh_handle_t handle) 585 { 586 fsh_int_t *fshi; 587 588 mutex_enter(&fsh_lock); 589 for (fshi = list_head(&fsh_map); fshi != NULL; 590 fshi = list_next(&fsh_map, fshi)) { 591 if (fshi->fshi_handle == handle) { 592 list_remove(&fsh_map, fshi); 593 break; 594 } 595 } 596 597 if (fshi == NULL) 598 return (-1); 599 600 mutex_enter(&fshi->fshi_lock); 601 ASSERT(fshi->fshi_doomed == 0); 602 fshi->fshi_doomed = 1; 603 mutex_exit(&fshi->fshi_lock); 604 mutex_exit(&fsh_lock); 605 606 fshi_rele(fshi); 607 608 return (0); 609 } 610 611 /* 612 * API for installing global mount/free callbacks. 613 * 614 * fsh_callback_t fields: 615 * fshc_arg - argument passed to the callbacks 616 * fshc_free - callback fired before VFS_FREEVFS() is called, after vfs_count 617 * drops to 0 618 * fshc_mount - callback fired right before returning from domount() 619 * The first argument of these callbacks is the vfs_t that is mounted/freed. 620 * The second one is the fshc_arg. 621 * 622 * fsh_callback_handle_t is filled out by this function. 623 * 624 * Returns (-1) if hook/callback limit exceeded. 625 * 626 * Calling this function in a {mount,free} callback will cause a deadlock. 
627 */ 628 fsh_callback_handle_t 629 fsh_callback_install(fsh_callback_t *callback) 630 { 631 fsh_callback_int_t *fshci; 632 fsh_callback_handle_t handle; 633 634 if ((handle = id_alloc(fsh_idspace)) == -1) 635 return (-1); 636 637 fshci = (fsh_callback_int_t *)kmem_alloc(sizeof (*fshci), KM_SLEEP); 638 (void) memcpy(&fshci->fshci_cb, callback, sizeof (fshci->fshci_cb)); 639 fshci->fshci_handle = handle; 640 641 mutex_enter(&fsh_cb_lock); 642 list_insert_head(&fsh_cblist, fshci); 643 mutex_exit(&fsh_cb_lock); 644 645 return (handle); 646 } 647 648 /* 649 * API for removing global mount/free callbacks. 650 * 651 * Returns (-1) if callback wasn't found, 0 otherwise. 652 * 653 * Calling this function in a {mount,free} callback will cause a deadlock. 654 */ 655 int 656 fsh_callback_remove(fsh_callback_handle_t handle) 657 { 658 fsh_callback_int_t *fshci; 659 660 mutex_enter(&fsh_cb_lock); 661 662 for (fshci = list_head(&fsh_cblist); fshci != NULL; 663 fshci = list_next(&fsh_cblist, fshci)) { 664 if (fshci->fshci_handle == handle) { 665 list_remove(&fsh_cblist, fshci); 666 break; 667 } 668 } 669 670 mutex_exit(&fsh_cb_lock); 671 672 if (fshci == NULL) 673 return (-1); 674 675 kmem_free(fshci, sizeof (*fshci)); 676 id_free(fsh_idspace, handle); 677 678 return (0); 679 } 680 681 /* 682 * This function is executed right before returning from domount()@vfs.c. 683 * We are sure that it's called only after fsh_init(). 684 * It executes all the mount callbacks installed in the fsh. 685 * 686 * Since fsh_exec_mount_callbacks() is called only inside domount(), it is legal 687 * to call fsh_hook_{install,remove}() inside a mount callback WITHOUT holding 688 * this vfs_t. This guarantee should be preserved, because it's in the "Usage" 689 * section in the big theory statement at the top of this file. 
690 */ 691 void 692 fsh_exec_mount_callbacks(vfs_t *vfsp) 693 { 694 fsh_callback_int_t *fshci; 695 fsh_callback_t *cb; 696 int fsh_context; 697 698 mutex_enter(&fsh_cb_owner_lock); 699 fsh_context = fsh_cb_owner == curthread; 700 mutex_exit(&fsh_cb_owner_lock); 701 702 if (!fsh_context) { 703 mutex_enter(&fsh_cb_lock); 704 mutex_enter(&fsh_cb_owner_lock); 705 fsh_cb_owner = curthread; 706 mutex_exit(&fsh_cb_owner_lock); 707 } 708 709 ASSERT(MUTEX_HELD(&fsh_cb_lock)); 710 711 for (fshci = list_head(&fsh_cblist); fshci != NULL; 712 fshci = list_next(&fsh_cblist, fshci)) { 713 cb = &fshci->fshci_cb; 714 if (cb->fshc_mount != NULL) 715 (*(cb->fshc_mount))(vfsp, cb->fshc_arg); 716 } 717 718 if (!fsh_context) { 719 mutex_enter(&fsh_cb_owner_lock); 720 fsh_cb_owner = NULL; 721 mutex_exit(&fsh_cb_owner_lock); 722 mutex_exit(&fsh_cb_lock); 723 } 724 } 725 726 /* 727 * This function is executed right before VFS_FREEVFS() is called in 728 * vfs_rele()@vfs.c. We are sure that it's called only after fsh_init(). 729 * It executes all the free callbacks installed in the fsh. 
730 * 731 * free() callback is the point after the handles associated with the hooks 732 * installed on this vfs_t become invalid 733 */ 734 void 735 fsh_exec_free_callbacks(vfs_t *vfsp) 736 { 737 fsh_callback_int_t *fshci; 738 fsh_callback_t *cb; 739 int fsh_context; 740 741 mutex_enter(&fsh_cb_owner_lock); 742 fsh_context = fsh_cb_owner == curthread; 743 mutex_exit(&fsh_cb_owner_lock); 744 745 if (!fsh_context) { 746 mutex_enter(&fsh_cb_lock); 747 mutex_enter(&fsh_cb_owner_lock); 748 fsh_cb_owner = curthread; 749 mutex_exit(&fsh_cb_owner_lock); 750 } 751 752 ASSERT(MUTEX_HELD(&fsh_cb_lock)); 753 754 for (fshci = list_head(&fsh_cblist); fshci != NULL; 755 fshci = list_next(&fsh_cblist, fshci)) { 756 cb = &fshci->fshci_cb; 757 if (cb->fshc_free != NULL) 758 (*(cb->fshc_free))(vfsp, cb->fshc_arg); 759 } 760 761 if (!fsh_context) { 762 mutex_enter(&fsh_cb_owner_lock); 763 fsh_cb_owner = NULL; 764 mutex_exit(&fsh_cb_owner_lock); 765 mutex_exit(&fsh_cb_lock); 766 } 767 } 768 769 /* 770 * API for vnode.c/vfs.c to start executing the fsh for a given operation. 771 * 772 * fsh_xxx() tries to find the first non-NULL xxx hook on the fshfsr_list. If it 773 * does, it executes it. If not, underlying vnodeop/vfsop is called. 774 * 775 * These interfaces are using fsh_res_ptr (in fsh_fsrec_prepare()), so it's 776 * absolutely necessary to call fsh_init() before using them. That's done in 777 * vfsinit(). 778 * 779 * While these functions are executing, it's expected that necessary vfs_t's 780 * are held so that vfs_free() isn't called. vfs_free() expects that noone 781 * accesses vfs_fshrecord of a given vfs_t. 782 * It's also the caller's responsibility to keep vnode_t passed to fsh_foo() 783 * alive and valid. 784 * All these expectations are met because these functions are used only in 785 * correspondng {fop,fsop}_foo() functions. 
786 */ 787 int 788 fsh_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, 789 caller_context_t *ct) 790 { 791 int ret; 792 fsh_fsrecord_t *fsrecp; 793 fsh_int_t *fshi; 794 fsh_exec_t *fshe; 795 list_t exec_list; 796 797 fsh_fsrec_prepare(vp->v_vfsp); 798 fsrecp = vp->v_vfsp->vfs_fshrecord; 799 800 rw_enter(&fsrecp->fshfsr_lock, RW_READER); 801 if (!(fsrecp->fshfsr_enabled)) { 802 rw_exit(&fsrecp->fshfsr_lock); 803 return ((*vp->v_op->vop_read)(vp, uiop, ioflag, cr, ct)); 804 } 805 806 list_create(&exec_list, sizeof (fsh_exec_t), 807 offsetof(fsh_exec_t, fshe_node)); 808 809 for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL; 810 fshi = list_next(&fsrecp->fshfsr_list, fshi)) { 811 if (fshi->fshi_hooks.pre_read != NULL || 812 fshi->fshi_hooks.post_read != NULL) { 813 if (fshi_hold(fshi)) { 814 fshe = kmem_alloc(sizeof (*fshe), KM_SLEEP); 815 fshe->fshe_fshi = fshi; 816 list_insert_tail(&exec_list, fshe); 817 } 818 } 819 } 820 rw_exit(&fsrecp->fshfsr_lock); 821 822 /* Execute pre hooks */ 823 for (fshe = list_head(&exec_list); fshe != NULL; 824 fshe = list_next(&exec_list, fshe)) { 825 if (fshe->fshe_fshi->fshi_hooks.pre_read != NULL) 826 (*fshe->fshe_fshi->fshi_hooks.pre_read)( 827 fshe->fshe_fshi->fshi_hooks.arg, 828 &fshe->fshe_instance, 829 &vp, &uiop, &ioflag, &cr, &ct); 830 } 831 832 ret = (*vp->v_op->vop_read)(vp, uiop, ioflag, cr, ct); 833 834 /* Execute post hooks */ 835 while ((fshe = list_remove_tail(&exec_list)) != NULL) { 836 if (fshe->fshe_fshi->fshi_hooks.post_read != NULL) 837 ret = (*fshe->fshe_fshi->fshi_hooks.post_read)( 838 ret, fshe->fshe_fshi->fshi_hooks.arg, 839 fshe->fshe_instance, 840 vp, uiop, ioflag, cr, ct); 841 fshi_rele(fshe->fshe_fshi); 842 kmem_free(fshe, sizeof (*fshe)); 843 } 844 list_destroy(&exec_list); 845 846 return (ret); 847 } 848 849 int 850 fsh_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, 851 caller_context_t *ct) 852 { 853 int ret; 854 fsh_fsrecord_t *fsrecp; 855 fsh_int_t *fshi; 856 fsh_exec_t *fshe; 
857 list_t exec_list; 858 859 fsh_fsrec_prepare(vp->v_vfsp); 860 fsrecp = vp->v_vfsp->vfs_fshrecord; 861 862 rw_enter(&fsrecp->fshfsr_lock, RW_READER); 863 if (!(fsrecp->fshfsr_enabled)) { 864 rw_exit(&fsrecp->fshfsr_lock); 865 return ((*vp->v_op->vop_write)(vp, uiop, ioflag, cr, ct)); 866 } 867 868 list_create(&exec_list, sizeof (fsh_exec_t), 869 offsetof(fsh_exec_t, fshe_node)); 870 871 for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL; 872 fshi = list_next(&fsrecp->fshfsr_list, fshi)) { 873 if (fshi->fshi_hooks.pre_write != NULL || 874 fshi->fshi_hooks.post_write != NULL) { 875 if (fshi_hold(fshi)) { 876 fshe = kmem_alloc(sizeof (*fshe), KM_SLEEP); 877 fshe->fshe_fshi = fshi; 878 list_insert_tail(&exec_list, fshe); 879 } 880 } 881 } 882 rw_exit(&fsrecp->fshfsr_lock); 883 884 /* Execute pre hooks */ 885 for (fshe = list_head(&exec_list); fshe != NULL; 886 fshe = list_next(&exec_list, fshe)) { 887 if (fshe->fshe_fshi->fshi_hooks.pre_write != NULL) 888 (*fshe->fshe_fshi->fshi_hooks.pre_write)( 889 fshe->fshe_fshi->fshi_hooks.arg, 890 &fshe->fshe_instance, 891 &vp, &uiop, &ioflag, &cr, &ct); 892 } 893 894 ret = (*vp->v_op->vop_write)(vp, uiop, ioflag, cr, ct); 895 896 /* Execute post hooks */ 897 while ((fshe = list_remove_tail(&exec_list)) != NULL) { 898 if (fshe->fshe_fshi->fshi_hooks.post_write != NULL) 899 ret = (*fshe->fshe_fshi->fshi_hooks.post_write)( 900 ret, fshe->fshe_fshi->fshi_hooks.arg, 901 fshe->fshe_instance, 902 vp, uiop, ioflag, cr, ct); 903 fshi_rele(fshe->fshe_fshi); 904 kmem_free(fshe, sizeof (*fshe)); 905 } 906 list_destroy(&exec_list); 907 908 return (ret); 909 } 910 911 int 912 fsh_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 913 { 914 int ret; 915 fsh_fsrecord_t *fsrecp; 916 fsh_int_t *fshi; 917 fsh_exec_t *fshe; 918 list_t exec_list; 919 920 fsh_fsrec_prepare(vfsp); 921 fsrecp = vfsp->vfs_fshrecord; 922 923 rw_enter(&fsrecp->fshfsr_lock, RW_READER); 924 if (!(fsrecp->fshfsr_enabled)) { 925 
rw_exit(&fsrecp->fshfsr_lock); 926 return ((*vfsp->vfs_op->vfs_mount)(vfsp, mvp, uap, cr)); 927 } 928 929 list_create(&exec_list, sizeof (fsh_exec_t), 930 offsetof(fsh_exec_t, fshe_node)); 931 932 for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL; 933 fshi = list_next(&fsrecp->fshfsr_list, fshi)) { 934 if (fshi->fshi_hooks.pre_mount != NULL || 935 fshi->fshi_hooks.post_mount != NULL) { 936 if (fshi_hold(fshi)) { 937 fshe = kmem_alloc(sizeof (*fshe), KM_SLEEP); 938 fshe->fshe_fshi = fshi; 939 list_insert_tail(&exec_list, fshe); 940 } 941 } 942 } 943 rw_exit(&fsrecp->fshfsr_lock); 944 945 /* Execute pre hooks */ 946 for (fshe = list_head(&exec_list); fshe != NULL; 947 fshe = list_next(&exec_list, fshe)) { 948 if (fshe->fshe_fshi->fshi_hooks.pre_mount != NULL) 949 (*fshe->fshe_fshi->fshi_hooks.pre_mount)( 950 &fshe->fshe_fshi->fshi_hooks.arg, 951 &fshe->fshe_instance, 952 &vfsp, &mvp, &uap, &cr); 953 } 954 955 ret = (*vfsp->vfs_op->vfs_mount)(vfsp, mvp, uap, cr); 956 957 /* Execute post hooks */ 958 while ((fshe = list_remove_tail(&exec_list)) != NULL) { 959 if (fshe->fshe_fshi->fshi_hooks.post_mount != NULL) 960 ret = (*fshe->fshe_fshi->fshi_hooks.post_mount)( 961 ret, fshe->fshe_fshi->fshi_hooks.arg, 962 fshe->fshe_instance, 963 vfsp, mvp, uap, cr); 964 fshi_rele(fshe->fshe_fshi); 965 kmem_free(fshe, sizeof (*fshe)); 966 } 967 list_destroy(&exec_list); 968 969 return (ret); 970 } 971 972 int 973 fsh_unmount(vfs_t *vfsp, int flag, cred_t *cr) 974 { 975 int ret; 976 fsh_fsrecord_t *fsrecp; 977 fsh_int_t *fshi; 978 fsh_exec_t *fshe; 979 list_t exec_list; 980 981 fsh_fsrec_prepare(vfsp); 982 fsrecp = vfsp->vfs_fshrecord; 983 984 rw_enter(&fsrecp->fshfsr_lock, RW_READER); 985 if (!(fsrecp->fshfsr_enabled)) { 986 rw_exit(&fsrecp->fshfsr_lock); 987 return ((*vfsp->vfs_op->vfs_unmount)(vfsp, flag, cr)); 988 } 989 990 list_create(&exec_list, sizeof (fsh_exec_t), 991 offsetof(fsh_exec_t, fshe_node)); 992 993 for (fshi = list_head(&fsrecp->fshfsr_list); fshi != NULL; 
994 fshi = list_next(&fsrecp->fshfsr_list, fshi)) { 995 if (fshi->fshi_hooks.pre_unmount != NULL || 996 fshi->fshi_hooks.post_unmount != NULL) { 997 if (fshi_hold(fshi)) { 998 fshe = kmem_alloc(sizeof (*fshe), KM_SLEEP); 999 fshe->fshe_fshi = fshi; 1000 list_insert_tail(&exec_list, fshe); 1001 } 1002 } 1003 } 1004 rw_exit(&fsrecp->fshfsr_lock); 1005 1006 /* Execute pre hooks */ 1007 for (fshe = list_head(&exec_list); fshe != NULL; 1008 fshe = list_next(&exec_list, fshe)) { 1009 if (fshe->fshe_fshi->fshi_hooks.pre_unmount != NULL) 1010 (*fshe->fshe_fshi->fshi_hooks.pre_unmount)( 1011 fshe->fshe_fshi->fshi_hooks.arg, 1012 &fshe->fshe_instance, 1013 &vfsp, &flag, &cr); 1014 } 1015 1016 ret = (*vfsp->vfs_op->vfs_unmount)(vfsp, flag, cr); 1017 1018 /* Execute post hooks */ 1019 while ((fshe = list_remove_tail(&exec_list)) != NULL) { 1020 if (fshe->fshe_fshi->fshi_hooks.post_unmount != NULL) 1021 ret = (*fshe->fshe_fshi->fshi_hooks.post_unmount)( 1022 ret, fshe->fshe_fshi->fshi_hooks.arg, 1023 fshe->fshe_instance, 1024 vfsp, flag, cr); 1025 fshi_rele(fshe->fshe_fshi); 1026 kmem_free(fshe, sizeof (*fshe)); 1027 } 1028 list_destroy(&exec_list); 1029 1030 return (ret); 1031 } 1032 1033 /* 1034 * This is the funtion used by fsh_fsrec_prepare() to allocate a new 1035 * fsh_fsrecord. This function is called by the first function which 1036 * access the vfs_fshrecord and finds out it's NULL. 1037 */ 1038 static fsh_fsrecord_t * 1039 fsh_fsrec_create() 1040 { 1041 fsh_fsrecord_t *fsrecp; 1042 1043 fsrecp = (fsh_fsrecord_t *)kmem_zalloc(sizeof (*fsrecp), KM_SLEEP); 1044 list_create(&fsrecp->fshfsr_list, sizeof (fsh_int_t), 1045 offsetof(fsh_int_t, fshi_node)); 1046 rw_init(&fsrecp->fshfsr_lock, NULL, RW_DRIVER, NULL); 1047 fsrecp->fshfsr_enabled = 1; 1048 return (fsrecp); 1049 } 1050 1051 1052 /* 1053 * This call must be used ONLY in vfs_free(). 1054 * 1055 * It is required and sufficient to check if fsh_fsrecord_t is not NULL before 1056 * passing it to fsh_fsrec_destroy. 
1057 * 1058 * All the remaining hooks are being removed here. 1059 */ 1060 void 1061 fsh_fsrec_destroy(struct fsh_fsrecord *volatile fsrecp) 1062 { 1063 fsh_int_t *fshi; 1064 1065 VERIFY(fsrecp != NULL); 1066 1067 _NOTE(CONSTCOND) 1068 while (1) { 1069 mutex_enter(&fsh_lock); 1070 rw_enter(&fsrecp->fshfsr_lock, RW_WRITER); 1071 fshi = list_remove_head(&fsrecp->fshfsr_list); 1072 rw_exit(&fsrecp->fshfsr_lock); 1073 if (fshi == NULL) { 1074 mutex_exit(&fsh_lock); 1075 break; 1076 } 1077 ASSERT(fshi->fshi_doomed == 0); 1078 list_remove(&fsh_map, fshi); 1079 mutex_exit(&fsh_lock); 1080 1081 if (fshi->fshi_hooks.remove_cb != NULL) 1082 (*fshi->fshi_hooks.remove_cb)(fshi->fshi_hooks.arg, 1083 fshi->fshi_handle); 1084 1085 id_free(fsh_idspace, fshi->fshi_handle); 1086 mutex_destroy(&fshi->fshi_lock); 1087 kmem_free(fshi, sizeof (*fshi)); 1088 1089 } 1090 1091 list_destroy(&fsrecp->fshfsr_list); 1092 rw_destroy(&fsrecp->fshfsr_lock); 1093 kmem_free(fsrecp, sizeof (*fsrecp)); 1094 } 1095 1096 /* 1097 * fsh_init() is called in vfsinit()@vfs.c. This function MUST be called 1098 * before every other fsh call. 1099 */ 1100 void 1101 fsh_init(void) 1102 { 1103 mutex_init(&fsh_cb_lock, NULL, MUTEX_DRIVER, NULL); 1104 mutex_init(&fsh_cb_owner_lock, NULL, MUTEX_DRIVER, NULL); 1105 list_create(&fsh_cblist, sizeof (fsh_callback_int_t), 1106 offsetof(fsh_callback_int_t, fshci_node)); 1107 1108 mutex_init(&fsh_lock, NULL, MUTEX_DRIVER, NULL); 1109 1110 list_create(&fsh_map, sizeof (fsh_int_t), offsetof(fsh_int_t, 1111 fshi_global)); 1112 1113 /* See comment above fsh_fsrec_prepare() */ 1114 fsh_res_ptr = (void *)-1; 1115 1116 fsh_idspace = id_space_create("fsh", 0, fsh_limit); 1117 }