/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright (c) 2011 Bayard G. Bell. All rights reserved.
 */

#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/errno.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/signal.h>
#include <sys/modctl.h>
#include <sys/proc.h>
#include <sys/lvm/mdvar.h>

md_ops_t event_md_ops;
#ifndef lint
md_ops_t *md_interface_ops = &event_md_ops;
#endif

extern void sigintr();
extern void sigunintr();
extern md_set_t md_set[];

extern kmutex_t md_mx;		/* protects md global state */
extern kcondvar_t md_cv;	/* md_status events */
extern int md_status;
extern clock_t md_hz;
extern md_event_queue_t *md_event_queue;
static void md_reaper();
extern void md_clear_named_service();

/* event handler stuff */
kmutex_t md_eventq_mx;
int md_reap_count = 32;		/* events between owner-alive checks */
int md_reap = 0;
int md_max_notify_queue = 512;
int md_reap_off = 0;		/* non-zero disables reaping */
/* don't allow module to be unloaded until all pending ops are complete */
int global_lock_wait_cnt = 0;
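
/*
 * Overview of the notification machinery implemented below:
 *
 * Events are delivered through named event queues (md_event_queue_t),
 * kept on a singly linked list headed by md_event_queue and protected
 * by md_eventq_mx.  Producers append an event to every queue via
 * md_put_event(); consumers create a queue with the EQ_ON ioctl
 * command and drain it with EQ_GET_WAIT/EQ_GET_NOWAIT.  Each queue is
 * capped at md_max_notify_queue entries; on overflow the oldest event
 * is recycled and the queue is flagged MD_EVENT_QUEUE_FULL so the
 * consumer learns that events were lost.  md_reaper() tears down
 * queues whose owning process has exited, except for queues marked
 * MD_EVENT_QUEUE_PERM.
 */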

static int
md_flush_queue(md_event_queue_t *queue)
{
	md_event_t	*element, *next_element;

	/*
	 * If something is still waiting on this queue, it cannot be
	 * freed yet; tell the caller so it can wake the waiter and
	 * clean the queue up later.
	 */
	if (queue->mdn_waiting)
		return (1);
	/*
	 * The owning pid no longer exists, so tear the queue down:
	 * free any queued events here and let the caller unlink and
	 * free the queue itself.
	 */
	element = queue->mdn_front;
	while (element) {
		next_element = element->mdn_next;
		kmem_free(element, sizeof (md_event_t));
		element = next_element;
	}
	queue->mdn_front = queue->mdn_tail = NULL;
	return (0);
}

static void
md_put_event(md_tags_t tag, set_t sp, md_dev64_t dev, int event,
		u_longlong_t user)
{
	md_event_queue_t	*queue;
	md_event_t		*entry;

	if (!md_event_queue)
		return;

	mutex_enter(&md_eventq_mx);
	for (queue = md_event_queue; queue; queue = queue->mdn_nextq) {
		if (queue->mdn_size >= md_max_notify_queue) {
			/* queue is full: recycle the oldest event */
			ASSERT(queue->mdn_front != NULL);
			ASSERT(queue->mdn_front->mdn_next != NULL);
			entry = queue->mdn_front;
			queue->mdn_front = entry->mdn_next;
			queue->mdn_size--;
			queue->mdn_flags |= MD_EVENT_QUEUE_FULL;
		} else
			entry = (md_event_t *)kmem_alloc(sizeof (md_event_t),
			    KM_NOSLEEP);
		if (entry == NULL) {
			queue->mdn_flags |= MD_EVENT_QUEUE_INVALID;
			continue;
		}
		entry->mdn_tag = tag;
		entry->mdn_set = sp;
		entry->mdn_dev = dev;
		entry->mdn_event = event;
		entry->mdn_user = user;
		entry->mdn_next = NULL;
		uniqtime(&entry->mdn_time);
		if (queue->mdn_front == NULL) {
			queue->mdn_front = entry;
			queue->mdn_tail = entry;
		} else {
			queue->mdn_tail->mdn_next = entry;
			queue->mdn_tail = entry;
		}
		if (queue->mdn_waiting)
			cv_signal(&queue->mdn_cv);

		queue->mdn_size++;
	}
	md_reap++;
	mutex_exit(&md_eventq_mx);

	if (md_reap > md_reap_count)
		md_reaper();
}

static void
md_reaper()
{
	md_event_queue_t	*next = md_event_queue;
	md_event_queue_t	*present, *last = NULL;

	if (md_event_queue == NULL || md_reap_off)
		return;

	mutex_enter(&md_eventq_mx);
	while (next) {
		present = next;
		next = present->mdn_nextq;

		/* long term (permanent) queues are never reaped */
		if (present->mdn_flags & MD_EVENT_QUEUE_PERM) {
			last = present;
			continue;
		}

		/* check to see if the pid is still alive */
		if (!md_checkpid(present->mdn_pid, present->mdn_proc))
			present->mdn_flags |= MD_EVENT_QUEUE_DESTROY;

		/* skip the queue unless it is marked for destruction */
		if (!(present->mdn_flags & MD_EVENT_QUEUE_DESTROY)) {
			last = present;
			continue;
		}

		/* the queue is condemned; tear it down */
		present->mdn_pid = 0;
		present->mdn_proc = NULL;
		/*
		 * If something is still waiting on the queue of this
		 * defunct process, wake it and leave the queue marked
		 * for destruction; a later pass will clean it up.
		 */
		if (md_flush_queue(present)) {
			present->mdn_flags = MD_EVENT_QUEUE_DESTROY;
			cv_broadcast(&present->mdn_cv);
			last = present;
			continue;
		}
		/* remove the entry */
		if (last == NULL)
			md_event_queue = next;
		else
			last->mdn_nextq = next;
		cv_destroy(&present->mdn_cv);
		kmem_free(present, sizeof (md_event_queue_t));
	}
	md_reap = 0;
	mutex_exit(&md_eventq_mx);
}
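
/*
 * notify_halt:
 * Entry point for the md halt/unload protocol.  Everything except
 * MD_HALT_UNLOAD is a no-op here.  For MD_HALT_UNLOAD on the local
 * set, make up to MD_NOTIFY_HALT_TRIES passes over the queues, marking
 * waiting queues for destruction and waking their waiters (with a
 * delay of md_hz ticks after each wakeup), then flush and free every
 * queue.  Fails if a queue still has a waiter after the retry passes.
 */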
/* ARGSUSED */
static int
notify_halt(md_haltcmd_t cmd, set_t setno)
{
	md_event_queue_t	*orig_queue, *queue, *queue_free;
	int			i;

	switch (cmd) {
	case MD_HALT_CLOSE:
	case MD_HALT_OPEN:
	case MD_HALT_DOIT:
	case MD_HALT_CHECK:
		return (0);

	case MD_HALT_UNLOAD:
		if (setno != MD_LOCAL_SET)
			return (1);
		mutex_enter(&md_eventq_mx);
		if (md_event_queue == NULL) {
			mutex_exit(&md_eventq_mx);
			return (0);
		}

		orig_queue = md_event_queue;
		md_event_queue = NULL;
		for (i = 0; i < MD_NOTIFY_HALT_TRIES; i++) {
			for (queue = orig_queue; queue;
			    queue = queue->mdn_nextq) {
				if (queue->mdn_waiting == 0) {
					continue;
				}
				queue->mdn_flags = MD_EVENT_QUEUE_DESTROY;
				mutex_exit(&md_eventq_mx);
				cv_broadcast(&queue->mdn_cv);
				delay(md_hz);
				mutex_enter(&md_eventq_mx);
			}
		}
		for (queue = orig_queue; queue; ) {
			if (md_flush_queue(queue)) {
				cmn_err(CE_WARN, "md: queue not freed");
				mutex_exit(&md_eventq_mx);
				return (1);
			}
			queue_free = queue;
			queue = queue->mdn_nextq;
			kmem_free(queue_free, sizeof (md_event_queue_t));
		}
		md_event_queue = NULL;
		mutex_exit(&md_eventq_mx);
		return (0);

	default:
		return (1);
	}
}

static md_event_queue_t *
md_find_event_queue(char *q_name, int lock)
{
	md_event_queue_t	*event_q = md_event_queue;

	if (lock)
		mutex_enter(&md_eventq_mx);
	ASSERT(MUTEX_HELD(&md_eventq_mx));
	while (event_q) {
		if ((*event_q->mdn_name != *q_name) ||
		    (event_q->mdn_flags & MD_EVENT_QUEUE_DESTROY)) {
			event_q = event_q->mdn_nextq;
			continue;
		}

		if (bcmp(q_name, event_q->mdn_name, MD_NOTIFY_NAME_SIZE) == 0)
			break;
		event_q = event_q->mdn_nextq;
	}
	if (lock)
		mutex_exit(&md_eventq_mx);

	return ((md_event_queue_t *)event_q);
}

static intptr_t
notify_interface(md_event_cmds_t cmd, md_tags_t tag, set_t set, md_dev64_t dev,
		md_event_type_t event)
{
	switch (cmd) {
	case EQ_PUT:
		md_put_event(tag, set, dev, event, (u_longlong_t)0);
		break;
	default:
		return (-1);
	}
	return (0);
}
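
/*
 * notify_fillin_empty_ioctl:
 * Load the caller's buffer with a synthetic "empty" event
 * (EQ_EMPTY/TAG_EMPTY, with the wildcard MD_ALLSETS/MD_ALLDEVS
 * values) and copy it out to user space.  Used when a queue has
 * disappeared or has nothing queued.
 */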
static int
notify_fillin_empty_ioctl(void *data, void *ioctl_in, size_t sz,
		int mode)
{
	int			err;
	md_event_ioctl_t	*ioctl = (md_event_ioctl_t *)data;

	ioctl->mdn_event = EQ_EMPTY;
	ioctl->mdn_tag = TAG_EMPTY;
	ioctl->mdn_set = MD_ALLSETS;
	ioctl->mdn_dev = MD_ALLDEVS;
	uniqtime32(&ioctl->mdn_time);
	ioctl->mdn_user = (u_longlong_t)0;
	err = ddi_copyout(data, ioctl_in, sz, mode);
	return (err);
}

/*
 * md_wait_for_event:
 * IOLOCK_RETURN, which drops md_ioctl_lock, is called in this routine
 * to enable other md ioctls to enter the kernel while this thread of
 * execution waits on an event.  When that event occurs, the stopped
 * thread wakes and continues, and md_ioctl_lock must be reacquired.
 * Even though md_ioctl_lock is interruptible, we choose to ignore
 * EINTR: returning without acquiring md_ioctl_lock would be
 * catastrophic, since it would break ioctl single threading.
 *
 * Return:	0	md_eventq_mx held
 *		EINTR	md_eventq_mx not held
 *		Always returns with the ioctl lock held.
 */

static int
md_wait_for_event(md_event_queue_t *event_queue, void *ioctl_in,
		md_event_ioctl_t *ioctl, size_t sz,
		int mode, IOLOCK *lockp)
{
	int rval = 0;

	while (event_queue->mdn_front == NULL) {
		event_queue->mdn_waiting++;
		(void) IOLOCK_RETURN(0, lockp);
		rval = cv_wait_sig(&event_queue->mdn_cv, &md_eventq_mx);
		event_queue->mdn_waiting--;
		if ((rval == 0) || (event_queue->mdn_flags &
		    MD_EVENT_QUEUE_DESTROY)) {
			/* interrupted, or queue destroyed while waiting */
			global_lock_wait_cnt++;
			mutex_exit(&md_eventq_mx);
			/* reenable single threading of ioctls */
			while (md_ioctl_lock_enter() == EINTR)
				;

			(void) notify_fillin_empty_ioctl
			    ((void *)ioctl, ioctl_in, sz, mode);
			mutex_enter(&md_eventq_mx);
			global_lock_wait_cnt--;
			mutex_exit(&md_eventq_mx);
			return (EINTR);
		}
		/*
		 * Reacquire single threading of ioctls.  Drop
		 * md_eventq_mx, since md_ioctl_lock_enter can sleep.
		 */
		global_lock_wait_cnt++;
		mutex_exit(&md_eventq_mx);
		while (md_ioctl_lock_enter() == EINTR)
			;
		mutex_enter(&md_eventq_mx);
		global_lock_wait_cnt--;
	}
	return (0);
}
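
/*
 * notify_ioctl:
 * Dispatcher for the md notify ioctl.  After validating the revision
 * and magic number of the request, handles:
 *	EQ_ON		create and link a new named event queue
 *	EQ_OFF		mark a queue for destruction by the reaper
 *	EQ_GET_WAIT	block until an event arrives, then copy it out
 *	EQ_GET_NOWAIT	copy out an event, or EAGAIN if none is queued
 *	EQ_PUT		inject an event into every queue
 * For the queue-consuming commands the queue is looked up first, and
 * ENOENT is returned if it no longer exists.
 */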
/* ARGSUSED */
static int
notify_ioctl(dev_t dev, int icmd, void *ioctl_in, int mode, IOLOCK *lockp)
{
	int			cmd;
	pid_t			pid;
	md_event_queue_t	*event_queue;
	md_event_t		*event;
	cred_t			*credp;
	char			*q_name;
	int			err = 0;
	size_t			sz = 0;
	md_event_ioctl_t	*ioctl;

	sz = sizeof (*ioctl);
	ioctl = kmem_zalloc(sz, KM_SLEEP);

	if (ddi_copyin(ioctl_in, (void *)ioctl, sz, mode)) {
		err = EFAULT;
		goto out;
	}

	if (ioctl->mdn_rev != MD_NOTIFY_REVISION) {
		err = EINVAL;
		goto out;
	}
	if (ioctl->mdn_magic != MD_EVENT_ID) {
		err = EINVAL;
		goto out;
	}

	pid = md_getpid();
	cmd = ioctl->mdn_cmd;
	q_name = ioctl->mdn_name;

	if (((cmd != EQ_OFF) && (cmd != EQ_ON)) && (md_reap >= md_reap_count))
		md_reaper();

	if ((cmd != EQ_ON) && (cmd != EQ_PUT)) {
		mutex_enter(&md_eventq_mx);
		if ((event_queue = md_find_event_queue(q_name, 0)) == NULL) {
			mutex_exit(&md_eventq_mx);
			(void) notify_fillin_empty_ioctl
			    ((void *)ioctl, ioctl_in, sz, mode);
			err = ENOENT;
			goto out;
		}
	}

	switch (cmd) {
	case EQ_ON:

		md_reaper();

		mutex_enter(&md_eventq_mx);
		if (md_find_event_queue(q_name, 0) != NULL) {
			mutex_exit(&md_eventq_mx);
			err = EEXIST;
			break;
		}

		/* allocate and initialize queue head */
		event_queue = (md_event_queue_t *)
		    kmem_alloc(sizeof (md_event_queue_t), KM_NOSLEEP);
		if (event_queue == NULL) {
			mutex_exit(&md_eventq_mx);
			err = ENOMEM;
			break;
		}

		cv_init(&event_queue->mdn_cv, NULL, CV_DEFAULT, NULL);

		event_queue->mdn_flags = 0;
		event_queue->mdn_pid = pid;
		event_queue->mdn_proc = md_getproc();
		event_queue->mdn_size = 0;
		event_queue->mdn_front = NULL;
		event_queue->mdn_tail = NULL;
		event_queue->mdn_waiting = 0;
		event_queue->mdn_nextq = NULL;
		credp = ddi_get_cred();
		event_queue->mdn_uid = crgetuid(credp);
		bcopy(q_name, event_queue->mdn_name,
		    MD_NOTIFY_NAME_SIZE);
		if (ioctl->mdn_flags & EQ_Q_PERM)
			event_queue->mdn_flags |= MD_EVENT_QUEUE_PERM;

		/* link into the list of event queues */
		if (md_event_queue != NULL)
			event_queue->mdn_nextq = md_event_queue;
		md_event_queue = event_queue;
		mutex_exit(&md_eventq_mx);
		err = 0;
		break;

	case EQ_OFF:

		/*
		 * Cannot happen (the queue lookup above fails first),
		 * but be safe: drop md_eventq_mx, which is still held
		 * here, before bailing out.
		 */
		if (md_event_queue == NULL) {
			mutex_exit(&md_eventq_mx);
			err = ENOENT;
			break;
		}

		event_queue->mdn_flags = MD_EVENT_QUEUE_DESTROY;
		event_queue->mdn_pid = 0;
		event_queue->mdn_proc = NULL;

		if (event_queue->mdn_waiting != 0)
			cv_broadcast(&event_queue->mdn_cv);

		/*
		 * force the reaper to delete this when it has no process
		 * waiting on it.
		 */
		mutex_exit(&md_eventq_mx);
		md_reaper();
		err = 0;
		break;

	case EQ_GET_NOWAIT:
	case EQ_GET_WAIT:
		if (cmd == EQ_GET_WAIT) {
			err = md_wait_for_event(event_queue, ioctl_in,
			    ioctl, sz, mode, lockp);
			if (err == EINTR)
				goto out;
		}
		ASSERT(MUTEX_HELD(&md_eventq_mx));
		if (event_queue->mdn_flags &
		    (MD_EVENT_QUEUE_INVALID | MD_EVENT_QUEUE_FULL)) {
			/* events were lost; tell the consumer */
			event_queue->mdn_flags &=
			    ~(MD_EVENT_QUEUE_INVALID | MD_EVENT_QUEUE_FULL);
			mutex_exit(&md_eventq_mx);
			err = notify_fillin_empty_ioctl
			    ((void *)ioctl, ioctl_in, sz, mode);
			ioctl->mdn_event = EQ_NOTIFY_LOST;
			err = ddi_copyout((void *)ioctl, ioctl_in, sz, mode);
			if (err)
				err = EFAULT;
			goto out;
		}
		if (event_queue->mdn_front != NULL) {
			/* dequeue the oldest event and copy it out */
			event = event_queue->mdn_front;
			event_queue->mdn_front = event->mdn_next;
			event_queue->mdn_size--;
			if (event_queue->mdn_front == NULL)
				event_queue->mdn_tail = NULL;
			mutex_exit(&md_eventq_mx);
			ioctl->mdn_tag = event->mdn_tag;
			ioctl->mdn_set = event->mdn_set;
			ioctl->mdn_dev = event->mdn_dev;
			ioctl->mdn_event = event->mdn_event;
			ioctl->mdn_user = event->mdn_user;
			ioctl->mdn_time.tv_sec = event->mdn_time.tv_sec;
			ioctl->mdn_time.tv_usec = event->mdn_time.tv_usec;
			kmem_free(event, sizeof (md_event_t));
			err = ddi_copyout((void *)ioctl, ioctl_in, sz, mode);
			if (err)
				err = EFAULT;
			goto out;
		} else {	/* no elements on queue */
			mutex_exit(&md_eventq_mx);
			err = notify_fillin_empty_ioctl
			    ((void *)ioctl, ioctl_in, sz, mode);
			if (err)
				err = EFAULT;
		}

		if (cmd == EQ_GET_NOWAIT)
			err = EAGAIN;
		goto out;

	case EQ_PUT:

		if (!md_event_queue) {
			err = ENOENT;
			break;
		}
		md_put_event(ioctl->mdn_tag,
		    ioctl->mdn_set, ioctl->mdn_dev,
		    ioctl->mdn_event, ioctl->mdn_user);
		err = 0;
		goto out;

	default:
		err = EINVAL;
		goto out;
	}

out:
	kmem_free(ioctl, sz);
	return (err);
}
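
/*
 * The reap on/off and stats routines below are test hooks.  They are
 * exported through the named-service table (notify_services) at the
 * bottom of this file rather than through the ioctl interface.
 */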

/*
 * Turn off reaping of orphaned queues, for testing purposes.
 */

static intptr_t
notify_reap_off()
{
	md_reap_off = 1;
	return (0);
}

/*
 * Turn reaping back on.
 */

static intptr_t
notify_reap_on()
{
	md_reap_off = 0;
	return (0);
}

/*
 * Return information that is used to test the notification feature.
 */

static intptr_t
notify_test_stats(md_notify_stats_t *stats)
{
	stats->mds_eventq_mx = &md_eventq_mx;
	stats->mds_reap_count = md_reap_count;
	stats->mds_reap = md_reap;
	stats->mds_max_queue = md_max_notify_queue;
	stats->mds_reap_off = md_reap_off;
	return (0);
}

/*
 * Put this at the end so we don't have to create forward references
 * for everything.
 */
static struct modlmisc modlmisc = {
	&mod_miscops,
	"Solaris Volume Manager notification module"
};

static struct modlinkage modlinkage = {
	MODREV_1, { (void *)&modlmisc, NULL }
};

static md_named_services_t notify_services[] = {
	{notify_interface,	"notify interface"},
	{notify_reap_off,	MD_NOTIFY_REAP_OFF},
	{notify_reap_on,	MD_NOTIFY_REAP_ON},
	{notify_test_stats,	MD_NOTIFY_TEST_STATS},
	{NULL,			0}
};

md_ops_t event_md_ops = {
	NULL,			/* open */
	NULL,			/* close */
	NULL,			/* strategy */
	NULL,			/* print */
	NULL,			/* dump */
	NULL,			/* read */
	NULL,			/* write */
	notify_ioctl,		/* event_ioctls */
	NULL,			/* snarf */
	notify_halt,		/* halt */
	NULL,			/* aread */
	NULL,			/* awrite */
	NULL,			/* import set */
	notify_services		/* named_services */
};

int
_init()
{
	md_event_queue = NULL;
	mutex_init(&md_eventq_mx, NULL, MUTEX_DEFAULT, NULL);
	return (mod_install(&modlinkage));
}

int
_fini()
{
	int	err = 0;

	/*
	 * Don't allow the module to be unloaded while there is a thread
	 * of execution that is waiting for a global lock.
	 */
	if (global_lock_wait_cnt > 0)
		return (EBUSY);

	if ((err = mod_remove(&modlinkage)) != 0)
		return (err);

	md_clear_named_service();
	mutex_destroy(&md_eventq_mx);
	return (err);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
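
/*
 * For illustration only: a userland consumer of this module would be
 * expected to fill in an md_event_ioctl_t and issue the md notify
 * ioctl against the md administrative device.  The sketch below is an
 * assumption based on the fields validated in notify_ioctl(), not a
 * verbatim copy of the libmeta code; the actual ioctl command number
 * and device path are defined elsewhere in the md tree.
 *
 *	md_event_ioctl_t ev;
 *
 *	(void) memset(&ev, 0, sizeof (ev));
 *	ev.mdn_rev = MD_NOTIFY_REVISION;
 *	ev.mdn_magic = MD_EVENT_ID;
 *	(void) strncpy(ev.mdn_name, "myqueue", MD_NOTIFY_NAME_SIZE);
 *	ev.mdn_cmd = EQ_ON;		(create the queue)
 *	...
 *	ev.mdn_cmd = EQ_GET_WAIT;	(then loop, blocking for events)
 */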