/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 * Copyright (c) 2011 Bayard G. Bell. All rights reserved.
 */

#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/errno.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/signal.h>
#include <sys/modctl.h>
#include <sys/proc.h>
#include <sys/lvm/mdvar.h>

md_ops_t		event_md_ops;
#ifndef lint
md_ops_t		*md_interface_ops = &event_md_ops;
#endif

extern void		sigintr();
extern void		sigunintr();
extern md_set_t		md_set[];

extern kmutex_t		md_mx;	/* used to protect md global stuff */
extern kcondvar_t	md_cv;	/* md_status events */
extern int		md_status;
extern clock_t		md_hz;
extern md_event_queue_t	*md_event_queue;
static void		md_reaper();
extern void		md_clear_named_service();

/* event handler stuff */
kmutex_t		md_eventq_mx;
int			md_reap_count = 32;	/* check for pid alive */
int			md_reap = 0;
int			md_max_notify_queue = 512;
int			md_reap_off = 0;	/* non-zero turns off reap */
/* don't allow module to be unloaded until all pending ops are complete */
int			global_lock_wait_cnt = 0;
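
/*
 * md_flush_queue:
 * Free every event still queued on a dead queue.  Returns 1 (and does
 * nothing) while a thread is still waiting on the queue, so the caller
 * must retry later; returns 0 once the queue has been emptied.
 */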
static int
md_flush_queue(md_event_queue_t *queue)
{
	md_event_t	*element, *next_element;
	/*
	 * If something is still waiting on this queue even though the
	 * process/pid no longer exists, leave the queue for the caller
	 * to signal and clean up later.
	 */
	if (queue->mdn_waiting)
		return (1);
	/*
	 * This pid no longer exists, so blow the queue away:
	 * first remove any entries, then unlink it and lastly
	 * free it.
	 */
	element = queue->mdn_front;
	while (element) {
		next_element = element->mdn_next;
		kmem_free(element, sizeof (md_event_t));
		element = next_element;
	}
	queue->mdn_front = queue->mdn_tail = NULL;
	return (0);
}
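
/*
 * md_put_event:
 * Append one event to every registered event queue.  If a queue has
 * reached md_max_notify_queue entries, its oldest entry is recycled for
 * the new event and the queue is marked MD_EVENT_QUEUE_FULL; if no
 * memory is available, the queue is marked MD_EVENT_QUEUE_INVALID.
 * Waiters are signalled, and the reaper is kicked once md_reap_count
 * events have been posted since its last run.
 */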
static void
md_put_event(md_tags_t tag, set_t sp, md_dev64_t dev, int event,
    u_longlong_t user)
{
	md_event_queue_t	*queue;
	md_event_t		*entry;

	if (!md_event_queue)
		return;

	mutex_enter(&md_eventq_mx);
	for (queue = md_event_queue; queue; queue = queue->mdn_nextq) {
		if (queue->mdn_size >= md_max_notify_queue) {
			ASSERT(queue->mdn_front != NULL);
			ASSERT(queue->mdn_front->mdn_next != NULL);
			entry = queue->mdn_front;
			queue->mdn_front = entry->mdn_next;
			queue->mdn_size--;
			queue->mdn_flags |= MD_EVENT_QUEUE_FULL;
		} else
			entry = (md_event_t *)kmem_alloc(sizeof (md_event_t),
			    KM_NOSLEEP);
		if (entry == NULL) {
			queue->mdn_flags |= MD_EVENT_QUEUE_INVALID;
			continue;
		}
		entry->mdn_tag = tag;
		entry->mdn_set = sp;
		entry->mdn_dev = dev;
		entry->mdn_event = event;
		entry->mdn_user = user;
		entry->mdn_next = NULL;
		uniqtime(&entry->mdn_time);
		if (queue->mdn_front == NULL) {
			queue->mdn_front = entry;
			queue->mdn_tail = entry;
		} else {
			queue->mdn_tail->mdn_next = entry;
			queue->mdn_tail = entry;
		}
		if (queue->mdn_waiting)
			cv_signal(&queue->mdn_cv);

		queue->mdn_size++;
	}
	md_reap++;
	mutex_exit(&md_eventq_mx);

	if (md_reap > md_reap_count)
		md_reaper();
}
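
/*
 * md_reaper:
 * Walk the list of event queues and destroy any queue whose owning
 * process has exited, or that has been explicitly marked for
 * destruction, unless the queue is permanent (MD_EVENT_QUEUE_PERM) or
 * reaping has been disabled via md_reap_off.
 */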
static void
md_reaper()
{
	md_event_queue_t	*next = md_event_queue;
	md_event_queue_t	*present, *last = NULL;

	if (md_event_queue == NULL || md_reap_off)
		return;

	mutex_enter(&md_eventq_mx);
	while (next) {
		present = next;
		next = present->mdn_nextq;

		/* check for long term event queue */
		if (present->mdn_flags & MD_EVENT_QUEUE_PERM) {
			last = present;
			continue;
		}

		/* check to see if the pid is still alive */
		if (!md_checkpid(present->mdn_pid, present->mdn_proc))
			present->mdn_flags |= MD_EVENT_QUEUE_DESTROY;

		/* see if queue is a "marked queue"; if so destroy */
		if (!(present->mdn_flags & MD_EVENT_QUEUE_DESTROY)) {
			last = present;
			continue;
		}

		/* yeeeha blow this one away */
		present->mdn_pid = 0;
		present->mdn_proc = NULL;
		/*
		 * If something is still waiting on this queue even
		 * though the process/pid no longer exists, signal the
		 * defunct process and continue on; the queue is cleaned
		 * up on a later pass.
		 */
		if (md_flush_queue(present)) {
			present->mdn_flags = MD_EVENT_QUEUE_DESTROY;
			cv_broadcast(&present->mdn_cv);
			last = present;
			continue;
		}
		/* remove the entry */
		if (last == NULL)
			md_event_queue = next;
		else
			last->mdn_nextq = next;
		cv_destroy(&present->mdn_cv);
		kmem_free(present, sizeof (md_event_queue_t));
	}
	md_reap = 0;
	mutex_exit(&md_eventq_mx);
}
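
/*
 * notify_halt:
 * Module halt entry point.  Only MD_HALT_UNLOAD does real work: it
 * unhooks the queue list, wakes every waiter (retrying up to
 * MD_NOTIFY_HALT_TRIES times), then frees all event queues.  Returns
 * nonzero if a queue cannot be freed, which blocks the unload.
 */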
/* ARGSUSED */
static int
notify_halt(md_haltcmd_t cmd, set_t setno)
{
	md_event_queue_t	*orig_queue, *queue, *queue_free;
	int			i;

	switch (cmd) {
	case MD_HALT_CLOSE:
	case MD_HALT_OPEN:
	case MD_HALT_DOIT:
	case MD_HALT_CHECK:
		return (0);

	case MD_HALT_UNLOAD:
		if (setno != MD_LOCAL_SET)
			return (1);
		mutex_enter(&md_eventq_mx);
		if (md_event_queue == NULL) {
			mutex_exit(&md_eventq_mx);
			return (0);
		}

		orig_queue = md_event_queue;
		md_event_queue = NULL;
		for (i = 0; i < MD_NOTIFY_HALT_TRIES; i++) {
			for (queue = orig_queue; queue;
			    queue = queue->mdn_nextq) {
				if (queue->mdn_waiting == 0) {
					continue;
				}
				queue->mdn_flags = MD_EVENT_QUEUE_DESTROY;
				mutex_exit(&md_eventq_mx);
				cv_broadcast(&queue->mdn_cv);
				delay(md_hz);
				mutex_enter(&md_eventq_mx);
			}
		}
		for (queue = orig_queue; queue; ) {
			if (md_flush_queue(queue)) {
				cmn_err(CE_WARN, "md: queue not freed");
				mutex_exit(&md_eventq_mx);
				return (1);
			}
			queue_free = queue;
			queue = queue->mdn_nextq;
			kmem_free(queue_free, sizeof (md_event_queue_t));
		}
		md_event_queue = NULL;
		mutex_exit(&md_eventq_mx);
		return (0);

	default:
		return (1);
	}
}
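
/*
 * md_find_event_queue:
 * Look up an event queue by name, skipping queues already marked for
 * destruction.  If lock is nonzero, md_eventq_mx is acquired and
 * dropped around the walk; otherwise the caller must already hold it.
 */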
static md_event_queue_t *
md_find_event_queue(char *q_name, int lock)
{
	md_event_queue_t	*event_q = md_event_queue;

	if (lock)
		mutex_enter(&md_eventq_mx);
	ASSERT(MUTEX_HELD(&md_eventq_mx));
	while (event_q) {
		if ((*event_q->mdn_name != *q_name) ||
		    (event_q->mdn_flags & MD_EVENT_QUEUE_DESTROY)) {
			event_q = event_q->mdn_nextq;
			continue;
		}

		if (bcmp(q_name, event_q->mdn_name, MD_NOTIFY_NAME_SIZE) == 0)
			break;
		event_q = event_q->mdn_nextq;
	}
	if (lock)
		mutex_exit(&md_eventq_mx);

	return ((md_event_queue_t *)event_q);
}
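
/*
 * notify_interface:
 * Named-service entry point exported through notify_services below so
 * that other md modules can post events.  Only EQ_PUT is supported.
 * Callers would typically obtain this routine through the md
 * named-service lookup rather than calling it directly.
 */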
static intptr_t
notify_interface(md_event_cmds_t cmd, md_tags_t tag, set_t set, md_dev64_t dev,
    md_event_type_t event)
{
	switch (cmd) {
	case EQ_PUT:
		md_put_event(tag, set, dev, event, (u_longlong_t)0);
		break;
	default:
		return (-1);
	}
	return (0);
}
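
/*
 * notify_fillin_empty_ioctl:
 * Fill the ioctl buffer with an "empty" event (EQ_EMPTY/TAG_EMPTY) and
 * copy it out to the user, so the caller always receives a well-formed
 * reply even when no event is available.
 */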
static int
notify_fillin_empty_ioctl(void *data, void *ioctl_in, size_t sz,
    int mode)
{
	int			err;
	md_event_ioctl_t	*ioctl = (md_event_ioctl_t *)data;

	ioctl->mdn_event = EQ_EMPTY;
	ioctl->mdn_tag = TAG_EMPTY;
	ioctl->mdn_set = MD_ALLSETS;
	ioctl->mdn_dev = MD_ALLDEVS;
	uniqtime32(&ioctl->mdn_time);
	ioctl->mdn_user = (u_longlong_t)0;
	err = ddi_copyout(data, ioctl_in, sz, mode);
	return (err);
}

/*
 * md_wait_for_event:
 * IOLOCK_RETURN, which drops the md_ioctl_lock, is called in this
 * routine to enable other md ioctls to enter the kernel while this
 * thread of execution waits on an event.  When that event occurs, the
 * stopped thread wakes and continues, and md_ioctl_lock must be
 * reacquired.  Even though md_ioctl_lock is interruptible, we choose
 * to ignore EINTR.  Returning without acquiring md_ioctl_lock is
 * catastrophic since it breaks down ioctl single threading.
 *
 * Return:	0	md_eventq_mx held
 *		EINTR	md_eventq_mx not held
 * Always returns with the IOCTL lock held.
 */
static int
md_wait_for_event(md_event_queue_t *event_queue, void *ioctl_in,
    md_event_ioctl_t *ioctl, size_t sz,
    int mode, IOLOCK *lockp)
{
	int rval = 0;

	while (event_queue->mdn_front == NULL) {
		event_queue->mdn_waiting++;
		(void) IOLOCK_RETURN(0, lockp);
		rval = cv_wait_sig(&event_queue->mdn_cv, &md_eventq_mx);
		event_queue->mdn_waiting--;
		if ((rval == 0) || (event_queue->mdn_flags &
		    MD_EVENT_QUEUE_DESTROY)) {
			global_lock_wait_cnt++;
			mutex_exit(&md_eventq_mx);
			/* reenable single threading of ioctls */
			while (md_ioctl_lock_enter() == EINTR)
				;

			(void) notify_fillin_empty_ioctl
			    ((void *)ioctl, ioctl_in, sz, mode);
			mutex_enter(&md_eventq_mx);
			global_lock_wait_cnt--;
			mutex_exit(&md_eventq_mx);
			return (EINTR);
		}
		/*
		 * Reacquire single threading of ioctls.  Drop
		 * md_eventq_mx since md_ioctl_lock_enter can sleep.
		 */
		global_lock_wait_cnt++;
		mutex_exit(&md_eventq_mx);
		while (md_ioctl_lock_enter() == EINTR)
			;
		mutex_enter(&md_eventq_mx);
		global_lock_wait_cnt--;
	}
	return (0);
}
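
/*
 * notify_ioctl:
 * Ioctl entry point for the notification interface.  After validating
 * the revision and magic of the request, dispatches on mdn_cmd:
 * EQ_ON creates a named queue, EQ_OFF marks one for destruction,
 * EQ_GET_WAIT/EQ_GET_NOWAIT dequeue the next event (blocking or not),
 * and EQ_PUT posts an event to all queues.
 */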
/* ARGSUSED */
static int
notify_ioctl(dev_t dev, int icmd, void *ioctl_in, int mode, IOLOCK *lockp)
{
	int			cmd;
	pid_t			pid;
	md_event_queue_t	*event_queue;
	md_event_t		*event;
	cred_t			*credp;
	char			*q_name;
	int			err = 0;
	size_t			sz = 0;
	md_event_ioctl_t	*ioctl;

	sz = sizeof (*ioctl);
	ioctl = kmem_zalloc(sz, KM_SLEEP);

	if (ddi_copyin(ioctl_in, (void *)ioctl, sz, mode)) {
		err = EFAULT;
		goto out;
	}

	if (ioctl->mdn_rev != MD_NOTIFY_REVISION) {
		err = EINVAL;
		goto out;
	}
	if (ioctl->mdn_magic != MD_EVENT_ID) {
		err = EINVAL;
		goto out;
	}

	pid = md_getpid();
	cmd = ioctl->mdn_cmd;
	q_name = ioctl->mdn_name;

	if (((cmd != EQ_OFF) && (cmd != EQ_ON)) && (md_reap >= md_reap_count))
		md_reaper();

	if ((cmd != EQ_ON) && (cmd != EQ_PUT)) {
		mutex_enter(&md_eventq_mx);
		if ((event_queue = md_find_event_queue(q_name, 0)) == NULL) {
			mutex_exit(&md_eventq_mx);
			(void) notify_fillin_empty_ioctl
			    ((void *)ioctl, ioctl_in, sz, mode);
			err = ENOENT;
			goto out;
		}
	}

	switch (cmd) {
	case EQ_ON:

		md_reaper();

		mutex_enter(&md_eventq_mx);
		if (md_find_event_queue(q_name, 0) != NULL) {
			mutex_exit(&md_eventq_mx);
			err = EEXIST;
			break;
		}

		/* allocate and initialize queue head */
		event_queue = (md_event_queue_t *)
		    kmem_alloc(sizeof (md_event_queue_t), KM_NOSLEEP);
		if (event_queue == NULL) {
			mutex_exit(&md_eventq_mx);
			err = ENOMEM;
			break;
		}

		cv_init(&event_queue->mdn_cv, NULL, CV_DEFAULT, NULL);

		event_queue->mdn_flags = 0;
		event_queue->mdn_pid = pid;
		event_queue->mdn_proc = md_getproc();
		event_queue->mdn_size = 0;
		event_queue->mdn_front = NULL;
		event_queue->mdn_tail = NULL;
		event_queue->mdn_waiting = 0;
		event_queue->mdn_nextq = NULL;
		credp = ddi_get_cred();
		event_queue->mdn_uid = crgetuid(credp);
		bcopy(q_name, event_queue->mdn_name,
		    MD_NOTIFY_NAME_SIZE);
		if (ioctl->mdn_flags & EQ_Q_PERM)
			event_queue->mdn_flags |= MD_EVENT_QUEUE_PERM;

		/* link into the list of event queues */
		if (md_event_queue != NULL)
			event_queue->mdn_nextq = md_event_queue;
		md_event_queue = event_queue;
		mutex_exit(&md_eventq_mx);
		err = 0;
		break;

	case EQ_OFF:

		if (md_event_queue == NULL) {
			/* drop md_eventq_mx and free the ioctl buffer */
			mutex_exit(&md_eventq_mx);
			err = ENOENT;
			break;
		}

		event_queue->mdn_flags = MD_EVENT_QUEUE_DESTROY;
		event_queue->mdn_pid = 0;
		event_queue->mdn_proc = NULL;

		if (event_queue->mdn_waiting != 0)
			cv_broadcast(&event_queue->mdn_cv);

		/*
		 * Force the reaper to delete this queue when it has no
		 * process waiting on it.
		 */
		mutex_exit(&md_eventq_mx);
		md_reaper();
		err = 0;
		break;

	case EQ_GET_NOWAIT:
	case EQ_GET_WAIT:
		if (cmd == EQ_GET_WAIT) {
			err = md_wait_for_event(event_queue, ioctl_in,
			    ioctl, sz, mode, lockp);
			if (err == EINTR)
				goto out;
		}
		ASSERT(MUTEX_HELD(&md_eventq_mx));
		if (event_queue->mdn_flags &
		    (MD_EVENT_QUEUE_INVALID | MD_EVENT_QUEUE_FULL)) {
			event_queue->mdn_flags &=
			    ~(MD_EVENT_QUEUE_INVALID | MD_EVENT_QUEUE_FULL);
			mutex_exit(&md_eventq_mx);
			err = notify_fillin_empty_ioctl
			    ((void *)ioctl, ioctl_in, sz, mode);
			ioctl->mdn_event = EQ_NOTIFY_LOST;
			err = ddi_copyout((void *)ioctl, ioctl_in, sz, mode);
			if (err)
				err = EFAULT;
			goto out;
		}
		if (event_queue->mdn_front != NULL) {
			event = event_queue->mdn_front;
			event_queue->mdn_front = event->mdn_next;
			event_queue->mdn_size--;
			if (event_queue->mdn_front == NULL)
				event_queue->mdn_tail = NULL;
			mutex_exit(&md_eventq_mx);
			ioctl->mdn_tag = event->mdn_tag;
			ioctl->mdn_set = event->mdn_set;
			ioctl->mdn_dev = event->mdn_dev;
			ioctl->mdn_event = event->mdn_event;
			ioctl->mdn_user = event->mdn_user;
			ioctl->mdn_time.tv_sec = event->mdn_time.tv_sec;
			ioctl->mdn_time.tv_usec =
			    event->mdn_time.tv_usec;
			kmem_free(event, sizeof (md_event_t));
			err = ddi_copyout((void *)ioctl, ioctl_in, sz, mode);
			if (err)
				err = EFAULT;
			goto out;
		} else { /* no elements on queue */
			mutex_exit(&md_eventq_mx);
			err = notify_fillin_empty_ioctl
			    ((void *)ioctl, ioctl_in, sz, mode);
			if (err)
				err = EFAULT;
		}

		if (cmd == EQ_GET_NOWAIT)
			err = EAGAIN;
		goto out;

	case EQ_PUT:

		if (!md_event_queue) {
			err = ENOENT;
			break;
		}
		md_put_event(ioctl->mdn_tag,
		    ioctl->mdn_set, ioctl->mdn_dev,
		    ioctl->mdn_event, ioctl->mdn_user);
		err = 0;
		goto out;

	default:
		err = EINVAL;
		goto out;
	}

out:
	kmem_free(ioctl, sz);
	return (err);
}
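
/*
 * Usage sketch (illustrative only, not compiled here): a userland
 * consumer drives notify_ioctl() through the md administrative device.
 * The device path and ioctl number shown (/dev/md/admin, MD_IOCNOTIFY)
 * are assumptions based on the usual SVM administrative interface; see
 * <sys/lvm/mdio.h> for the authoritative definitions.
 *
 *	md_event_ioctl_t ev;
 *	int fd = open("/dev/md/admin", O_RDWR);
 *
 *	bzero(&ev, sizeof (ev));
 *	ev.mdn_rev = MD_NOTIFY_REVISION;
 *	ev.mdn_magic = MD_EVENT_ID;
 *	ev.mdn_cmd = EQ_ON;			(create a named queue)
 *	(void) strncpy(ev.mdn_name, "myq", MD_NOTIFY_NAME_SIZE);
 *	if (ioctl(fd, MD_IOCNOTIFY, &ev) < 0)
 *		...
 *
 *	ev.mdn_cmd = EQ_GET_WAIT;		(block for the next event)
 *	if (ioctl(fd, MD_IOCNOTIFY, &ev) == 0)
 *		(void) printf("event %d\n", ev.mdn_event);
 *
 *	ev.mdn_cmd = EQ_OFF;			(tear the queue down)
 *	(void) ioctl(fd, MD_IOCNOTIFY, &ev);
 */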

/*
 * Turn reaping of orphaned queues off, for testing purposes.
 */
static intptr_t
notify_reap_off()
{
	md_reap_off = 1;
	return (0);
}

/*
 * Turn reaping back on.
 */
static intptr_t
notify_reap_on()
{
	md_reap_off = 0;
	return (0);
}

/*
 * Return information that is used to test the notification feature.
 */
static intptr_t
notify_test_stats(md_notify_stats_t *stats)
{
	stats->mds_eventq_mx = &md_eventq_mx;
	stats->mds_reap_count = md_reap_count;
	stats->mds_reap = md_reap;
	stats->mds_max_queue = md_max_notify_queue;
	stats->mds_reap_off = md_reap_off;
	return (0);
}

/*
 * Put this stuff at the end so we don't have to create forward
 * references for everything.
 */
static struct modlmisc modlmisc = {
	&mod_miscops,
	"Solaris Volume Manager notification module"
};

static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlmisc, NULL
};

static md_named_services_t notify_services[] = {
	{notify_interface,	"notify interface"},
	{notify_reap_off,	MD_NOTIFY_REAP_OFF},
	{notify_reap_on,	MD_NOTIFY_REAP_ON},
	{notify_test_stats,	MD_NOTIFY_TEST_STATS},
	{NULL,			0}
};

md_ops_t event_md_ops = {
	NULL,			/* open */
	NULL,			/* close */
	NULL,			/* strategy */
	NULL,			/* print */
	NULL,			/* dump */
	NULL,			/* read */
	NULL,			/* write */
	notify_ioctl,		/* event ioctls */
	NULL,			/* snarf */
	notify_halt,		/* halt */
	NULL,			/* aread */
	NULL,			/* awrite */
	NULL,			/* import set */
	notify_services		/* named services */
};

int
_init()
{
	md_event_queue = NULL;
	mutex_init(&md_eventq_mx, NULL, MUTEX_DEFAULT, NULL);
	return (mod_install(&modlinkage));
}

int
_fini()
{
	int	err = 0;

	/*
	 * Don't allow the module to be unloaded while there is a thread
	 * of execution that is waiting for a global lock.
	 */
	if (global_lock_wait_cnt > 0)
		return (EBUSY);

	if ((err = mod_remove(&modlinkage)) != 0)
		return (err);

	md_clear_named_service();
	mutex_destroy(&md_eventq_mx);
	return (err);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}