/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2015 Joyent, Inc.
 */

/*
 * Support for the signalfd facility, a Linux-borne facility for
 * file descriptor-based synchronous signal consumption.
 *
 * As described on the signalfd(3C) man page, the general idea behind these
 * file descriptors is that they can be used to synchronously consume signals
 * via the read(2) syscall. That capability already exists with the
 * sigwaitinfo(3C) function, but the key advantage of signalfd is that, because
 * it is file descriptor based, poll(2) can be used to determine when signals
 * are available to be consumed.
 *
 * The general implementation uses signalfd_state to hold both the signal set
 * and poll head for an open file descriptor. Because a process can be using
 * different sigfds with different signal sets, each signalfd_state poll head
 * can be thought of as an independent signal stream, and the thread(s) waiting
 * on that stream will get poll notification when any signal in the
 * corresponding set is received.
 *
 * The sigfd_proc_state_t struct lives on the proc_t and maintains per-proc
 * state for function callbacks and data when the proc needs to do work during
 * signal delivery for pollwakeup.
 *
 * The read side of the implementation is straightforward and mimics the
 * kernel behavior for sigtimedwait(). Signals continue to live on either
 * the proc's p_sig, or thread's t_sig, member. Read consumes the signal so
 * that it is no longer pending.
 *
 * The poll side is more complex since all of the sigfds on the process need
 * to be examined every time a signal is delivered to the process in order to
 * pollwake any thread waiting in poll for that signal.
 *
 * Because it is likely that a process will only be using one, or a few, sigfds,
 * but many total file descriptors, we maintain a list of sigfds which need
 * pollwakeup. The list lives on the proc's p_sigfd struct. In this way only
 * zero, or a few, of the state structs will need to be examined every time a
 * signal is delivered to the process, instead of having to examine all of the
 * file descriptors to find the state structs. When a state struct with a
 * matching signal set is found then pollwakeup is called.
 *
 * The sigfd_list is self-cleaning; as signalfd_pollwake_cb is called, the list
 * will clear out on its own. There is an exit helper (signalfd_exit_helper)
 * which cleans up any remaining per-proc state when the process exits.
 *
 * The main complexity with signalfd is the interaction of forking and polling.
 * This interaction is complex because now two processes have an fd that
 * references the same dev_t (and its associated signalfd_state), but signals
 * go to only one of those processes. Also, we don't know when one of the
 * processes closes its fd because our 'close' entry point is only called when
 * the last fd is closed (which could be by either process).
 *
 * Because the state struct is referenced by both file descriptors, and the
 * state struct represents a signal stream needing a pollwakeup, if both
 * processes were polling then both processes would get a pollwakeup when a
 * signal arrives for either process (that is, the pollhead is associated with
 * our dev_t so when a signal arrives the pollwakeup wakes up all waiters).
 *
 * Fortunately this is not a common problem in practice, but the implementation
 * attempts to mitigate unexpected behavior. The typical behavior is that the
 * parent has been polling the signalfd (which is why it was open in the first
 * place) and the parent might have a pending signalfd_state (with the
 * pollhead) on its per-process sigfd_list. After the fork the child will
 * simply close that fd (among others) as part of the typical fork/close/exec
 * pattern. Because the child will never poll that fd, it will never get any
 * state onto its own sigfd_list (the child starts with a null list). The
 * intention is that the child sees no pollwakeup activity for signals unless
 * it explicitly reinvokes poll on the sigfd.
 *
 * As background, there are two primary polling cases to consider when the
 * parent process forks:
 * 1) If any thread is blocked in poll(2) then both the parent and child will
 *    return from the poll syscall with EINTR. This means that if either
 *    process wants to re-poll on a sigfd then it needs to re-run poll and
 *    would come back into the signalfd_poll entry point. The parent would
 *    already have the dev_t's state on its sigfd_list and the child would not
 *    have anything there unless it called poll again on its fd.
 * 2) If the process is using /dev/poll(7D) then the polling info is being
 *    cached by the poll device and the process might not currently be blocked
 *    on anything polling related. A subsequent DP_POLL ioctl will not invoke
 *    our signalfd_poll entry point again. Because the parent still has its
 *    sigfd_list set up, an incoming signal will hit our signalfd_pollwake_cb
 *    entry point, which in turn calls pollwake, and /dev/poll will do the
 *    right thing on DP_POLL. The child will not have a sigfd_list yet so the
 *    signal will not cause a pollwakeup. The dp code does its own handling for
 *    cleaning up its cache.
 *
 * This leaves only one odd corner case. If the parent and child both use
 * the dup-ed sigfd to poll then when a signal is delivered to either process
 * there is no way to determine which one should get the pollwakeup (since
 * both processes will be queued on the same signal stream poll head). What
 * happens in this case is that both processes will return from poll, but only
 * one of them will actually have a signal to read. The other will return
 * from read with EAGAIN, or block. This case is actually similar to the
 * situation within a single process which got two different sigfds with the
 * same mask (or polls on two fds that are dup-ed). Both would return from poll
 * when a signal arrives but only one read would consume the signal and the
 * other read would fail or block. Applications which poll on shared fds
 * cannot assume that a subsequent read will actually obtain data.
 */

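/*
 * For orientation, here is a minimal userland sketch of the model described
 * above: block a signal, create a signalfd for it, poll(2) until it is
 * ready, then read(2) to consume it. This is illustrative only (it is not
 * part of the driver), error handling is elided, and the choice of SIGUSR1
 * is arbitrary; the type and field names come from <sys/signalfd.h> as used
 * by this driver.
 *
 *      #include <sys/signalfd.h>
 *      #include <signal.h>
 *      #include <poll.h>
 *      #include <unistd.h>
 *      #include <stdio.h>
 *
 *      int
 *      main(void)
 *      {
 *              sigset_t mask;
 *              signalfd_siginfo_t ssi;
 *              struct pollfd pfd;
 *              int fd;
 *
 *              (void) sigemptyset(&mask);
 *              (void) sigaddset(&mask, SIGUSR1);
 *
 *              Block normal delivery so the signal stays pending and can
 *              be consumed through the fd instead:
 *
 *              (void) sigprocmask(SIG_BLOCK, &mask, NULL);
 *              fd = signalfd(-1, &mask, 0);
 *
 *              pfd.fd = fd;
 *              pfd.events = POLLIN;
 *
 *              if (poll(&pfd, 1, -1) == 1 &&
 *                  read(fd, &ssi, sizeof (ssi)) == sizeof (ssi))
 *                      (void) printf("got signal %d\n", (int)ssi.ssi_signo);
 *
 *              return (0);
 *      }
 */
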
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/signalfd.h>
#include <sys/conf.h>
#include <sys/sysmacros.h>
#include <sys/filio.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/schedctl.h>
#include <sys/id_space.h>
#include <sys/sdt.h>

typedef struct signalfd_state signalfd_state_t;

struct signalfd_state {
        kmutex_t sfd_lock;                      /* lock protecting state */
        pollhead_t sfd_pollhd;                  /* poll head */
        k_sigset_t sfd_set;                     /* signals for this fd */
        signalfd_state_t *sfd_next;             /* next state on global list */
};

/*
 * Internal global variables.
 */
static kmutex_t         signalfd_lock;          /* lock protecting state */
static dev_info_t       *signalfd_devi;         /* device info */
static id_space_t       *signalfd_minor;        /* minor number arena */
static void             *signalfd_softstate;    /* softstate pointer */
static signalfd_state_t *signalfd_state;        /* global list of state */

/*
 * If we don't already have an entry in the proc's list for this state, add one.
 */
static void
signalfd_wake_list_add(signalfd_state_t *state)
{
        proc_t *p = curproc;
        list_t *lst;
        sigfd_wake_list_t *wlp;

        ASSERT(MUTEX_HELD(&p->p_lock));
        ASSERT(p->p_sigfd != NULL);

        lst = &((sigfd_proc_state_t *)p->p_sigfd)->sigfd_list;
        for (wlp = list_head(lst); wlp != NULL; wlp = list_next(lst, wlp)) {
                if (wlp->sigfd_wl_state == state)
                        break;
        }

        if (wlp == NULL) {
                wlp = kmem_zalloc(sizeof (sigfd_wake_list_t), KM_SLEEP);
                wlp->sigfd_wl_state = state;
                list_insert_head(lst, wlp);
        }
}

static void
signalfd_wake_rm(list_t *lst, sigfd_wake_list_t *wlp)
{
        list_remove(lst, wlp);
        kmem_free(wlp, sizeof (sigfd_wake_list_t));
}

static void
signalfd_wake_list_rm(proc_t *p, signalfd_state_t *state)
{
        sigfd_wake_list_t *wlp;
        list_t *lst;

        ASSERT(MUTEX_HELD(&p->p_lock));

        if (p->p_sigfd == NULL)
                return;

        lst = &((sigfd_proc_state_t *)p->p_sigfd)->sigfd_list;
        for (wlp = list_head(lst); wlp != NULL; wlp = list_next(lst, wlp)) {
                if (wlp->sigfd_wl_state == state) {
                        signalfd_wake_rm(lst, wlp);
                        break;
                }
        }

        if (list_is_empty(lst)) {
                ((sigfd_proc_state_t *)p->p_sigfd)->sigfd_pollwake_cb = NULL;
                list_destroy(lst);
                kmem_free(p->p_sigfd, sizeof (sigfd_proc_state_t));
                p->p_sigfd = NULL;
        }
}

static void
signalfd_wake_list_cleanup(proc_t *p)
{
        sigfd_wake_list_t *wlp;
        list_t *lst;

        ASSERT(MUTEX_HELD(&p->p_lock));

        ((sigfd_proc_state_t *)p->p_sigfd)->sigfd_pollwake_cb = NULL;

        lst = &((sigfd_proc_state_t *)p->p_sigfd)->sigfd_list;
        while (!list_is_empty(lst)) {
                wlp = (sigfd_wake_list_t *)list_remove_head(lst);
                kmem_free(wlp, sizeof (sigfd_wake_list_t));
        }
}

static void
signalfd_exit_helper(void)
{
        proc_t *p = curproc;
        list_t *lst;

        /* This being non-null is the only way we can get here */
        ASSERT(p->p_sigfd != NULL);

        mutex_enter(&p->p_lock);
        lst = &((sigfd_proc_state_t *)p->p_sigfd)->sigfd_list;

        signalfd_wake_list_cleanup(p);
        list_destroy(lst);
        kmem_free(p->p_sigfd, sizeof (sigfd_proc_state_t));
        p->p_sigfd = NULL;
        mutex_exit(&p->p_lock);
}

/*
 * Called every time a signal is delivered to the process so that we can
 * see if any signal stream needs a pollwakeup. We maintain a list of
 * signal state elements so that we don't have to look at every file descriptor
 * on the process. If necessary, a further optimization would be to maintain a
 * signal set mask that is a union of all of the sets in the list so that
 * we don't even traverse the list if the signal is not in one of the elements.
 * However, since the list is likely to be very short, this is not currently
 * being done. A more complex data structure might also be used, but it is
 * unclear what that would be since each signal set needs to be checked for a
 * match.
 */
static void
signalfd_pollwake_cb(void *arg0, int sig)
{
        proc_t *p = (proc_t *)arg0;
        list_t *lst;
        sigfd_wake_list_t *wlp;

        ASSERT(MUTEX_HELD(&p->p_lock));

        if (p->p_sigfd == NULL)
                return;

        lst = &((sigfd_proc_state_t *)p->p_sigfd)->sigfd_list;
        wlp = list_head(lst);
        while (wlp != NULL) {
                signalfd_state_t *state = wlp->sigfd_wl_state;

                mutex_enter(&state->sfd_lock);

                if (sigismember(&state->sfd_set, sig) &&
                    state->sfd_pollhd.ph_list != NULL) {
                        sigfd_wake_list_t *tmp = wlp;

                        /* remove it from the list */
                        wlp = list_next(lst, wlp);
                        signalfd_wake_rm(lst, tmp);

                        mutex_exit(&state->sfd_lock);
                        pollwakeup(&state->sfd_pollhd, POLLRDNORM | POLLIN);
                } else {
                        mutex_exit(&state->sfd_lock);
                        wlp = list_next(lst, wlp);
                }
        }
}

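/*
 * Should the walk above ever prove hot, the union-mask optimization
 * mentioned in the comment might look roughly like the following sketch.
 * The sigfd_wl_mask field is hypothetical (sigfd_proc_state_t has no such
 * member today); it would hold the union of every sfd_set on the
 * sigfd_list, recomputed whenever the list changes, and be checked before
 * the list walk:
 *
 *      k_sigset_t *wmask =
 *          &((sigfd_proc_state_t *)p->p_sigfd)->sigfd_wl_mask;
 *
 *      if (!sigismember(wmask, sig))
 *              return;         (no sigfd on this proc includes sig)
 *
 * The list would then be traversed only for signals that at least one
 * signalfd_state is actually watching.
 */
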
_NOTE(ARGSUSED(1))
static int
signalfd_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
{
        signalfd_state_t *state;
        major_t major = getemajor(*devp);
        minor_t minor = getminor(*devp);

        if (minor != SIGNALFDMNRN_SIGNALFD)
                return (ENXIO);

        mutex_enter(&signalfd_lock);

        minor = (minor_t)id_allocff(signalfd_minor);

        if (ddi_soft_state_zalloc(signalfd_softstate, minor) != DDI_SUCCESS) {
                id_free(signalfd_minor, minor);
                mutex_exit(&signalfd_lock);
                return (ENODEV);
        }

        state = ddi_get_soft_state(signalfd_softstate, minor);
        *devp = makedevice(major, minor);

        state->sfd_next = signalfd_state;
        signalfd_state = state;

        mutex_exit(&signalfd_lock);

        return (0);
}

/*
 * Consume one signal from our set in a manner similar to sigtimedwait().
 * The block parameter is used to control whether we wait for a signal or
 * return immediately if no signal is pending. We use the thread's t_sigwait
 * member in the same way that it is used by sigtimedwait.
 *
 * Return 0 if we successfully consumed a signal or an errno if not.
 */
static int
consume_signal(k_sigset_t set, uio_t *uio, boolean_t block)
{
        k_sigset_t oldmask;
        kthread_t *t = curthread;
        klwp_t *lwp = ttolwp(t);
        proc_t *p = ttoproc(t);
        timespec_t now;
        timespec_t *rqtp = NULL;        /* null means blocking */
        int timecheck = 0;
        int ret = 0;
        k_siginfo_t info, *infop;
        signalfd_siginfo_t ssi, *ssp = &ssi;

        if (block == B_FALSE) {
                timecheck = timechanged;
                gethrestime(&now);
                rqtp = &now;        /* non-blocking check for pending signals */
        }

        t->t_sigwait = set;

        mutex_enter(&p->p_lock);
        /*
         * Set the thread's signal mask to unmask those signals in the
         * specified set.
         */
        schedctl_finish_sigblock(t);
        oldmask = t->t_hold;
        sigdiffset(&t->t_hold, &t->t_sigwait);

        /*
         * Based on rqtp, wait indefinitely until we take a signal in our set
         * or return immediately if there are no signals pending from our set.
         */
        while ((ret = cv_waituntil_sig(&t->t_delay_cv, &p->p_lock, rqtp,
            timecheck)) > 0)
                continue;

        /* Restore thread's signal mask to its previous value. */
        t->t_hold = oldmask;
        t->t_sig_check = 1;  /* so post_syscall sees new t_hold mask */

        if (ret == -1) {
                mutex_exit(&p->p_lock);
                sigemptyset(&t->t_sigwait);
                return (EAGAIN);        /* no signals pending */
        }

        /* Don't bother with signal if it is not in request set. */
        if (lwp->lwp_cursig == 0 ||
            !sigismember(&t->t_sigwait, lwp->lwp_cursig)) {
                mutex_exit(&p->p_lock);
                /*
                 * lwp_cursig is zero if pokelwps() awakened cv_wait_sig().
                 * This happens if some other thread in this process called
                 * forkall() or exit().
                 */
                sigemptyset(&t->t_sigwait);
                return (EINTR);
        }

        if (lwp->lwp_curinfo) {
                infop = &lwp->lwp_curinfo->sq_info;
        } else {
                infop = &info;
                bzero(infop, sizeof (info));
                infop->si_signo = lwp->lwp_cursig;
                infop->si_code = SI_NOINFO;
        }

        lwp->lwp_ru.nsignals++;

        DTRACE_PROC2(signal__clear, int, ret, ksiginfo_t *, infop);
        lwp->lwp_cursig = 0;
        lwp->lwp_extsig = 0;
        mutex_exit(&p->p_lock);

        /* Convert k_siginfo into external, datamodel independent, struct. */
        bzero(ssp, sizeof (*ssp));
        ssp->ssi_signo = infop->si_signo;
        ssp->ssi_errno = infop->si_errno;
        ssp->ssi_code = infop->si_code;
        ssp->ssi_pid = infop->si_pid;
        ssp->ssi_uid = infop->si_uid;
        ssp->ssi_fd = infop->si_fd;
        ssp->ssi_band = infop->si_band;
        ssp->ssi_trapno = infop->si_trapno;
        ssp->ssi_status = infop->si_status;
        ssp->ssi_utime = infop->si_utime;
        ssp->ssi_stime = infop->si_stime;
        ssp->ssi_addr = (uint64_t)(intptr_t)infop->si_addr;

        ret = uiomove(ssp, sizeof (*ssp), UIO_READ, uio);

        if (lwp->lwp_curinfo) {
                siginfofree(lwp->lwp_curinfo);
                lwp->lwp_curinfo = NULL;
        }
        sigemptyset(&t->t_sigwait);
        return (ret);
}

/*
 * This is similar to sigtimedwait. Based on the fd mode we may wait until a
 * signal within our specified set is posted. We consume as many available
 * signals within our set as we can.
 */
_NOTE(ARGSUSED(2))
static int
signalfd_read(dev_t dev, uio_t *uio, cred_t *cr)
{
        signalfd_state_t *state;
        minor_t minor = getminor(dev);
        boolean_t block = B_TRUE;
        k_sigset_t set;
        boolean_t got_one = B_FALSE;
        int res;

        if (uio->uio_resid < sizeof (signalfd_siginfo_t))
                return (EINVAL);

        state = ddi_get_soft_state(signalfd_softstate, minor);

        if (uio->uio_fmode & (FNDELAY|FNONBLOCK))
                block = B_FALSE;

        mutex_enter(&state->sfd_lock);
        set = state->sfd_set;
        mutex_exit(&state->sfd_lock);

        if (sigisempty(&set))
                return (EINVAL);

        do {
                /* Use the snapshot of the set taken under sfd_lock above. */
                res = consume_signal(set, uio, block);
                if (res == 0)
                        got_one = B_TRUE;

                /*
                 * After consuming one signal we won't block trying to consume
                 * further signals.
                 */
                block = B_FALSE;
        } while (res == 0 && uio->uio_resid >= sizeof (signalfd_siginfo_t));

        if (got_one)
                res = 0;

        return (res);
}

/*
 * If k_sigset_t's were a single word, we would do:
 *      return (((p->p_sig | t->t_sig) & set) & fillset);
 */
static int
signalfd_sig_pending(proc_t *p, kthread_t *t, k_sigset_t set)
{
        return (((p->p_sig.__sigbits[0] | t->t_sig.__sigbits[0]) &
            set.__sigbits[0]) |
            ((p->p_sig.__sigbits[1] | t->t_sig.__sigbits[1]) &
            set.__sigbits[1]) |
            (((p->p_sig.__sigbits[2] | t->t_sig.__sigbits[2]) &
            set.__sigbits[2]) & FILLSET2));
}

_NOTE(ARGSUSED(4))
static int
signalfd_poll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
        signalfd_state_t *state;
        minor_t minor = getminor(dev);
        kthread_t *t = curthread;
        proc_t *p = ttoproc(t);
        short revents = 0;

        state = ddi_get_soft_state(signalfd_softstate, minor);

        mutex_enter(&state->sfd_lock);

        if (signalfd_sig_pending(p, t, state->sfd_set) != 0)
                revents |= POLLRDNORM | POLLIN;

        mutex_exit(&state->sfd_lock);

        if (!(*reventsp = revents & events) && !anyyet) {
                *phpp = &state->sfd_pollhd;

                /*
                 * Enable pollwakeup handling.
                 */
                if (p->p_sigfd == NULL) {
                        sigfd_proc_state_t *pstate;

                        pstate = kmem_zalloc(sizeof (sigfd_proc_state_t),
                            KM_SLEEP);
                        list_create(&pstate->sigfd_list,
                            sizeof (sigfd_wake_list_t),
                            offsetof(sigfd_wake_list_t, sigfd_wl_lst));

                        mutex_enter(&p->p_lock);
                        /* check again now that we're locked */
                        if (p->p_sigfd == NULL) {
                                p->p_sigfd = pstate;
                        } else {
                                /* someone beat us to it */
                                list_destroy(&pstate->sigfd_list);
                                kmem_free(pstate, sizeof (sigfd_proc_state_t));
                        }
                        mutex_exit(&p->p_lock);
                }

                mutex_enter(&p->p_lock);
                if (((sigfd_proc_state_t *)p->p_sigfd)->sigfd_pollwake_cb ==
                    NULL) {
                        ((sigfd_proc_state_t *)p->p_sigfd)->sigfd_pollwake_cb =
                            signalfd_pollwake_cb;
                }
                signalfd_wake_list_add(state);
                mutex_exit(&p->p_lock);
        }

        return (0);
}

_NOTE(ARGSUSED(4))
static int
signalfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
{
        signalfd_state_t *state;
        minor_t minor = getminor(dev);
        sigset_t mask;

        state = ddi_get_soft_state(signalfd_softstate, minor);

        switch (cmd) {
        case SIGNALFDIOC_MASK:
                if (ddi_copyin((caddr_t)arg, (caddr_t)&mask, sizeof (sigset_t),
                    md) != 0)
                        return (EFAULT);

                mutex_enter(&state->sfd_lock);
                sigutok(&mask, &state->sfd_set);
                mutex_exit(&state->sfd_lock);

                return (0);

        default:
                break;
        }

        return (ENOTTY);
}

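/*
 * For context, a consumer reaches SIGNALFDIOC_MASK roughly as in the sketch
 * below. This is an assumption about how a signalfd(3C)-style wrapper might
 * drive the device (the actual libc implementation may differ); what is
 * grounded in the code above is that the argument is a pointer to a
 * sigset_t, matching the ddi_copyin(), and that the node is created as
 * /dev/signalfd in signalfd_attach():
 *
 *      #include <sys/signalfd.h>
 *      #include <signal.h>
 *      #include <fcntl.h>
 *      #include <stdio.h>
 *      #include <unistd.h>
 *
 *      sigset_t mask;
 *      int fd = open("/dev/signalfd", O_RDWR);
 *
 *      (void) sigemptyset(&mask);
 *      (void) sigaddset(&mask, SIGINT);
 *      if (ioctl(fd, SIGNALFDIOC_MASK, &mask) != 0)
 *              perror("SIGNALFDIOC_MASK");
 */
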
_NOTE(ARGSUSED(1))
static int
signalfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
{
        signalfd_state_t *state, **sp;
        minor_t minor = getminor(dev);
        proc_t *p = curproc;

        state = ddi_get_soft_state(signalfd_softstate, minor);

        if (state->sfd_pollhd.ph_list != NULL) {
                pollwakeup(&state->sfd_pollhd, POLLERR);
                pollhead_clean(&state->sfd_pollhd);
        }

        /* Make sure our state is removed from our proc's pollwake list. */
        mutex_enter(&p->p_lock);
        signalfd_wake_list_rm(p, state);
        mutex_exit(&p->p_lock);

        mutex_enter(&signalfd_lock);

        /* Remove our state from our global list. */
        for (sp = &signalfd_state; *sp != state; sp = &((*sp)->sfd_next))
                VERIFY(*sp != NULL);

        *sp = (*sp)->sfd_next;

        ddi_soft_state_free(signalfd_softstate, minor);
        id_free(signalfd_minor, minor);

        mutex_exit(&signalfd_lock);

        return (0);
}

static int
signalfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
        if (cmd != DDI_ATTACH || signalfd_devi != NULL)
                return (DDI_FAILURE);

        mutex_enter(&signalfd_lock);

        signalfd_minor = id_space_create("signalfd_minor", 1, L_MAXMIN32 + 1);
        if (signalfd_minor == NULL) {
                cmn_err(CE_WARN, "signalfd couldn't create id space");
                mutex_exit(&signalfd_lock);
                return (DDI_FAILURE);
        }

        if (ddi_soft_state_init(&signalfd_softstate,
            sizeof (signalfd_state_t), 0) != 0) {
                cmn_err(CE_WARN, "signalfd failed to create soft state");
                id_space_destroy(signalfd_minor);
                mutex_exit(&signalfd_lock);
                return (DDI_FAILURE);
        }

        if (ddi_create_minor_node(devi, "signalfd", S_IFCHR,
            SIGNALFDMNRN_SIGNALFD, DDI_PSEUDO, NULL) == DDI_FAILURE) {
                cmn_err(CE_NOTE, "/dev/signalfd couldn't create minor node");
                ddi_soft_state_fini(&signalfd_softstate);
                id_space_destroy(signalfd_minor);
                mutex_exit(&signalfd_lock);
                return (DDI_FAILURE);
        }

        ddi_report_dev(devi);
        signalfd_devi = devi;

        sigfd_exit_helper = signalfd_exit_helper;

        mutex_exit(&signalfd_lock);

        return (DDI_SUCCESS);
}

_NOTE(ARGSUSED(0))
static int
signalfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
        switch (cmd) {
        case DDI_DETACH:
                break;

        default:
                return (DDI_FAILURE);
        }

        /* list should be empty */
        VERIFY(signalfd_state == NULL);

        mutex_enter(&signalfd_lock);
        id_space_destroy(signalfd_minor);

        ddi_remove_minor_node(signalfd_devi, NULL);
        signalfd_devi = NULL;
        sigfd_exit_helper = NULL;

        ddi_soft_state_fini(&signalfd_softstate);
        mutex_exit(&signalfd_lock);

        return (DDI_SUCCESS);
}

_NOTE(ARGSUSED(0))
static int
signalfd_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
        int error;

        switch (infocmd) {
        case DDI_INFO_DEVT2DEVINFO:
                *result = (void *)signalfd_devi;
                error = DDI_SUCCESS;
                break;
        case DDI_INFO_DEVT2INSTANCE:
                *result = (void *)0;
                error = DDI_SUCCESS;
                break;
        default:
                error = DDI_FAILURE;
        }
        return (error);
}

static struct cb_ops signalfd_cb_ops = {
        signalfd_open,          /* open */
        signalfd_close,         /* close */
        nulldev,                /* strategy */
        nulldev,                /* print */
        nodev,                  /* dump */
        signalfd_read,          /* read */
        nodev,                  /* write */
        signalfd_ioctl,         /* ioctl */
        nodev,                  /* devmap */
        nodev,                  /* mmap */
        nodev,                  /* segmap */
        signalfd_poll,          /* poll */
        ddi_prop_op,            /* cb_prop_op */
        0,                      /* streamtab */
        D_NEW | D_MP            /* Driver compatibility flag */
};

static struct dev_ops signalfd_ops = {
        DEVO_REV,               /* devo_rev */
        0,                      /* refcnt */
        signalfd_info,          /* get_dev_info */
        nulldev,                /* identify */
        nulldev,                /* probe */
        signalfd_attach,        /* attach */
        signalfd_detach,        /* detach */
        nodev,                  /* reset */
        &signalfd_cb_ops,       /* driver operations */
        NULL,                   /* bus operations */
        nodev,                  /* dev power */
        ddi_quiesce_not_needed, /* quiesce */
};

static struct modldrv modldrv = {
        &mod_driverops,         /* module type (this is a pseudo driver) */
        "signalfd support",     /* name of module */
        &signalfd_ops,          /* driver ops */
};

static struct modlinkage modlinkage = {
        MODREV_1,
        { (void *)&modldrv, NULL }
};

int
_init(void)
{
        return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
        return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
        return (mod_remove(&modlinkage));
}