1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright (c) 2015 Joyent, Inc.  All rights reserved.
  14  */
  15 
  16 /*
  17  * Support for the eventfd facility, a Linux-borne facility for user-generated
  18  * file descriptor-based events.
  19  */
  20 
  21 #include <sys/ddi.h>
  22 #include <sys/sunddi.h>
  23 #include <sys/eventfd.h>
  24 #include <sys/conf.h>
  25 #include <sys/vmem.h>
  26 #include <sys/sysmacros.h>
  27 #include <sys/filio.h>
  28 #include <sys/stat.h>
  29 #include <sys/file.h>
  30 
  31 struct eventfd_state;
  32 typedef struct eventfd_state eventfd_state_t;
  33 
  34 struct eventfd_state {
  35         kmutex_t efd_lock;                      /* lock protecting state */
  36         boolean_t efd_semaphore;                /* boolean: sema. semantics */
  37         kcondvar_t efd_cv;                      /* condvar */
  38         pollhead_t efd_pollhd;                  /* poll head */
  39         uint64_t efd_value;                     /* value */
  40         eventfd_state_t *efd_next;              /* next state on global list */
  41 };
  42 
  43 /*
  44  * Internal global variables.
  45  */
  46 static kmutex_t         eventfd_lock;           /* lock protecting state */
  47 static dev_info_t       *eventfd_devi;          /* device info */
  48 static vmem_t           *eventfd_minor;         /* minor number arena */
  49 static void             *eventfd_softstate;     /* softstate pointer */
  50 static eventfd_state_t  *eventfd_state;         /* global list of state */
  51 
  52 /*ARGSUSED*/
  53 static int
  54 eventfd_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
  55 {
  56         eventfd_state_t *state;
  57         major_t major = getemajor(*devp);
  58         minor_t minor = getminor(*devp);
  59 
  60         if (minor != EVENTFDMNRN_EVENTFD)
  61                 return (ENXIO);
  62 
  63         mutex_enter(&eventfd_lock);
  64 
  65         minor = (minor_t)(uintptr_t)vmem_alloc(eventfd_minor, 1,
  66             VM_BESTFIT | VM_SLEEP);
  67 
  68         if (ddi_soft_state_zalloc(eventfd_softstate, minor) != DDI_SUCCESS) {
  69                 vmem_free(eventfd_minor, (void *)(uintptr_t)minor, 1);
  70                 mutex_exit(&eventfd_lock);
  71                 return (NULL);
  72         }
  73 
  74         state = ddi_get_soft_state(eventfd_softstate, minor);
  75         *devp = makedevice(major, minor);
  76 
  77         state->efd_next = eventfd_state;
  78         eventfd_state = state;
  79 
  80         mutex_exit(&eventfd_lock);
  81 
  82         return (0);
  83 }
  84 
  85 /*ARGSUSED*/
  86 static int
  87 eventfd_read(dev_t dev, uio_t *uio, cred_t *cr)
  88 {
  89         eventfd_state_t *state;
  90         minor_t minor = getminor(dev);
  91         uint64_t val, oval;
  92         int err;
  93 
  94         if (uio->uio_resid < sizeof (val))
  95                 return (EINVAL);
  96 
  97         state = ddi_get_soft_state(eventfd_softstate, minor);
  98 
  99         mutex_enter(&state->efd_lock);
 100 
 101         while (state->efd_value == 0) {
 102                 if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) {
 103                         mutex_exit(&state->efd_lock);
 104                         return (EAGAIN);
 105                 }
 106 
 107                 if (!cv_wait_sig_swap(&state->efd_cv, &state->efd_lock)) {
 108                         mutex_exit(&state->efd_lock);
 109                         return (EINTR);
 110                 }
 111         }
 112 
 113         /*
 114          * We have a non-zero value and we own the lock; our behavior now
 115          * depends on whether or not EFD_SEMAPHORE was set when the eventfd
 116          * was created.
 117          */
 118         val = oval = state->efd_value;
 119 
 120         if (state->efd_semaphore) {
 121                 state->efd_value--;
 122                 val = 1;
 123         } else {
 124                 state->efd_value = 0;
 125         }
 126 
 127         err = uiomove(&val, sizeof (val), UIO_READ, uio);
 128 
 129         mutex_exit(&state->efd_lock);
 130 
 131         if (oval == EVENTFD_VALMAX) {
 132                 cv_broadcast(&state->efd_cv);
 133                 pollwakeup(&state->efd_pollhd, POLLWRNORM | POLLOUT);
 134         }
 135 
 136         return (err);
 137 }
 138 
 139 /*ARGSUSED*/
 140 static int
 141 eventfd_write(dev_t dev, struct uio *uio, cred_t *credp)
 142 {
 143         eventfd_state_t *state;
 144         minor_t minor = getminor(dev);
 145         uint64_t val, oval;
 146         int err;
 147 
 148         if (uio->uio_resid < sizeof (val))
 149                 return (EINVAL);
 150 
 151         if ((err = uiomove(&val, sizeof (val), UIO_WRITE, uio)) != 0)
 152                 return (err);
 153 
 154         if (val > EVENTFD_VALMAX)
 155                 return (EINVAL);
 156 
 157         state = ddi_get_soft_state(eventfd_softstate, minor);
 158 
 159         mutex_enter(&state->efd_lock);
 160 
 161         while (val > EVENTFD_VALMAX - state->efd_value) {
 162                 if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) {
 163                         mutex_exit(&state->efd_lock);
 164                         return (EAGAIN);
 165                 }
 166 
 167                 if (!cv_wait_sig_swap(&state->efd_cv, &state->efd_lock)) {
 168                         mutex_exit(&state->efd_lock);
 169                         return (EINTR);
 170                 }
 171         }
 172 
 173         /*
 174          * We now know that we can add the value without overflowing.
 175          */
 176         state->efd_value = (oval = state->efd_value) + val;
 177 
 178         mutex_exit(&state->efd_lock);
 179 
 180         if (oval == 0) {
 181                 cv_broadcast(&state->efd_cv);
 182                 pollwakeup(&state->efd_pollhd, POLLRDNORM | POLLIN);
 183         }
 184 
 185         return (0);
 186 }
 187 
 188 /*ARGSUSED*/
 189 static int
 190 eventfd_poll(dev_t dev, short events, int anyyet, short *reventsp,
 191     struct pollhead **phpp)
 192 {
 193         eventfd_state_t *state;
 194         minor_t minor = getminor(dev);
 195         short revents = 0;
 196 
 197         state = ddi_get_soft_state(eventfd_softstate, minor);
 198 
 199         mutex_enter(&state->efd_lock);
 200 
 201         if (state->efd_value > 0)
 202                 revents |= POLLRDNORM | POLLIN;
 203 
 204         if (state->efd_value < EVENTFD_VALMAX)
 205                 revents |= POLLWRNORM | POLLOUT;
 206 
 207         if (!(*reventsp = revents & events) && !anyyet)
 208                 *phpp = &state->efd_pollhd;
 209 
 210         mutex_exit(&state->efd_lock);
 211 
 212         return (0);
 213 }
 214 
 215 /*ARGSUSED*/
 216 static int
 217 eventfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
 218 {
 219         eventfd_state_t *state;
 220         minor_t minor = getminor(dev);
 221 
 222         state = ddi_get_soft_state(eventfd_softstate, minor);
 223 
 224         switch (cmd) {
 225         case EVENTFDIOC_SEMAPHORE: {
 226                 mutex_enter(&state->efd_lock);
 227                 state->efd_semaphore ^= 1;
 228                 mutex_exit(&state->efd_lock);
 229 
 230                 return (0);
 231         }
 232 
 233         default:
 234                 break;
 235         }
 236 
 237         return (ENOTTY);
 238 }
 239 
 240 /*ARGSUSED*/
 241 static int
 242 eventfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
 243 {
 244         eventfd_state_t *state, **sp;
 245         minor_t minor = getminor(dev);
 246 
 247         state = ddi_get_soft_state(eventfd_softstate, minor);
 248 
 249         if (state->efd_pollhd.ph_list != NULL) {
 250                 pollwakeup(&state->efd_pollhd, POLLERR);
 251                 pollhead_clean(&state->efd_pollhd);
 252         }
 253 
 254         mutex_enter(&eventfd_lock);
 255 
 256         /*
 257          * Remove our state from our global list.
 258          */
 259         for (sp = &eventfd_state; *sp != state; sp = &((*sp)->efd_next))
 260                 VERIFY(*sp != NULL);
 261 
 262         *sp = (*sp)->efd_next;
 263 
 264         ddi_soft_state_free(eventfd_softstate, minor);
 265         vmem_free(eventfd_minor, (void *)(uintptr_t)minor, 1);
 266 
 267         mutex_exit(&eventfd_lock);
 268 
 269         return (0);
 270 }
 271 
 272 static int
 273 eventfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
 274 {
 275         switch (cmd) {
 276         case DDI_ATTACH:
 277                 break;
 278 
 279         case DDI_RESUME:
 280                 return (DDI_SUCCESS);
 281 
 282         default:
 283                 return (DDI_FAILURE);
 284         }
 285 
 286         mutex_enter(&eventfd_lock);
 287 
 288         if (ddi_soft_state_init(&eventfd_softstate,
 289             sizeof (eventfd_state_t), 0) != 0) {
 290                 cmn_err(CE_NOTE, "/dev/eventfd failed to create soft state");
 291                 mutex_exit(&eventfd_lock);
 292                 return (DDI_FAILURE);
 293         }
 294 
 295         if (ddi_create_minor_node(devi, "eventfd", S_IFCHR,
 296             EVENTFDMNRN_EVENTFD, DDI_PSEUDO, NULL) == DDI_FAILURE) {
 297                 cmn_err(CE_NOTE, "/dev/eventfd couldn't create minor node");
 298                 ddi_soft_state_fini(&eventfd_softstate);
 299                 mutex_exit(&eventfd_lock);
 300                 return (DDI_FAILURE);
 301         }
 302 
 303         ddi_report_dev(devi);
 304         eventfd_devi = devi;
 305 
 306         eventfd_minor = vmem_create("eventfd_minor", (void *)EVENTFDMNRN_CLONE,
 307             UINT32_MAX - EVENTFDMNRN_CLONE, 1, NULL, NULL, NULL, 0,
 308             VM_SLEEP | VMC_IDENTIFIER);
 309 
 310         mutex_exit(&eventfd_lock);
 311 
 312         return (DDI_SUCCESS);
 313 }
 314 
 315 /*ARGSUSED*/
 316 static int
 317 eventfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
 318 {
 319         switch (cmd) {
 320         case DDI_DETACH:
 321                 break;
 322 
 323         case DDI_SUSPEND:
 324                 return (DDI_SUCCESS);
 325 
 326         default:
 327                 return (DDI_FAILURE);
 328         }
 329 
 330         mutex_enter(&eventfd_lock);
 331         vmem_destroy(eventfd_minor);
 332 
 333         ddi_remove_minor_node(eventfd_devi, NULL);
 334         eventfd_devi = NULL;
 335 
 336         ddi_soft_state_fini(&eventfd_softstate);
 337         mutex_exit(&eventfd_lock);
 338 
 339         return (DDI_SUCCESS);
 340 }
 341 
 342 /*ARGSUSED*/
 343 static int
 344 eventfd_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
 345 {
 346         int error;
 347 
 348         switch (infocmd) {
 349         case DDI_INFO_DEVT2DEVINFO:
 350                 *result = (void *)eventfd_devi;
 351                 error = DDI_SUCCESS;
 352                 break;
 353         case DDI_INFO_DEVT2INSTANCE:
 354                 *result = (void *)0;
 355                 error = DDI_SUCCESS;
 356                 break;
 357         default:
 358                 error = DDI_FAILURE;
 359         }
 360         return (error);
 361 }
 362 
 363 static struct cb_ops eventfd_cb_ops = {
 364         eventfd_open,           /* open */
 365         eventfd_close,          /* close */
 366         nulldev,                /* strategy */
 367         nulldev,                /* print */
 368         nodev,                  /* dump */
 369         eventfd_read,           /* read */
 370         eventfd_write,          /* write */
 371         eventfd_ioctl,          /* ioctl */
 372         nodev,                  /* devmap */
 373         nodev,                  /* mmap */
 374         nodev,                  /* segmap */
 375         eventfd_poll,           /* poll */
 376         ddi_prop_op,            /* cb_prop_op */
 377         0,                      /* streamtab  */
 378         D_NEW | D_MP            /* Driver compatibility flag */
 379 };
 380 
 381 static struct dev_ops eventfd_ops = {
 382         DEVO_REV,               /* devo_rev */
 383         0,                      /* refcnt */
 384         eventfd_info,           /* get_dev_info */
 385         nulldev,                /* identify */
 386         nulldev,                /* probe */
 387         eventfd_attach,         /* attach */
 388         eventfd_detach,         /* detach */
 389         nodev,                  /* reset */
 390         &eventfd_cb_ops,    /* driver operations */
 391         NULL,                   /* bus operations */
 392         nodev,                  /* dev power */
 393         ddi_quiesce_not_needed, /* quiesce */
 394 };
 395 
 396 static struct modldrv modldrv = {
 397         &mod_driverops,             /* module type (this is a pseudo driver) */
 398         "eventfd support",      /* name of module */
 399         &eventfd_ops,               /* driver ops */
 400 };
 401 
 402 static struct modlinkage modlinkage = {
 403         MODREV_1,
 404         { (void *)&modldrv, NULL }
 405 };
 406 
 407 int
 408 _init(void)
 409 {
 410         return (mod_install(&modlinkage));
 411 }
 412 
 413 int
 414 _info(struct modinfo *modinfop)
 415 {
 416         return (mod_info(&modlinkage, modinfop));
 417 }
 418 
 419 int
 420 _fini(void)
 421 {
 422         return (mod_remove(&modlinkage));
 423 }