1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright (c) 2015 Joyent, Inc. All rights reserved. 14 */ 15 16 /* 17 * Support for the eventfd facility, a Linux-borne facility for user-generated 18 * file descriptor-based events. 19 */ 20 21 #include <sys/ddi.h> 22 #include <sys/sunddi.h> 23 #include <sys/eventfd.h> 24 #include <sys/conf.h> 25 #include <sys/vmem.h> 26 #include <sys/sysmacros.h> 27 #include <sys/filio.h> 28 #include <sys/stat.h> 29 #include <sys/file.h> 30 31 struct eventfd_state; 32 typedef struct eventfd_state eventfd_state_t; 33 34 struct eventfd_state { 35 kmutex_t efd_lock; /* lock protecting state */ 36 boolean_t efd_semaphore; /* boolean: sema. semantics */ 37 kcondvar_t efd_cv; /* condvar */ 38 pollhead_t efd_pollhd; /* poll head */ 39 uint64_t efd_value; /* value */ 40 eventfd_state_t *efd_next; /* next state on global list */ 41 }; 42 43 /* 44 * Internal global variables. 45 */ 46 static kmutex_t eventfd_lock; /* lock protecting state */ 47 static dev_info_t *eventfd_devi; /* device info */ 48 static vmem_t *eventfd_minor; /* minor number arena */ 49 static void *eventfd_softstate; /* softstate pointer */ 50 static eventfd_state_t *eventfd_state; /* global list of state */ 51 52 /*ARGSUSED*/ 53 static int 54 eventfd_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) 55 { 56 eventfd_state_t *state; 57 major_t major = getemajor(*devp); 58 minor_t minor = getminor(*devp); 59 60 if (minor != EVENTFDMNRN_EVENTFD) 61 return (ENXIO); 62 63 mutex_enter(&eventfd_lock); 64 65 minor = (minor_t)(uintptr_t)vmem_alloc(eventfd_minor, 1, 66 VM_BESTFIT | VM_SLEEP); 67 68 if (ddi_soft_state_zalloc(eventfd_softstate, minor) != DDI_SUCCESS) { 69 vmem_free(eventfd_minor, (void *)(uintptr_t)minor, 1); 70 mutex_exit(&eventfd_lock); 71 return (NULL); 72 } 73 74 state = ddi_get_soft_state(eventfd_softstate, minor); 75 *devp = makedevice(major, minor); 76 77 state->efd_next = eventfd_state; 78 eventfd_state = state; 79 80 mutex_exit(&eventfd_lock); 81 82 return (0); 83 } 84 85 /*ARGSUSED*/ 86 static int 87 eventfd_read(dev_t dev, uio_t *uio, cred_t *cr) 88 { 89 eventfd_state_t *state; 90 minor_t minor = getminor(dev); 91 uint64_t val, oval; 92 int err; 93 94 if (uio->uio_resid < sizeof (val)) 95 return (EINVAL); 96 97 state = ddi_get_soft_state(eventfd_softstate, minor); 98 99 mutex_enter(&state->efd_lock); 100 101 while (state->efd_value == 0) { 102 if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) { 103 mutex_exit(&state->efd_lock); 104 return (EAGAIN); 105 } 106 107 if (!cv_wait_sig_swap(&state->efd_cv, &state->efd_lock)) { 108 mutex_exit(&state->efd_lock); 109 return (EINTR); 110 } 111 } 112 113 /* 114 * We have a non-zero value and we own the lock; our behavior now 115 * depends on whether or not EFD_SEMAPHORE was set when the eventfd 116 * was created. 117 */ 118 val = oval = state->efd_value; 119 120 if (state->efd_semaphore) { 121 state->efd_value--; 122 val = 1; 123 } else { 124 state->efd_value = 0; 125 } 126 127 err = uiomove(&val, sizeof (val), UIO_READ, uio); 128 129 mutex_exit(&state->efd_lock); 130 131 if (oval == EVENTFD_VALMAX) { 132 cv_broadcast(&state->efd_cv); 133 pollwakeup(&state->efd_pollhd, POLLWRNORM | POLLOUT); 134 } 135 136 return (err); 137 } 138 139 /*ARGSUSED*/ 140 static int 141 eventfd_write(dev_t dev, struct uio *uio, cred_t *credp) 142 { 143 eventfd_state_t *state; 144 minor_t minor = getminor(dev); 145 uint64_t val, oval; 146 int err; 147 148 if (uio->uio_resid < sizeof (val)) 149 return (EINVAL); 150 151 if ((err = uiomove(&val, sizeof (val), UIO_WRITE, uio)) != 0) 152 return (err); 153 154 if (val > EVENTFD_VALMAX) 155 return (EINVAL); 156 157 state = ddi_get_soft_state(eventfd_softstate, minor); 158 159 mutex_enter(&state->efd_lock); 160 161 while (val > EVENTFD_VALMAX - state->efd_value) { 162 if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) { 163 mutex_exit(&state->efd_lock); 164 return (EAGAIN); 165 } 166 167 if (!cv_wait_sig_swap(&state->efd_cv, &state->efd_lock)) { 168 mutex_exit(&state->efd_lock); 169 return (EINTR); 170 } 171 } 172 173 /* 174 * We now know that we can add the value without overflowing. 175 */ 176 state->efd_value = (oval = state->efd_value) + val; 177 178 mutex_exit(&state->efd_lock); 179 180 if (oval == 0) { 181 cv_broadcast(&state->efd_cv); 182 pollwakeup(&state->efd_pollhd, POLLRDNORM | POLLIN); 183 } 184 185 return (0); 186 } 187 188 /*ARGSUSED*/ 189 static int 190 eventfd_poll(dev_t dev, short events, int anyyet, short *reventsp, 191 struct pollhead **phpp) 192 { 193 eventfd_state_t *state; 194 minor_t minor = getminor(dev); 195 short revents = 0; 196 197 state = ddi_get_soft_state(eventfd_softstate, minor); 198 199 mutex_enter(&state->efd_lock); 200 201 if (state->efd_value > 0) 202 revents |= POLLRDNORM | POLLIN; 203 204 if (state->efd_value < EVENTFD_VALMAX) 205 revents |= POLLWRNORM | POLLOUT; 206 207 if (!(*reventsp = revents & events) && !anyyet) 208 *phpp = &state->efd_pollhd; 209 210 mutex_exit(&state->efd_lock); 211 212 return (0); 213 } 214 215 /*ARGSUSED*/ 216 static int 217 eventfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) 218 { 219 eventfd_state_t *state; 220 minor_t minor = getminor(dev); 221 222 state = ddi_get_soft_state(eventfd_softstate, minor); 223 224 switch (cmd) { 225 case EVENTFDIOC_SEMAPHORE: { 226 mutex_enter(&state->efd_lock); 227 state->efd_semaphore ^= 1; 228 mutex_exit(&state->efd_lock); 229 230 return (0); 231 } 232 233 default: 234 break; 235 } 236 237 return (ENOTTY); 238 } 239 240 /*ARGSUSED*/ 241 static int 242 eventfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p) 243 { 244 eventfd_state_t *state, **sp; 245 minor_t minor = getminor(dev); 246 247 state = ddi_get_soft_state(eventfd_softstate, minor); 248 249 if (state->efd_pollhd.ph_list != NULL) { 250 pollwakeup(&state->efd_pollhd, POLLERR); 251 pollhead_clean(&state->efd_pollhd); 252 } 253 254 mutex_enter(&eventfd_lock); 255 256 /* 257 * Remove our state from our global list. 258 */ 259 for (sp = &eventfd_state; *sp != state; sp = &((*sp)->efd_next)) 260 VERIFY(*sp != NULL); 261 262 *sp = (*sp)->efd_next; 263 264 ddi_soft_state_free(eventfd_softstate, minor); 265 vmem_free(eventfd_minor, (void *)(uintptr_t)minor, 1); 266 267 mutex_exit(&eventfd_lock); 268 269 return (0); 270 } 271 272 static int 273 eventfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 274 { 275 switch (cmd) { 276 case DDI_ATTACH: 277 break; 278 279 case DDI_RESUME: 280 return (DDI_SUCCESS); 281 282 default: 283 return (DDI_FAILURE); 284 } 285 286 mutex_enter(&eventfd_lock); 287 288 if (ddi_soft_state_init(&eventfd_softstate, 289 sizeof (eventfd_state_t), 0) != 0) { 290 cmn_err(CE_NOTE, "/dev/eventfd failed to create soft state"); 291 mutex_exit(&eventfd_lock); 292 return (DDI_FAILURE); 293 } 294 295 if (ddi_create_minor_node(devi, "eventfd", S_IFCHR, 296 EVENTFDMNRN_EVENTFD, DDI_PSEUDO, NULL) == DDI_FAILURE) { 297 cmn_err(CE_NOTE, "/dev/eventfd couldn't create minor node"); 298 ddi_soft_state_fini(&eventfd_softstate); 299 mutex_exit(&eventfd_lock); 300 return (DDI_FAILURE); 301 } 302 303 ddi_report_dev(devi); 304 eventfd_devi = devi; 305 306 eventfd_minor = vmem_create("eventfd_minor", (void *)EVENTFDMNRN_CLONE, 307 UINT32_MAX - EVENTFDMNRN_CLONE, 1, NULL, NULL, NULL, 0, 308 VM_SLEEP | VMC_IDENTIFIER); 309 310 mutex_exit(&eventfd_lock); 311 312 return (DDI_SUCCESS); 313 } 314 315 /*ARGSUSED*/ 316 static int 317 eventfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 318 { 319 switch (cmd) { 320 case DDI_DETACH: 321 break; 322 323 case DDI_SUSPEND: 324 return (DDI_SUCCESS); 325 326 default: 327 return (DDI_FAILURE); 328 } 329 330 mutex_enter(&eventfd_lock); 331 vmem_destroy(eventfd_minor); 332 333 ddi_remove_minor_node(eventfd_devi, NULL); 334 eventfd_devi = NULL; 335 336 ddi_soft_state_fini(&eventfd_softstate); 337 mutex_exit(&eventfd_lock); 338 339 return (DDI_SUCCESS); 340 } 341 342 /*ARGSUSED*/ 343 static int 344 eventfd_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 345 { 346 int error; 347 348 switch (infocmd) { 349 case DDI_INFO_DEVT2DEVINFO: 350 *result = (void *)eventfd_devi; 351 error = DDI_SUCCESS; 352 break; 353 case DDI_INFO_DEVT2INSTANCE: 354 *result = (void *)0; 355 error = DDI_SUCCESS; 356 break; 357 default: 358 error = DDI_FAILURE; 359 } 360 return (error); 361 } 362 363 static struct cb_ops eventfd_cb_ops = { 364 eventfd_open, /* open */ 365 eventfd_close, /* close */ 366 nulldev, /* strategy */ 367 nulldev, /* print */ 368 nodev, /* dump */ 369 eventfd_read, /* read */ 370 eventfd_write, /* write */ 371 eventfd_ioctl, /* ioctl */ 372 nodev, /* devmap */ 373 nodev, /* mmap */ 374 nodev, /* segmap */ 375 eventfd_poll, /* poll */ 376 ddi_prop_op, /* cb_prop_op */ 377 0, /* streamtab */ 378 D_NEW | D_MP /* Driver compatibility flag */ 379 }; 380 381 static struct dev_ops eventfd_ops = { 382 DEVO_REV, /* devo_rev */ 383 0, /* refcnt */ 384 eventfd_info, /* get_dev_info */ 385 nulldev, /* identify */ 386 nulldev, /* probe */ 387 eventfd_attach, /* attach */ 388 eventfd_detach, /* detach */ 389 nodev, /* reset */ 390 &eventfd_cb_ops, /* driver operations */ 391 NULL, /* bus operations */ 392 nodev, /* dev power */ 393 ddi_quiesce_not_needed, /* quiesce */ 394 }; 395 396 static struct modldrv modldrv = { 397 &mod_driverops, /* module type (this is a pseudo driver) */ 398 "eventfd support", /* name of module */ 399 &eventfd_ops, /* driver ops */ 400 }; 401 402 static struct modlinkage modlinkage = { 403 MODREV_1, 404 { (void *)&modldrv, NULL } 405 }; 406 407 int 408 _init(void) 409 { 410 return (mod_install(&modlinkage)); 411 } 412 413 int 414 _info(struct modinfo *modinfop) 415 { 416 return (mod_info(&modlinkage, modinfop)); 417 } 418 419 int 420 _fini(void) 421 { 422 return (mod_remove(&modlinkage)); 423 }