1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright (c) 2015 Joyent, Inc.  All rights reserved.
  14  */
  15 
  16 /*
  17  * Support for the timerfd facility, a Linux-borne facility that allows
  18  * POSIX.1b timers to be created and manipulated via a file descriptor
  19  * interface.
  20  */
  21 
  22 #include <sys/ddi.h>
  23 #include <sys/sunddi.h>
  24 #include <sys/timerfd.h>
  25 #include <sys/conf.h>
  26 #include <sys/vmem.h>
  27 #include <sys/sysmacros.h>
  28 #include <sys/filio.h>
  29 #include <sys/stat.h>
  30 #include <sys/file.h>
  31 #include <sys/timer.h>
  32 
  33 struct timerfd_state;
  34 typedef struct timerfd_state timerfd_state_t;
  35 
  36 struct timerfd_state {
  37         kmutex_t tfd_lock;                      /* lock protecting state */
  38         kcondvar_t tfd_cv;                      /* condvar */
  39         pollhead_t tfd_pollhd;                  /* poll head */
  40         uint64_t tfd_fired;                     /* # of times fired */
  41         itimer_t tfd_itimer;                    /* underlying itimer */
  42         timerfd_state_t *tfd_next;              /* next state on global list */
  43 };
  44 
/*
 * Internal global variables.  timerfd_lock is held by the open, close,
 * attach and detach entry points whenever they modify the variables below.
 */
static kmutex_t         timerfd_lock;           /* lock protecting state */
static dev_info_t       *timerfd_devi;          /* device info */
static vmem_t           *timerfd_minor;         /* minor number arena */
static void             *timerfd_softstate;     /* softstate pointer */
static timerfd_state_t  *timerfd_state;         /* global list of state */
  53 
  54 static itimer_t *
  55 timerfd_itimer_lock(timerfd_state_t *state)
  56 {
  57         itimer_t *it = &state->tfd_itimer;
  58 
  59         mutex_enter(&state->tfd_lock);
  60 
  61         while (it->it_lock & ITLK_LOCKED) {
  62                 it->it_blockers++;
  63                 cv_wait(&it->it_cv, &state->tfd_lock);
  64                 it->it_blockers--;
  65         }
  66 
  67         it->it_lock |= ITLK_LOCKED;
  68 
  69         mutex_exit(&state->tfd_lock);
  70 
  71         return (it);
  72 }
  73 
  74 static void
  75 timerfd_itimer_unlock(timerfd_state_t *state, itimer_t *it)
  76 {
  77         VERIFY(it == &state->tfd_itimer);
  78         VERIFY(it->it_lock & ITLK_LOCKED);
  79 
  80         mutex_enter(&state->tfd_lock);
  81 
  82         it->it_lock &= ~ITLK_LOCKED;
  83 
  84         if (it->it_blockers)
  85                 cv_signal(&it->it_cv);
  86 
  87         mutex_exit(&state->tfd_lock);
  88 }
  89 
  90 static void
  91 timerfd_fire(itimer_t *it)
  92 {
  93         timerfd_state_t *state = it->it_frontend;
  94         uint64_t oval;
  95 
  96         mutex_enter(&state->tfd_lock);
  97         oval = state->tfd_fired++;
  98         mutex_exit(&state->tfd_lock);
  99 
 100         if (oval == 0) {
 101                 cv_broadcast(&state->tfd_cv);
 102                 pollwakeup(&state->tfd_pollhd, POLLRDNORM | POLLIN);
 103         }
 104 }
 105 
 106 /*ARGSUSED*/
 107 static int
 108 timerfd_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
 109 {
 110         timerfd_state_t *state;
 111         major_t major = getemajor(*devp);
 112         minor_t minor = getminor(*devp);
 113 
 114         if (minor != TIMERFDMNRN_TIMERFD)
 115                 return (ENXIO);
 116 
 117         mutex_enter(&timerfd_lock);
 118 
 119         minor = (minor_t)(uintptr_t)vmem_alloc(timerfd_minor, 1,
 120             VM_BESTFIT | VM_SLEEP);
 121 
 122         if (ddi_soft_state_zalloc(timerfd_softstate, minor) != DDI_SUCCESS) {
 123                 vmem_free(timerfd_minor, (void *)(uintptr_t)minor, 1);
 124                 mutex_exit(&timerfd_lock);
 125                 return (NULL);
 126         }
 127 
 128         state = ddi_get_soft_state(timerfd_softstate, minor);
 129         *devp = makedevice(major, minor);
 130 
 131         state->tfd_next = timerfd_state;
 132         timerfd_state = state;
 133 
 134         mutex_exit(&timerfd_lock);
 135 
 136         return (0);
 137 }
 138 
 139 /*ARGSUSED*/
 140 static int
 141 timerfd_read(dev_t dev, uio_t *uio, cred_t *cr)
 142 {
 143         timerfd_state_t *state;
 144         minor_t minor = getminor(dev);
 145         uint64_t val;
 146         int err;
 147 
 148         if (uio->uio_resid < sizeof (val))
 149                 return (EINVAL);
 150 
 151         state = ddi_get_soft_state(timerfd_softstate, minor);
 152 
 153         mutex_enter(&state->tfd_lock);
 154 
 155         while (state->tfd_fired == 0) {
 156                 if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) {
 157                         mutex_exit(&state->tfd_lock);
 158                         return (EAGAIN);
 159                 }
 160 
 161                 if (!cv_wait_sig_swap(&state->tfd_cv, &state->tfd_lock)) {
 162                         mutex_exit(&state->tfd_lock);
 163                         return (EINTR);
 164                 }
 165         }
 166 
 167         /*
 168          * Our tfd_fired is non-zero; slurp its value and then clear it.
 169          */
 170         val = state->tfd_fired;
 171         state->tfd_fired = 0;
 172         mutex_exit(&state->tfd_lock);
 173 
 174         err = uiomove(&val, sizeof (val), UIO_READ, uio);
 175 
 176         return (err);
 177 }
 178 
 179 /*ARGSUSED*/
 180 static int
 181 timerfd_poll(dev_t dev, short events, int anyyet, short *reventsp,
 182     struct pollhead **phpp)
 183 {
 184         timerfd_state_t *state;
 185         minor_t minor = getminor(dev);
 186         short revents = 0;
 187 
 188         state = ddi_get_soft_state(timerfd_softstate, minor);
 189 
 190         mutex_enter(&state->tfd_lock);
 191 
 192         if (state->tfd_fired > 0)
 193                 revents |= POLLRDNORM | POLLIN;
 194 
 195         if (!(*reventsp = revents & events) && !anyyet)
 196                 *phpp = &state->tfd_pollhd;
 197 
 198         mutex_exit(&state->tfd_lock);
 199 
 200         return (0);
 201 }
 202 
 203 static int
 204 timerfd_copyin(uintptr_t addr, itimerspec_t *dest)
 205 {
 206         if (get_udatamodel() == DATAMODEL_NATIVE) {
 207                 if (copyin((void *)addr, dest, sizeof (itimerspec_t)) != 0)
 208                         return (EFAULT);
 209         } else {
 210                 itimerspec32_t dest32;
 211 
 212                 if (copyin((void *)addr, &dest32, sizeof (itimerspec32_t)) != 0)
 213                         return (EFAULT);
 214 
 215                 ITIMERSPEC32_TO_ITIMERSPEC(dest, &dest32);
 216         }
 217 
 218         if (itimerspecfix(&dest->it_value) ||
 219             (itimerspecfix(&dest->it_interval) &&
 220             timerspecisset(&dest->it_value))) {
 221                 return (EINVAL);
 222         }
 223 
 224         return (0);
 225 }
 226 
 227 static int
 228 timerfd_copyout(itimerspec_t *src, uintptr_t addr)
 229 {
 230         if (get_udatamodel() == DATAMODEL_NATIVE) {
 231                 if (copyout(src, (void *)addr, sizeof (itimerspec_t)) != 0)
 232                         return (EFAULT);
 233         } else {
 234                 itimerspec32_t src32;
 235 
 236                 if (ITIMERSPEC_OVERFLOW(src))
 237                         return (EOVERFLOW);
 238 
 239                 ITIMERSPEC_TO_ITIMERSPEC32(&src32, src);
 240 
 241                 if (copyout(&src32, (void *)addr, sizeof (itimerspec32_t)) != 0)
 242                         return (EFAULT);
 243         }
 244 
 245         return (0);
 246 }
 247 
/*
 * ioctl entry point.  Three commands are understood:
 *
 *   TIMERFDIOC_CREATE   bind the instance's itimer to the clock backend
 *                       named by arg and create the backend timer.
 *   TIMERFDIOC_GETTIME  copy the timer's current setting out to arg.
 *   TIMERFDIOC_SETTIME  program the timer from the timerfd_settime_t at
 *                       arg, optionally copying out the previous setting.
 *
 * Every command takes the itimer lock (timerfd_itimer_lock()) around its
 * calls into the backend.  Any other command yields ENOTTY.
 */
/*ARGSUSED*/
static int
timerfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
{
        itimerspec_t when, oval;
        timerfd_state_t *state;
        minor_t minor = getminor(dev);
        int err;
        itimer_t *it;

        state = ddi_get_soft_state(timerfd_softstate, minor);

        switch (cmd) {
        case TIMERFDIOC_CREATE: {
                /* Translate the timerfd-specific clock name. */
                if (arg == TIMERFD_MONOTONIC)
                        arg = CLOCK_MONOTONIC;

                it = timerfd_itimer_lock(state);

                /* A backend is already bound:  creation happens only once. */
                if (it->it_backend != NULL) {
                        timerfd_itimer_unlock(state, it);
                        return (EEXIST);
                }

                if ((it->it_backend = clock_get_backend(arg)) == NULL) {
                        timerfd_itimer_unlock(state, it);
                        return (EINVAL);
                }

                /*
                 * We need to provide a proc structure only for purposes
                 * of locking CLOCK_REALTIME-based timers -- it is safe to
                 * provide p0 here.
                 */
                it->it_proc = &p0;

                err = it->it_backend->clk_timer_create(it, timerfd_fire);

                if (err != 0) {
                        /* Unbind so that a later create can be attempted. */
                        it->it_backend = NULL;
                        timerfd_itimer_unlock(state, it);
                        return (err);
                }

                /* timerfd_fire() finds its state through it_frontend. */
                it->it_frontend = state;
                timerfd_itimer_unlock(state, it);

                return (0);
        }

        case TIMERFDIOC_GETTIME: {
                it = timerfd_itimer_lock(state);

                /* No timer has been created on this instance yet. */
                if (it->it_backend == NULL) {
                        timerfd_itimer_unlock(state, it);
                        return (ENODEV);
                }

                err = it->it_backend->clk_timer_gettime(it, &when);
                timerfd_itimer_unlock(state, it);

                if (err != 0)
                        return (err);

                /* The copyout is done after the itimer lock is dropped. */
                if ((err = timerfd_copyout(&when, arg)) != 0)
                        return (err);

                return (0);
        }

        case TIMERFDIOC_SETTIME: {
                timerfd_settime_t st;

                /*
                 * Pull in the argument block and the new timer value (which
                 * timerfd_copyin() also validates) before taking the lock.
                 */
                if (copyin((void *)arg, &st, sizeof (st)) != 0)
                        return (EFAULT);

                if ((err = timerfd_copyin(st.tfd_settime_value, &when)) != 0)
                        return (err);

                it = timerfd_itimer_lock(state);

                /* No timer has been created on this instance yet. */
                if (it->it_backend == NULL) {
                        timerfd_itimer_unlock(state, it);
                        return (ENODEV);
                }

                /*
                 * If the caller asked for the old value, capture it now,
                 * before the timer is reprogrammed.
                 *
                 * NOTE(review): tfd_settime_ovalue is compared against NULL
                 * here but is handed to timerfd_copyout() as a uintptr_t
                 * below -- confirm the field's type; if it is an integer,
                 * a comparison against 0 would be the cleaner spelling.
                 */
                if (st.tfd_settime_ovalue != NULL) {
                        err = it->it_backend->clk_timer_gettime(it, &oval);

                        if (err != 0) {
                                timerfd_itimer_unlock(state, it);
                                return (err);
                        }
                }

                /*
                 * Before we set the time, we're going to clear tfd_fired.
                 * This can potentially race with the (old) timer firing, but
                 * the window is deceptively difficult to close:  if we were
                 * to simply clear tfd_fired after the call to the backend
                 * returned, we would run the risk of plowing a firing of the
                 * new timer.  Ultimately, the race can only be resolved by
                 * the backend, which would likely need to be extended with a
                 * function to call back into when the timer is between states
                 * (that is, after the timer can no longer fire with the old
                 * timer value, but before it can fire with the new one).
                 * This is straightforward enough for backends that set a
                 * timer's value by deleting the old one and adding the new
                 * one, but for those that modify the timer value in place
                 * (e.g., cyclics), the required serialization is necessarily
                 * delicate:  the function would have to be callable from
                 * arbitrary interrupt context.  While implementing all of
                 * this is possible, it does not (for the moment) seem worth
                 * it: if the timer is firing at essentially the same moment
                 * that it's being reprogrammed, there is a higher-level race
                 * with respect to timerfd usage that the program itself will
                 * have to properly resolve -- and it seems reasonable to
                 * simply allow the program to resolve it in this case.
                 */
                mutex_enter(&state->tfd_lock);
                state->tfd_fired = 0;
                mutex_exit(&state->tfd_lock);

                err = it->it_backend->clk_timer_settime(it,
                    st.tfd_settime_flags & TFD_TIMER_ABSTIME ?
                    TIMER_ABSTIME : TIMER_RELTIME, &when);
                timerfd_itimer_unlock(state, it);

                /* Done unless the set succeeded and the old value is wanted. */
                if (err != 0 || st.tfd_settime_ovalue == NULL)
                        return (err);

                if ((err = timerfd_copyout(&oval, st.tfd_settime_ovalue)) != 0)
                        return (err);

                return (0);
        }

        default:
                break;
        }

        return (ENOTTY);
}
 391 
 392 /*ARGSUSED*/
 393 static int
 394 timerfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
 395 {
 396         timerfd_state_t *state, **sp;
 397         itimer_t *it;
 398         minor_t minor = getminor(dev);
 399 
 400         state = ddi_get_soft_state(timerfd_softstate, minor);
 401 
 402         if (state->tfd_pollhd.ph_list != NULL) {
 403                 pollwakeup(&state->tfd_pollhd, POLLERR);
 404                 pollhead_clean(&state->tfd_pollhd);
 405         }
 406 
 407         /*
 408          * No one can get to this timer; we don't need to lock it -- we can
 409          * just call on the backend to delete it.
 410          */
 411         it = &state->tfd_itimer;
 412 
 413         if (it->it_backend != NULL)
 414                 it->it_backend->clk_timer_delete(it);
 415 
 416         mutex_enter(&timerfd_lock);
 417 
 418         /*
 419          * Remove our state from our global list.
 420          */
 421         for (sp = &timerfd_state; *sp != state; sp = &((*sp)->tfd_next))
 422                 VERIFY(*sp != NULL);
 423 
 424         *sp = (*sp)->tfd_next;
 425 
 426         ddi_soft_state_free(timerfd_softstate, minor);
 427         vmem_free(timerfd_minor, (void *)(uintptr_t)minor, 1);
 428 
 429         mutex_exit(&timerfd_lock);
 430 
 431         return (0);
 432 }
 433 
 434 static int
 435 timerfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
 436 {
 437         switch (cmd) {
 438         case DDI_ATTACH:
 439                 break;
 440 
 441         case DDI_RESUME:
 442                 return (DDI_SUCCESS);
 443 
 444         default:
 445                 return (DDI_FAILURE);
 446         }
 447 
 448         mutex_enter(&timerfd_lock);
 449 
 450         if (ddi_soft_state_init(&timerfd_softstate,
 451             sizeof (timerfd_state_t), 0) != 0) {
 452                 cmn_err(CE_NOTE, "/dev/timerfd failed to create soft state");
 453                 mutex_exit(&timerfd_lock);
 454                 return (DDI_FAILURE);
 455         }
 456 
 457         if (ddi_create_minor_node(devi, "timerfd", S_IFCHR,
 458             TIMERFDMNRN_TIMERFD, DDI_PSEUDO, NULL) == DDI_FAILURE) {
 459                 cmn_err(CE_NOTE, "/dev/timerfd couldn't create minor node");
 460                 ddi_soft_state_fini(&timerfd_softstate);
 461                 mutex_exit(&timerfd_lock);
 462                 return (DDI_FAILURE);
 463         }
 464 
 465         ddi_report_dev(devi);
 466         timerfd_devi = devi;
 467 
 468         timerfd_minor = vmem_create("timerfd_minor", (void *)TIMERFDMNRN_CLONE,
 469             UINT32_MAX - TIMERFDMNRN_CLONE, 1, NULL, NULL, NULL, 0,
 470             VM_SLEEP | VMC_IDENTIFIER);
 471 
 472         mutex_exit(&timerfd_lock);
 473 
 474         return (DDI_SUCCESS);
 475 }
 476 
 477 /*ARGSUSED*/
 478 static int
 479 timerfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
 480 {
 481         switch (cmd) {
 482         case DDI_DETACH:
 483                 break;
 484 
 485         case DDI_SUSPEND:
 486                 return (DDI_SUCCESS);
 487 
 488         default:
 489                 return (DDI_FAILURE);
 490         }
 491 
 492         mutex_enter(&timerfd_lock);
 493         vmem_destroy(timerfd_minor);
 494 
 495         ddi_remove_minor_node(timerfd_devi, NULL);
 496         timerfd_devi = NULL;
 497 
 498         ddi_soft_state_fini(&timerfd_softstate);
 499         mutex_exit(&timerfd_lock);
 500 
 501         return (DDI_SUCCESS);
 502 }
 503 
 504 /*ARGSUSED*/
 505 static int
 506 timerfd_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
 507 {
 508         int error;
 509 
 510         switch (infocmd) {
 511         case DDI_INFO_DEVT2DEVINFO:
 512                 *result = (void *)timerfd_devi;
 513                 error = DDI_SUCCESS;
 514                 break;
 515         case DDI_INFO_DEVT2INSTANCE:
 516                 *result = (void *)0;
 517                 error = DDI_SUCCESS;
 518                 break;
 519         default:
 520                 error = DDI_FAILURE;
 521         }
 522         return (error);
 523 }
 524 
/*
 * Character/block entry points for the driver.  Only open, close, read,
 * ioctl and poll are implemented in this file; every other slot is filled
 * with a nulldev/nodev stub (or ddi_prop_op for property operations).
 */
static struct cb_ops timerfd_cb_ops = {
        timerfd_open,           /* open */
        timerfd_close,          /* close */
        nulldev,                /* strategy */
        nulldev,                /* print */
        nodev,                  /* dump */
        timerfd_read,           /* read */
        nodev,                  /* write */
        timerfd_ioctl,          /* ioctl */
        nodev,                  /* devmap */
        nodev,                  /* mmap */
        nodev,                  /* segmap */
        timerfd_poll,           /* poll */
        ddi_prop_op,            /* cb_prop_op */
        0,                      /* streamtab  */
        D_NEW | D_MP            /* Driver compatibility flag */
};
 542 
/*
 * Device operations for the driver:  getinfo, attach and detach are
 * implemented in this file, and the character entry points are supplied
 * through timerfd_cb_ops.  There are no bus operations.
 */
static struct dev_ops timerfd_ops = {
        DEVO_REV,               /* devo_rev */
        0,                      /* refcnt */
        timerfd_info,           /* get_dev_info */
        nulldev,                /* identify */
        nulldev,                /* probe */
        timerfd_attach,         /* attach */
        timerfd_detach,         /* detach */
        nodev,                  /* reset */
        &timerfd_cb_ops,    /* driver operations */
        NULL,                   /* bus operations */
        nodev,                  /* dev power */
        ddi_quiesce_not_needed, /* quiesce */
};
 557 
/*
 * Driver linkage structure tying the module name to timerfd_ops; it is
 * referenced from modlinkage below.
 */
static struct modldrv modldrv = {
        &mod_driverops,             /* module type (this is a pseudo driver) */
        "timerfd support",      /* name of module */
        &timerfd_ops,               /* driver ops */
};
 563 
/*
 * Module linkage handed to mod_install(), mod_info() and mod_remove() by
 * _init(), _info() and _fini():  a single modldrv entry followed by a NULL
 * terminator.
 */
static struct modlinkage modlinkage = {
        MODREV_1,
        (void *)&modldrv,
        NULL
};
 569 
 570 int
 571 _init(void)
 572 {
 573         return (mod_install(&modlinkage));
 574 }
 575 
 576 int
 577 _info(struct modinfo *modinfop)
 578 {
 579         return (mod_info(&modlinkage, modinfop));
 580 }
 581 
 582 int
 583 _fini(void)
 584 {
 585         return (mod_remove(&modlinkage));
 586 }