1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 #include <sys/atomic.h>
  28 #include <sys/callb.h>
  29 #include <sys/conf.h>
  30 #include <sys/cmn_err.h>
  31 #include <sys/taskq.h>
  32 #include <sys/dditypes.h>
  33 #include <sys/ddi_timer.h>
  34 #include <sys/disp.h>
  35 #include <sys/kobj.h>
  36 #include <sys/note.h>
  37 #include <sys/param.h>
  38 #include <sys/sysmacros.h>
  39 #include <sys/systm.h>
  40 #include <sys/time.h>
  41 #include <sys/types.h>
  42 
/*
 * Global state for timeout requests.
 */
static kmem_cache_t *req_cache;         /* kmem cache for timeout request */

/*
 * taskq parameters for cyclic_timer
 *
 * timer_taskq_num:
 * The number of taskq threads. Currently 4 threads are pooled to handle
 * periodic timeout requests. This number is chosen based on the fact that
 * the callout (one-time timeout framework) uses 8 threads with TQ_NOSLEEP;
 * the periodic timeout calls taskq_dispatch() with TQ_SLEEP instead, and in
 * this case 4 threads should be sufficient to handle periodic timeout
 * requests. (see also timer_taskq_max_num below)
 *
 * timer_taskq_min_num:
 * The number of pre-populated taskq_ent structures. It holds the same value
 * as timer_taskq_num.
 *
 * timer_taskq_max_num:
 * Since TQ_SLEEP is set when taskq_dispatch() is called, the framework waits
 * for one second if more taskq_ent structures than timer_taskq_max_num are
 * required. From the timeout point of view one second is far too long, so
 * to prevent this from happening timer_taskq_max_num should hold a
 * sufficiently large value, which is 128 here. Note that since taskq_ent_t
 * is relatively small, this does not consume much memory.
 * (Currently the size is less than 8k at most)
 *
 * For a detailed explanation of the taskq function arguments, please see
 * usr/src/uts/common/os/taskq.c.
 */
int timer_taskq_num = 4;                /* taskq thread number */
int timer_taskq_min_num = 4;            /* min. number of taskq_ent structs */
int timer_taskq_max_num = 128;          /* max. number of taskq_ent structs */
static taskq_t *tm_taskq;               /* taskq thread pool */
static kthread_t *tm_work_thread;       /* work thread invoking taskq */

/*
 * Timer variables.
 */
static cyc_timer_t *ddi_timer;          /* ddi timer based on the cyclic */
static volatile hrtime_t timer_hrtime;  /* current tick time on the timer */

/*
 * Flag used for suspend/resume. It is written by timer_cpr_callb() and
 * checked at the top of cyclic_timer().
 */
static volatile boolean_t timer_suspended;

/*
 * Queue of level-0 (kernel context) requests waiting to be handed to the
 * taskq by timeout_taskq_thread().
 */
static list_t kern_queue;       /* kernel thread request queue */
static kcondvar_t kern_cv;      /* signalled when kern_queue gains an entry */

/*
 * Software interrupt queue dedicated to the ddi timer. It is filled by
 * timeout_dispatch() and drained by timer_softintr().
 */
static list_t intr_queue;       /* software interrupt request queue */
static uint_t intr_state;       /* software interrupt state */

/*
 * This lock is used to protect the intr_queue and kern_queue.
 * It's also used to protect the intr_state which represents the software
 * interrupt state for the timer.
 */
static kmutex_t disp_req_lock;
 111 
 112 /*
 113  * the periodic timer interrupt priority level
 114  */
 115 enum {
 116         TM_IPL_0 = 0,                   /* kernel context */
 117         TM_IPL_1, TM_IPL_2, TM_IPL_3,   /* level 1-3 */
 118         TM_IPL_4, TM_IPL_5, TM_IPL_6,   /* level 4-6 */
 119         TM_IPL_7, TM_IPL_8, TM_IPL_9,   /* level 7-9 */
 120         TM_IPL_10                       /* level 10 */
 121 };
 122 
 123 /*
 124  * A callback handler used by CPR to stop and resume callouts.
 125  * Since the taskq uses TASKQ_CPR_SAFE, the function just set the boolean
 126  * flag to timer_suspended here.
 127  */
 128 /*ARGSUSED*/
 129 static boolean_t
 130 timer_cpr_callb(void *arg, int code)
 131 {
 132         timer_suspended = (code == CB_CODE_CPR_CHKPT);
 133         return (B_TRUE);
 134 }
 135 
 136 /*
 137  * Return a proposed timeout request id. add_req() determines whether
 138  * or not the proposed one is used. If it's not suitable, add_req()
 139  * recalls get_req_cnt(). To reduce the lock contention between the
 140  * timer and i_untimeout(), the atomic instruction should be used here.
 141  */
 142 static timeout_t
 143 get_req_cnt(void)
 144 {
 145         static volatile ulong_t timeout_cnt = 0;
 146         return ((timeout_t)atomic_inc_ulong_nv(&timeout_cnt));
 147 }
 148 
 149 /*
 150  * Get the system resolution.
 151  * Note. currently there is a restriction about the system resolution, and
 152  * the 10ms tick (the default clock resolution) is only supported now.
 153  */
 154 static hrtime_t
 155 i_get_res(void)
 156 {
 157         return ((hrtime_t)10000000); /* 10ms tick only */
 158 }
 159 
 160 /*
 161  * Return the value for the cog of the timing wheel.
 162  * TICK_FACTOR is used to gain a finer cog on the clock resolution.
 163  */
 164 static hrtime_t
 165 tw_tick(hrtime_t time)
 166 {
 167         return ((time << TICK_FACTOR) / ddi_timer->res);
 168 }
 169 
 170 /*
 171  * Calculate the expiration time for the timeout request.
 172  */
 173 static hrtime_t
 174 expire_tick(tm_req_t *req)
 175 {
 176         return (tw_tick(req->exp_time));
 177 }
 178 
 179 /*
 180  * Register a timeout request to the timer. This function is used
 181  * in i_timeout().
 182  */
 183 static timeout_t
 184 add_req(tm_req_t *req)
 185 {
 186         timer_tw_t *tid, *tw;
 187         tm_req_t *next;
 188         timeout_t id;
 189 
 190 retry:
 191         /*
 192          * Retrieve a timeout request id. Since i_timeout() needs to return
 193          * a non-zero value, re-try if the zero is gotten.
 194          */
 195         if ((id = get_req_cnt()) == 0)
 196                 id = get_req_cnt();
 197 
 198         /*
 199          * Check if the id is not used yet. Since the framework now deals
 200          * with the periodic timeout requests, we cannot assume the id
 201          * allocated (long) before doesn't exist any more when it will
 202          * be re-assigned again (especially on 32bit) but need to handle
 203          * this case to solve the conflicts. If it's used already, retry
 204          * another.
 205          */
 206         tid = &ddi_timer->idhash[TM_HASH((uintptr_t)id)];
 207         mutex_enter(&tid->lock);
 208         for (next = list_head(&tid->req); next != NULL;
 209             next = list_next(&tid->req, next)) {
 210                 if (next->id == id) {
 211                         mutex_exit(&tid->lock);
 212                         goto retry;
 213                 }
 214         }
 215         /* Nobody uses this id yet */
 216         req->id = id;
 217 
 218         /*
 219          * Register this request to the timer.
 220          * The list operation must be list_insert_head().
 221          * Other operations can degrade performance.
 222          */
 223         list_insert_head(&tid->req, req);
 224         mutex_exit(&tid->lock);
 225 
 226         tw = &ddi_timer->exhash[TM_HASH(expire_tick(req))];
 227         mutex_enter(&tw->lock);
 228         /*
 229          * Other operations than list_insert_head() can
 230          * degrade performance here.
 231          */
 232         list_insert_head(&tw->req, req);
 233         mutex_exit(&tw->lock);
 234 
 235         return (id);
 236 }
 237 
 238 /*
 239  * Periodic timeout requests cannot be removed until they are canceled
 240  * explicitly. Until then, they need to be re-registerd after they are
 241  * fired. transfer_req() re-registers the requests for the next fires.
 242  * Note. transfer_req() sends the cv_signal to timeout_execute(), which
 243  * runs in interrupt context. Make sure this function will not be blocked,
 244  * otherwise the deadlock situation can occur.
 245  */
 246 static void
 247 transfer_req(tm_req_t *req, timer_tw_t *tw)
 248 {
 249         timer_tw_t *new_tw;
 250         hrtime_t curr_time;
 251         ASSERT(tw && MUTEX_HELD(&tw->lock));
 252 
 253         /* Calculate the next expiration time by interval */
 254         req->exp_time += req->interval;
 255         curr_time = gethrtime();
 256 
 257         /*
 258          * If a long time (more than 1 clock resolution) has already
 259          * passed for some reason (e.g. debugger or high interrupt),
 260          * round up the next expiration to the appropriate one
 261          * since this request is periodic and never catches with it.
 262          */
 263         if (curr_time - req->exp_time >= ddi_timer->res) {
 264                 req->exp_time = roundup(curr_time + req->interval,
 265                     ddi_timer->res);
 266         }
 267 
 268         /*
 269          * Re-register this request.
 270          * Note. since it is guaranteed that the timer is invoked on only
 271          * one CPU at any time (by the cyclic subsystem), a deadlock
 272          * cannot occur regardless of the lock order here.
 273          */
 274         new_tw = &ddi_timer->exhash[TM_HASH(expire_tick(req))];
 275 
 276         /*
 277          * If it's on the timer cog already, there is nothing
 278          * to do. Just return.
 279          */
 280         if (new_tw == tw)
 281                 return;
 282 
 283         /* Remove this request from the timer */
 284         list_remove(&tw->req, req);
 285 
 286         /* Re-register this request to the timer */
 287         mutex_enter(&new_tw->lock);
 288 
 289         /*
 290          * Other operations than list_insert_head() can
 291          * degrade performance here.
 292          */
 293         list_insert_head(&new_tw->req, req);
 294         mutex_exit(&new_tw->lock);
 295 
 296         /*
 297          * Set the TM_TRANSFER flag and notify the request is transfered
 298          * completely. This prevents a race in the case that this request
 299          * is serviced on another CPU already.
 300          */
 301         mutex_enter(&req->lock);
 302         req->flags |= TM_TRANSFER;
 303         cv_signal(&req->cv);
 304         mutex_exit(&req->lock);
 305 }
 306 
/*
 * Execute a timeout request.
 *
 * arg is the tm_req_t to run. The handler is invoked once per pending
 * fire (req->cnt, incremented by cyclic_timer()). If the request is found
 * canceled, it is unlinked from the expiration hash and freed here.
 *
 * This function is handed to taskq_dispatch() by timeout_taskq_thread()
 * and is called directly by timer_softintr().
 *
 * Note. since timeout_execute() can run in interrupt context and block
 * on condition variables, there are restrictions on the timer code that
 * signals these condition variables (see i_untimeout(), transfer_req(),
 * and condvar(9F)). Functions that signal these cvs must ensure that
 * they will not be blocked (for memory allocations or any other reason)
 * since condition variables don't support priority inheritance.
 */
static void
timeout_execute(void *arg)
{
        tm_req_t *req = (tm_req_t *)arg;
        ASSERT(req->flags & TM_INVOKING && !(req->flags & TM_EXECUTING));

        for (;;) {
                /*
                 * Check if this request is canceled. If it's canceled, do not
                 * execute this request.
                 */
                mutex_enter(&req->lock);
                if (!(req->flags & TM_CANCEL)) {
                        /*
                         * Record the current thread to prevent a deadlock
                         * in case this timeout request is canceled from
                         * within the handler being invoked now (this
                         * doesn't violate the spec). Set TM_EXECUTING
                         * to show the handler is about to be invoked.
                         */
                        req->h_thread = curthread;
                        req->flags |= TM_EXECUTING;
                        mutex_exit(&req->lock);

                        /* The handler is invoked without holding any locks */
                        (*req->handler)(req->arg);

                        mutex_enter(&req->lock);
                }

                /*
                 * Check again whether this request is canceled; the flag
                 * may have been set while the handler was running without
                 * the lock. If not canceled, prepare for the next fire.
                 */
                if (req->flags & TM_CANCEL) {
                        timer_tw_t *tw;
                        /*
                         * Wait until the timer finishes all things for
                         * this request, i.e. until transfer_req() has set
                         * TM_TRANSFER and signalled req->cv.
                         */
                        while (!(req->flags & TM_TRANSFER))
                                cv_wait(&req->cv, &req->lock);
                        mutex_exit(&req->lock);
                        ASSERT(req->flags & TM_TRANSFER);

                        /*
                         * Remove this request from the timer. The cog is
                         * looked up from the request's current expiration
                         * time.
                         */
                        tw = &ddi_timer->exhash[TM_HASH(expire_tick(req))];
                        mutex_enter(&tw->lock);
                        list_remove(&tw->req, req);
                        mutex_exit(&tw->lock);

                        /* Free this request */
                        kmem_cache_free(req_cache, req);
                        return;
                }
                ASSERT(req->flags & TM_EXECUTING);

                /*
                 * TM_EXECUTING must be set at this point.
                 * Clear it together with TM_TRANSFER.
                 */
                req->flags &= ~(TM_EXECUTING | TM_TRANSFER);

                /*
                 * Decrease the request cnt. The request cnt shows how many
                 * fires of this request are still outstanding. If this
                 * counter becomes zero, drop TM_INVOKING to show there is
                 * nothing left to do for now.
                 */
                req->cnt--;
                if (req->cnt == 0) {
                        req->flags &= ~TM_INVOKING;
                        mutex_exit(&req->lock);
                        return;
                }
                mutex_exit(&req->lock);
        }
}
 394 
 395 /*
 396  * Timeout worker thread for processing task queue.
 397  */
 398 static void
 399 timeout_taskq_thread(void *arg)
 400 {
 401         _NOTE(ARGUNUSED(arg));
 402         tm_req_t *kern_req;
 403         callb_cpr_t cprinfo;
 404 
 405         CALLB_CPR_INIT(&cprinfo, &disp_req_lock, callb_generic_cpr,
 406             "timeout_taskq_thread");
 407 
 408         /*
 409          * This thread is wakened up when a new request is added to
 410          * the queue. Then pick up all requests and dispatch them
 411          * via taskq_dispatch().
 412          */
 413         for (;;) {
 414                 /*
 415                  * Check the queue and pick up a request if the queue
 416                  * is not NULL.
 417                  */
 418                 mutex_enter(&disp_req_lock);
 419                 while ((kern_req = list_head(&kern_queue)) == NULL) {
 420                         CALLB_CPR_SAFE_BEGIN(&cprinfo);
 421                         cv_wait(&kern_cv, &disp_req_lock);
 422                         CALLB_CPR_SAFE_END(&cprinfo, &disp_req_lock);
 423                 }
 424                 list_remove(&kern_queue, kern_req);
 425                 mutex_exit(&disp_req_lock);
 426 
 427                 /* Execute the timeout request via the taskq thread */
 428                 (void) taskq_dispatch(tm_taskq, timeout_execute,
 429                     (void *)kern_req, TQ_SLEEP);
 430         }
 431 }
 432 
 433 /*
 434  * Dispatch the timeout request based on the level specified.
 435  * If the level is equal to zero, notify the worker thread to
 436  * call taskq_dispatch() in kernel context. If the level is bigger
 437  * than zero, add a software interrupt request to the queue and raise
 438  * the interrupt level to the specified one.
 439  */
 440 static void
 441 timeout_dispatch(tm_req_t *req)
 442 {
 443         int level = req->level;
 444         extern void sir_on(int);
 445 
 446         if (level == TM_IPL_0) {
 447                 /* Add a new request to the tail */
 448                 mutex_enter(&disp_req_lock);
 449                 list_insert_tail(&kern_queue, req);
 450                 mutex_exit(&disp_req_lock);
 451 
 452                 /*
 453                  * notify the worker thread that this request
 454                  * is newly added to the queue.
 455                  * Note. this cv_signal() can be called after the
 456                  * mutex_lock.
 457                  */
 458                 cv_signal(&kern_cv);
 459         } else {
 460                 /* Add a new request to the tail */
 461                 mutex_enter(&disp_req_lock);
 462                 list_insert_tail(&intr_queue, req);
 463 
 464                 /* Issue the software interrupt */
 465                 if (intr_state & TM_INTR_START(level)) {
 466                         /*
 467                          * timer_softintr() is already running; no need to
 468                          * raise a siron. Due to lock protection of
 469                          * the intr_queue and intr_state, we know that
 470                          * timer_softintr() will see the new addition to
 471                          * the intr_queue.
 472                          */
 473                         mutex_exit(&disp_req_lock);
 474                 } else {
 475                         intr_state |= TM_INTR_SET(level);
 476                         mutex_exit(&disp_req_lock);
 477 
 478                         /* Raise an interrupt to execute timeout requests */
 479                         sir_on(level);
 480                 }
 481         }
 482 }
 483 
 484 /*
 485  * Check the software interrupt queue and invoke requests at the specified
 486  * interrupt level.
 487  * Note that the queue may change during call so that the disp_req_lock
 488  * and the intr_state are used to protect it.
 489  * The software interrupts supported here are up to the level 10. Higher
 490  * than 10 interrupts cannot be supported.
 491  */
 492 void
 493 timer_softintr(int level)
 494 {
 495         tm_req_t *intr_req;
 496         ASSERT(level >= TM_IPL_1 && level <= TM_IPL_10);
 497 
 498         /* Check if we are asked to process the softcall list */
 499         mutex_enter(&disp_req_lock);
 500         if (!(intr_state & TM_INTR_SET(level))) {
 501                 mutex_exit(&disp_req_lock);
 502                 return;
 503         }
 504 
 505         /* Notify this software interrupt request will be executed soon */
 506         intr_state |= TM_INTR_START(level);
 507         intr_state &= ~TM_INTR_SET(level);
 508 
 509         /* loop the link until there is no requests */
 510         for (intr_req = list_head(&intr_queue); intr_req != NULL;
 511             /* Nothing */) {
 512 
 513                 /* Check the interrupt level */
 514                 if (intr_req->level != level) {
 515                         intr_req = list_next(&intr_queue, intr_req);
 516                         continue;
 517                 }
 518                 list_remove(&intr_queue, intr_req);
 519                 mutex_exit(&disp_req_lock);
 520 
 521                 /* Execute the software interrupt request */
 522                 timeout_execute(intr_req);
 523 
 524                 mutex_enter(&disp_req_lock);
 525                 /* Restart the loop since new requests might be added */
 526                 intr_req = list_head(&intr_queue);
 527         }
 528 
 529         /* reset the interrupt state */
 530         intr_state &= ~TM_INTR_START(level);
 531         mutex_exit(&disp_req_lock);
 532 }
 533 
/*
 *  void
 *  cyclic_timer(void)
 *
 *  Overview
 *   cyclic_timer() is a function invoked periodically by the cyclic
 *   subsystem.
 *
 *   The function walks the timing wheel cogs up to the current time and,
 *   for each timeout request whose expiration time has been reached,
 *   calls timeout_dispatch() and then re-registers the request for its
 *   next fire with transfer_req(). Requests already marked TM_UTMCOMP
 *   are freed here instead.
 *
 *  Arguments
 *   Nothing
 *
 *  Return value
 *   Nothing
 */
void
cyclic_timer(void)
{
        tm_req_t *req;
        timer_tw_t *tw;
        hrtime_t curr_tick, curr;

        /* If the system is suspended, just return */
        if (timer_suspended)
                return;

        /* Get the current time */
        timer_hrtime = ddi_timer->tick_time = curr = gethrtime();
        curr_tick = tw_tick(ddi_timer->tick_time);

restart:
        /*
         * Check the timer cogs to see if there are timeout requests
         * which have reached their expiration time. Call
         * timeout_dispatch() to execute those requests.
         */
        while (curr_tick >= ddi_timer->tick) {
                tm_req_t *next;
                tw = &ddi_timer->exhash[TM_HASH(ddi_timer->tick)];
                mutex_enter(&tw->lock);
                for (req = list_head(&tw->req); req != NULL; req = next) {
                        /*
                         * Fetch the successor first: req may be unlinked
                         * from this cog or freed below.
                         */
                        next = list_next(&tw->req, req);
                        /*
                         * If this request is already obsolete, free
                         * it here.
                         */
                        if (req->flags & TM_UTMCOMP) {
                                /*
                                 * Remove this request from the timer,
                                 * then free it.
                                 */
                                list_remove(&tw->req, req);
                                kmem_cache_free(req_cache, req);
                        } else if (curr >= req->exp_time) {
                                mutex_enter(&req->lock);
                                /*
                                 * Check if this request is canceled, but not
                                 * being executed now. If so, leave it alone.
                                 */
                                if (req->flags & TM_CANCEL &&
                                    !(req->flags & TM_INVOKING)) {
                                        mutex_exit(&req->lock);
                                        continue;
                                }
                                /*
                                 * Record how many times timeout_execute()
                                 * must be invoked.
                                 */
                                req->cnt++;
                                /*
                                 * Invoke timeout_execute() via taskq or
                                 * software interrupt.
                                 */
                                if (req->flags & TM_INVOKING) {
                                        /*
                                         * If it's already invoked, there is
                                         * nothing more to do; the increased
                                         * cnt makes timeout_execute() run
                                         * the handler again.
                                         */
                                        mutex_exit(&req->lock);
                                } else {
                                        req->flags |= TM_INVOKING;
                                        mutex_exit(&req->lock);
                                        /*
                                         * Dispatch this timeout request.
                                         * timeout_dispatch() chooses either
                                         * a software interrupt or taskq thread
                                         * based on the level.
                                         */
                                        timeout_dispatch(req);
                                }
                                /*
                                 * Periodic timeout requests must prepare for
                                 * the next fire.
                                 */
                                transfer_req(req, tw);
                        }
                }
                mutex_exit(&tw->lock);
                ddi_timer->tick++;
        }

        /*
         * Check the current time again. If processing took some time,
         * double-check whether more requests reached their expiration
         * time in the meantime.
         */
        curr = gethrtime();
        curr_tick = tw_tick(curr);
        if (curr_tick >= ddi_timer->tick) {
                /* Step back one cog so that the last cog is re-examined */
                ddi_timer->tick -= 1;
                goto restart;
        }
        /* Adjustment for the next rolling */
        ddi_timer->tick -= 1;
}
 651 
 652 /*
 653  *  void
 654  *  timer_init(void)
 655  *
 656  *  Overview
 657  *    timer_init() allocates the internal data structures used by
 658  *    i_timeout(), i_untimeout() and the timer.
 659  *
 660  *  Arguments
 661  *    Nothing
 662  *
 663  *  Return value
 664  *    Nothing
 665  *
 666  *  Caller's context
 667  *    timer_init() can be called in kernel context only.
 668  */
 669 void
 670 timer_init(void)
 671 {
 672         int i;
 673 
 674         /* Create kmem_cache for timeout requests */
 675         req_cache = kmem_cache_create("timeout_request", sizeof (tm_req_t),
 676             0, NULL, NULL, NULL, NULL, NULL, 0);
 677 
 678         /* Initialize the timer which is invoked by the cyclic subsystem */
 679         ddi_timer = kmem_alloc(sizeof (cyc_timer_t), KM_SLEEP);
 680         ddi_timer->res = nsec_per_tick;
 681         ddi_timer->tick = tw_tick(gethrtime());
 682         ddi_timer->tick_time = 0;
 683 
 684         /* Initialize the timing wheel */
 685         bzero((char *)&ddi_timer->idhash[0], TM_HASH_SZ * sizeof (timer_tw_t));
 686         bzero((char *)&ddi_timer->exhash[0], TM_HASH_SZ * sizeof (timer_tw_t));
 687 
 688         for (i = 0; i < TM_HASH_SZ; i++) {
 689                 list_create(&ddi_timer->idhash[i].req, sizeof (tm_req_t),
 690                     offsetof(tm_req_t, id_req));
 691                 mutex_init(&ddi_timer->idhash[i].lock, NULL, MUTEX_ADAPTIVE,
 692                     NULL);
 693 
 694                 list_create(&ddi_timer->exhash[i].req, sizeof (tm_req_t),
 695                     offsetof(tm_req_t, ex_req));
 696                 mutex_init(&ddi_timer->exhash[i].lock, NULL, MUTEX_ADAPTIVE,
 697                     NULL);
 698         }
 699 
 700         /* Create a taskq thread pool */
 701         tm_taskq = taskq_create_instance("timeout_taskq", 0,
 702             timer_taskq_num, MAXCLSYSPRI,
 703             timer_taskq_min_num, timer_taskq_max_num,
 704             TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
 705 
 706         /*
 707          * Initialize the taskq queue which is dedicated to this timeout
 708          * interface/timer.
 709          */
 710         list_create(&kern_queue, sizeof (tm_req_t),
 711             offsetof(tm_req_t, disp_req));
 712 
 713         /* Create a worker thread to dispatch the taskq thread */
 714         tm_work_thread = thread_create(NULL, 0, timeout_taskq_thread, NULL,
 715             0, &p0, TS_RUN, MAXCLSYSPRI);
 716 
 717         /*
 718          * Initialize the software interrupt queue which is dedicated to
 719          * this timeout interface/timer.
 720          */
 721         list_create(&intr_queue, sizeof (tm_req_t),
 722             offsetof(tm_req_t, disp_req));
 723 
 724         /*
 725          * Initialize the mutex lock used for both of kern_queue and
 726          * intr_queue.
 727          */
 728         mutex_init(&disp_req_lock, NULL, MUTEX_ADAPTIVE, NULL);
 729         cv_init(&kern_cv, NULL, CV_DEFAULT, NULL);
 730 
 731         /* Register the callback handler for the system suspend/resume */
 732         (void) callb_add(timer_cpr_callb, 0, CB_CL_CPR_CALLOUT, "cyclicTimer");
 733 }
 734 
 735 /*
 736  *  timeout_t
 737  *  i_timeout(void (*func)(void *), void *arg,  hrtime_t interval,
 738  *      int level, int flags)
 739  *
 740  *  Overview
 741  *    i_timeout() is an internal function scheduling the passed function
 742  *    to be invoked in the interval in nanoseconds. The callback function
 743  *    keeps invoked until the request is explicitly canceled by i_untimeout().
 744  *    This function is used for ddi_periodic_add(9F).
 745  *
 746  *  Arguments
 747  *
 748  *    func: the callback function
 749  *          the callback function will be invoked in kernel context if
 750  *          the level passed is the zero. Otherwise be invoked in interrupt
 751  *          context at the specified level by the argument "level".
 752  *
 753  *          Note that It's guaranteed by the cyclic subsystem that the
 754  *          function is invoked on the only one CPU and is never executed
 755  *          simultaneously even on MP system.
 756  *
 757  *     arg: the argument passed to the callback function
 758  *
 759  * interval: interval time in nanoseconds
 760  *          if the interval is the zero, the timer resolution is used.
 761  *
 762  *  level : callback interrupt level
 763  *          If the value is 0 (the zero), the callback function is invoked
 764  *          in kernel context. If the value is more than 0 (the zero), but
 765  *          less than or equal to 10, the callback function is invoked in
 766  *          interrupt context at the specified interrupt level.
 767  *          This value must be in range of 0-10.
 768  *
 769  *  Return value
 770  *    returns a non-zero opaque value (timeout_t) on success.
 771  *
 772  *  Caller's context
 773  *    i_timeout() can be called in user or kernel context.
 774  */
 775 timeout_t
 776 i_timeout(void (*func)(void *), void *arg, hrtime_t interval, int level)
 777 {
 778         hrtime_t start_time = gethrtime(), res;
 779         tm_req_t *req = NULL;
 780 
 781         /* Allocate and initialize the timeout request */
 782         req = kmem_cache_alloc(req_cache, KM_SLEEP);
 783         req->handler = func;
 784         req->arg = arg;
 785         req->h_thread = NULL;
 786         req->level = level;
 787         req->flags = 0;
 788         req->cnt = 0;
 789         mutex_init(&req->lock, NULL, MUTEX_ADAPTIVE, NULL);
 790         cv_init(&req->cv, NULL, CV_DEFAULT, NULL);
 791 
 792         /*
 793          * The resolution must be finer than or equal to
 794          * the requested interval. If it's not, set the resolution
 795          * to the interval.
 796          * Note. There is a restriction currently. Regardless of the
 797          * clock resolution used here, 10ms is set as the timer resolution.
 798          * Even on the 1ms resolution timer, the minimum interval is 10ms.
 799          */
 800         if ((res = i_get_res()) > interval) {
 801                 uintptr_t pc = (uintptr_t)req->handler;
 802                 ulong_t off;
 803                 cmn_err(CE_WARN,
 804                     "The periodic timeout (handler=%s, interval=%lld) "
 805                     "requests a finer interval than the supported resolution. "
 806                     "It rounds up to %lld\n", kobj_getsymname(pc, &off),
 807                     interval, res);
 808                 interval = res;
 809         }
 810 
 811         /*
 812          * If the specified interval is already multiples of
 813          * the resolution, use it as is. Otherwise, it rounds
 814          * up to multiples of the timer resolution.
 815          */
 816         req->interval = roundup(interval, i_get_res());
 817 
 818         /*
 819          * For the periodic timeout requests, the first expiration time will
 820          * be adjusted to the timer tick edge to take advantage of the cyclic
 821          * subsystem. In that case, the first fire is likely not an expected
 822          * one, but the fires later can be more accurate due to this.
 823          */
 824         req->exp_time = roundup(start_time + req->interval, i_get_res());
 825 
 826         /* Add the request to the timer */
 827         return (add_req(req));
 828 }
 829 
 830 /*
 831  *  void
 832  *  i_untimeout(timeout_t req)
 833  *
 834  *  Overview
 835  *    i_untimeout() is an internal function canceling the i_timeout()
 836  *    request previously issued.
 837  *    This function is used for ddi_periodic_delete(9F).
 838  *
 839  *  Argument
 840  *      req: timeout_t opaque value i_timeout() returned previously.
 841  *
 842  *  Return value
 843  *      Nothing.
 844  *
 845  *  Caller's context
 846  *    i_untimeout() can be called in user, kernel or interrupt context.
 847  *    It cannot be called in high interrupt context.
 848  *
 849  *  Note. This function is used by ddi_periodic_delete(), which cannot
 850  *  be called in interrupt context. As a result, this function is called
 851  *  in user or kernel context only in practice.
 852  */
 853 void
 854 i_untimeout(timeout_t timeout_req)
 855 {
 856         timer_tw_t *tid;
 857         tm_req_t *req;
 858         timeout_t id;
 859 
 860         /* Retrieve the id for this timeout request */
 861         id = (timeout_t)timeout_req;
 862         tid = &ddi_timer->idhash[TM_HASH((uintptr_t)id)];
 863 
 864         mutex_enter(&tid->lock);
 865         for (req = list_head(&tid->req); req != NULL;
 866             req = list_next(&tid->req, req)) {
 867                 if (req->id == id)
 868                         break;
 869         }
 870         if (req == NULL) {
 871                 /* There is no requests with this id after all */
 872                 mutex_exit(&tid->lock);
 873                 return;
 874         }
 875         mutex_enter(&req->lock);
 876 
 877         /* Unregister this request first */
 878         list_remove(&tid->req, req);
 879 
 880         /* Notify that this request is canceled */
 881         req->flags |= TM_CANCEL;
 882 
 883         /* Check if the handler is invoked */
 884         if (req->flags & TM_INVOKING) {
 885                 /*
 886                  * This request will be removed by timeout_execute() later,
 887                  * so that there is no extra thing to do any more.
 888                  */
 889                 mutex_exit(&req->lock);
 890                 mutex_exit(&tid->lock);
 891                 return;
 892         }
 893         mutex_exit(&req->lock);
 894         mutex_exit(&tid->lock);
 895 
 896         /*
 897          * Notify untimeout() is about to be finished, and this request
 898          * can be freed.
 899          */
 900         atomic_or_uint(&req->flags, TM_UTMCOMP);
 901 }