6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #include <sys/atomic.h>
28 #include <sys/callb.h>
29 #include <sys/conf.h>
30 #include <sys/cmn_err.h>
31 #include <sys/taskq.h>
32 #include <sys/dditypes.h>
33 #include <sys/ddi_timer.h>
34 #include <sys/disp.h>
35 #include <sys/kobj.h>
36 #include <sys/note.h>
37 #include <sys/param.h>
38 #include <sys/sysmacros.h>
39 #include <sys/systm.h>
40 #include <sys/time.h>
41 #include <sys/types.h>
42
43 /*
44 * global variables for timeout requests
45 */
46 static kmem_cache_t *req_cache; /* kmem cache for timeout request */
47
48 /*
49 * taskq parameters for cyclic_timer
50 *
51 * timer_taskq_num:
52 * timer_taskq_num represents the number of taskq threads.
53 * Currently 4 threads are pooled to handle periodic timeout requests.
54 * This number is chosen based on the fact that the callout (one-time
55 * timeout framework) uses 8 threads with TQ_NOSLEEP; the periodic timeout
56 * calls taskq_dispatch() with TQ_SLEEP instead, and in this case, 4 threads
57 * should be sufficient to handle periodic timeout requests. (see also
58 * timer_taskq_max_num below)
59 *
60 * timer_taskq_min_num:
61 * timer_taskq_min_num represents the number of pre-populated taskq_ent
62 * structures, and this variable holds the same value as timer_taskq_num does.
63 *
64 * timer_taskq_max_num:
65 * Since TQ_SLEEP is set when taskq_dispatch() is called, the framework waits
66 * for one second whenever more taskq_ent structures than timer_taskq_max_num
67 * are required. From the timeout point of view, however, a one-second delay
68 * is far longer than acceptable, so timer_taskq_max_num should hold a
69 * sufficiently large value, which is 128 here. Note that since taskq_ent_t
70 * is a relatively small structure, this doesn't use up much memory.
71 * (Currently the total is less than 8k at most)
72 *
73 * For a detailed explanation of the taskq function arguments, please see
74 * usr/src/uts/common/os/taskq.c.
75 */
76 int timer_taskq_num = 4; /* taskq thread number */
77 int timer_taskq_min_num = 4; /* min. number of taskq_ent structs */
78 int timer_taskq_max_num = 128; /* max. number of taskq_ent structs */
79 static taskq_t *tm_taskq; /* taskq thread pool */
80 static kthread_t *tm_work_thread; /* work thread invoking taskq */
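
/*
 * Example: since the tunables above are plain global ints, they can in
 * principle be adjusted at boot time from /etc/system (a sketch only,
 * assuming the symbols are resident in genunix, as is usual for common/os
 * code):
 *
 *      set timer_taskq_max_num = 256
 */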
81
82 /*
83 * timer variables
84 */
85 static cyc_timer_t *ddi_timer; /* ddi timer based on the cyclic */
86 static volatile hrtime_t timer_hrtime; /* current tick time on the timer */
87
88 /*
89 * Variable used for the suspend/resume.
90 */
91 static volatile boolean_t timer_suspended;
92
93 /*
94 * Kernel taskq queue dedicated to ddi timer
95 */
96 static list_t kern_queue; /* kernel thread request queue */
97 static kcondvar_t kern_cv; /* condition variable for taskq queue */
98
99 /*
100 * Software interrupt queue dedicated to ddi timer
101 */
102 static list_t intr_queue; /* software interrupt request queue */
103 static uint_t intr_state; /* software interrupt state */
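
/*
 * A note on intr_state: two facts are tracked per interrupt level via the
 * TM_INTR_SET() and TM_INTR_START() masks (defined in sys/ddi_timer.h; the
 * exact bit layout is assumed, not shown here). TM_INTR_SET(level) records
 * that a soft interrupt has been raised for that level and not yet serviced,
 * while TM_INTR_START(level) records that timer_softintr() is currently
 * draining the queue at that level, so no further siron is needed.
 */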
104
105 /*
106 * This lock is used to protect the intr_queue and kern_queue.
107 * It's also used to protect the intr_state which represents the software
108 * interrupt state for the timer.
109 */
110 static kmutex_t disp_req_lock;
111
112 /*
113 * the periodic timer interrupt priority level
114 */
115 enum {
116 TM_IPL_0 = 0, /* kernel context */
117 TM_IPL_1, TM_IPL_2, TM_IPL_3, /* level 1-3 */
118 TM_IPL_4, TM_IPL_5, TM_IPL_6, /* level 4-6 */
119 TM_IPL_7, TM_IPL_8, TM_IPL_9, /* level 7-9 */
120 TM_IPL_10 /* level 10 */
121 };
122
123 /*
124 * A callback handler used by CPR to stop and resume callouts.
125 * Since the taskq uses TASKQ_CPR_SAFE, this function just sets the
126 * timer_suspended flag here.
127 */
128 /*ARGSUSED*/
129 static boolean_t
130 timer_cpr_callb(void *arg, int code)
131 {
132 timer_suspended = (code == CB_CODE_CPR_CHKPT);
133 return (B_TRUE);
134 }
135
136 /*
137 * Return a proposed timeout request id. add_req() determines whether
138 * or not the proposed one is actually used. If it's not suitable, add_req()
139 * calls get_req_cnt() again. To reduce lock contention between the
140 * timer and i_untimeout(), an atomic instruction is used here.
141 */
142 static timeout_t
143 get_req_cnt(void)
144 {
145 static volatile ulong_t timeout_cnt = 0;
146 return ((timeout_t)atomic_inc_ulong_nv(&timeout_cnt));
147 }
148
149 /*
150 * Get the system resolution.
151 * Note: there is currently a restriction on the system resolution;
152 * only the 10ms tick (the default clock resolution) is supported now.
153 */
154 static hrtime_t
155 i_get_res(void)
156 {
157 return ((hrtime_t)10000000); /* 10ms tick only */
158 }
159
160 /*
161 * Return the cog of the timing wheel for the given time.
162 * TICK_FACTOR is used to obtain a finer-grained cog than the clock resolution.
163 */
164 static hrtime_t
165 tw_tick(hrtime_t time)
166 {
167 return ((time << TICK_FACTOR) / ddi_timer->res);
168 }
169
170 /*
171 * Calculate the expiration time for the timeout request.
172 */
173 static hrtime_t
174 expire_tick(tm_req_t *req)
175 {
176 return (tw_tick(req->exp_time));
177 }
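
/*
 * A worked example of the mapping above, assuming for illustration that
 * TICK_FACTOR is 3 and TM_HASH_SZ is 1024 (the actual values are defined
 * in sys/ddi_timer.h and may differ):
 *
 *      res       = 10,000,000ns (the 10ms tick)
 *      exp_time  = 25,000,000ns
 *      tw_tick() = (25,000,000 << 3) / 10,000,000 = 20
 *      cog       = TM_HASH(20) = 20 & (1024 - 1) = 20
 *
 * Each resolution period thus spans 2^TICK_FACTOR cogs, which is how the
 * wheel gains a finer granularity than the clock resolution itself.
 */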
178
179 /*
180 * Register a timeout request to the timer. This function is used
181 * in i_timeout().
182 */
183 static timeout_t
184 add_req(tm_req_t *req)
185 {
186 timer_tw_t *tid, *tw;
187 tm_req_t *next;
188 timeout_t id;
189
190 retry:
191 /*
192 * Retrieve a timeout request id. Since i_timeout() needs to return
193 * a non-zero value, retry if zero is returned.
194 */
195 if ((id = get_req_cnt()) == 0)
196 id = get_req_cnt();
197
198 /*
199 * Check that the id is not already in use. Since the framework now
200 * handles periodic timeout requests, we cannot assume that an id
201 * allocated long ago no longer exists when the counter wraps and
202 * re-assigns it (especially on 32-bit systems), so such conflicts
203 * must be handled here. If the id is already in use, retry with
204 * another one.
205 */
206 tid = &ddi_timer->idhash[TM_HASH((uintptr_t)id)];
207 mutex_enter(&tid->lock);
208 for (next = list_head(&tid->req); next != NULL;
209 next = list_next(&tid->req, next)) {
210 if (next->id == id) {
211 mutex_exit(&tid->lock);
212 goto retry;
213 }
214 }
215 /* Nobody uses this id yet */
216 req->id = id;
217
218 /*
219 * Register this request to the timer.
220 * The list operation must be list_insert_head().
221 * Other operations can degrade performance.
222 */
223 list_insert_head(&tid->req, req);
224 mutex_exit(&tid->lock);
225
226 tw = &ddi_timer->exhash[TM_HASH(expire_tick(req))];
227 mutex_enter(&tw->lock);
228 /*
229 * Operations other than list_insert_head() can
230 * degrade performance here.
231 */
232 list_insert_head(&tw->req, req);
233 mutex_exit(&tw->lock);
234
235 return (id);
236 }
237
238 /*
239 * Periodic timeout requests cannot be removed until they are canceled
240 * explicitly. Until then, they need to be re-registered after they
241 * fire. transfer_req() re-registers the requests for their next firing.
242 * Note: transfer_req() signals the cv waited on by timeout_execute(), which
243 * may run in interrupt context. Make sure this function does not block;
244 * otherwise a deadlock can occur.
245 */
246 static void
247 transfer_req(tm_req_t *req, timer_tw_t *tw)
248 {
249 timer_tw_t *new_tw;
250 hrtime_t curr_time;
251 ASSERT(tw && MUTEX_HELD(&tw->lock));
252
253 /* Calculate the next expiration time by interval */
254 req->exp_time += req->interval;
255 curr_time = gethrtime();
256
257 /*
258 * If a long time (more than 1 clock resolution) has already
259 * passed for some reason (e.g. debugger or high interrupt),
260 * round the next expiration time up to an appropriate one,
261 * since this request is periodic and would otherwise never catch up.
262 */
263 if (curr_time - req->exp_time >= ddi_timer->res) {
264 req->exp_time = roundup(curr_time + req->interval,
265 ddi_timer->res);
266 }
267
268 /*
269 * Re-register this request.
270 * Note. since it is guaranteed that the timer is invoked on only
271 * one CPU at any time (by the cyclic subsystem), a deadlock
272 * cannot occur regardless of the lock order here.
273 */
274 new_tw = &ddi_timer->exhash[TM_HASH(expire_tick(req))];
275
276 /*
277 * If it's on the timer cog already, there is nothing
278 * to do. Just return.
279 */
280 if (new_tw == tw)
281 return;
282
283 /* Remove this request from the timer */
284 list_remove(&tw->req, req);
285
286 /* Re-register this request to the timer */
287 mutex_enter(&new_tw->lock);
288
289 /*
290 * Operations other than list_insert_head() can
291 * degrade performance here.
292 */
293 list_insert_head(&new_tw->req, req);
294 mutex_exit(&new_tw->lock);
295
296 /*
297 * Set the TM_TRANSFER flag and signal that the request has been
298 * completely transferred. This prevents a race in the case where this
299 * request is already being serviced on another CPU.
300 */
301 mutex_enter(&req->lock);
302 req->flags |= TM_TRANSFER;
303 cv_signal(&req->cv);
304 mutex_exit(&req->lock);
305 }
306
307 /*
308 * Execute timeout requests.
309 * Note. since timeout_execute() can run in interrupt context and block
310 * on condition variables, there are restrictions on the timer code that
311 * signals these condition variables (see i_untimeout(), transfer_req(),
312 * and condvar(9F)). Functions that signal these cvs must ensure that
313 * they will not be blocked (for memory allocations or any other reason)
314 * since condition variables don't support priority inheritance.
315 */
316 static void
317 timeout_execute(void *arg)
318 {
319 tm_req_t *req = (tm_req_t *)arg;
320 ASSERT(req->flags & TM_INVOKING && !(req->flags & TM_EXECUTING));
321
322 for (;;) {
323 /*
324 * Check if this request is canceled. If it's canceled, do not
325 * execute this request.
326 */
327 mutex_enter(&req->lock);
328 if (!(req->flags & TM_CANCEL)) {
329 /*
330 * Record the current thread to prevent a deadlock
331 * in case this timeout request is canceled from
332 * within the handler being invoked now
333 * (this doesn't violate the spec). Set TM_EXECUTING
334 * to show that the handler is about to be invoked.
335 */
336 req->h_thread = curthread;
337 req->flags |= TM_EXECUTING;
338 mutex_exit(&req->lock);
339
340 /* The handler is invoked without holding any locks */
341 (*req->handler)(req->arg);
342
343 mutex_enter(&req->lock);
344 }
345
346 /*
347 * Check if this request is canceled or not. If not, prepare
348 * for the next fire.
349 */
350 if (req->flags & TM_CANCEL) {
351 timer_tw_t *tw;
352 /*
353 * Wait until the timer has finished all work for
354 * this request.
355 */
356 while (!(req->flags & TM_TRANSFER))
357 cv_wait(&req->cv, &req->lock);
358 mutex_exit(&req->lock);
359 ASSERT(req->flags & TM_TRANSFER);
360
361 /* Remove this request from the timer */
362 tw = &ddi_timer->exhash[TM_HASH(expire_tick(req))];
363 mutex_enter(&tw->lock);
364 list_remove(&tw->req, req);
365 mutex_exit(&tw->lock);
366
367 /* Free this request */
368 kmem_cache_free(req_cache, req);
369 return;
370 }
371 ASSERT(req->flags & TM_EXECUTING);
372
373 /*
374 * TM_EXECUTING must be set at this point (asserted above).
375 * Unset it, along with TM_TRANSFER, for the next round.
376 */
377 req->flags &= ~(TM_EXECUTING | TM_TRANSFER);
378
379 /*
380 * Decrease the request cnt. The request cnt shows
381 * how many pending invocations of this request remain.
382 * If the counter reaches zero, drop TM_INVOKING
383 * to show there are no requests to handle now.
384 */
385 req->cnt--;
386 if (req->cnt == 0) {
387 req->flags &= ~TM_INVOKING;
388 mutex_exit(&req->lock);
389 return;
390 }
391 mutex_exit(&req->lock);
392 }
393 }
394
395 /*
396 * Timeout worker thread for processing the task queue.
397 */
398 static void
399 timeout_taskq_thread(void *arg)
400 {
401 _NOTE(ARGUNUSED(arg));
402 tm_req_t *kern_req;
403 callb_cpr_t cprinfo;
404
405 CALLB_CPR_INIT(&cprinfo, &disp_req_lock, callb_generic_cpr,
406 "timeout_taskq_thread");
407
408 /*
409 * This thread is woken up when a new request is added to
410 * the queue. It then picks up all requests and dispatches them
411 * via taskq_dispatch().
412 */
413 for (;;) {
414 /*
415 * Check the queue and pick up a request if the queue
416 * is not empty.
417 */
418 mutex_enter(&disp_req_lock);
419 while ((kern_req = list_head(&kern_queue)) == NULL) {
420 CALLB_CPR_SAFE_BEGIN(&cprinfo);
421 cv_wait(&kern_cv, &disp_req_lock);
422 CALLB_CPR_SAFE_END(&cprinfo, &disp_req_lock);
423 }
424 list_remove(&kern_queue, kern_req);
425 mutex_exit(&disp_req_lock);
426
427 /* Execute the timeout request via the taskq thread */
428 (void) taskq_dispatch(tm_taskq, timeout_execute,
429 (void *)kern_req, TQ_SLEEP);
430 }
431 }
432
433 /*
434 * Dispatch the timeout request based on the level specified.
435 * If the level is equal to zero, notify the worker thread to
436 * call taskq_dispatch() in kernel context. If the level is greater
437 * than zero, add a software interrupt request to the queue and raise
438 * a software interrupt at the specified level.
439 */
440 static void
441 timeout_dispatch(tm_req_t *req)
442 {
443 int level = req->level;
444 extern void sir_on(int);
445
446 if (level == TM_IPL_0) {
447 /* Add a new request to the tail */
448 mutex_enter(&disp_req_lock);
449 list_insert_tail(&kern_queue, req);
450 mutex_exit(&disp_req_lock);
451
452 /*
453 * Notify the worker thread that this request
454 * has been newly added to the queue.
455 * Note: this cv_signal() may safely be called after
456 * the mutex has been dropped.
457 */
458 cv_signal(&kern_cv);
459 } else {
460 /* Add a new request to the tail */
461 mutex_enter(&disp_req_lock);
462 list_insert_tail(&intr_queue, req);
463
464 /* Issue the software interrupt */
465 if (intr_state & TM_INTR_START(level)) {
466 /*
467 * timer_softintr() is already running; no need to
468 * raise a siron. Due to lock protection of
469 * the intr_queue and intr_state, we know that
470 * timer_softintr() will see the new addition to
471 * the intr_queue.
472 */
473 mutex_exit(&disp_req_lock);
474 } else {
475 intr_state |= TM_INTR_SET(level);
476 mutex_exit(&disp_req_lock);
477
478 /* Raise an interrupt to execute timeout requests */
479 sir_on(level);
480 }
481 }
482 }
483
484 /*
485 * Check the software interrupt queue and invoke requests at the specified
486 * interrupt level.
487 * Note that the queue may change during the call, so the disp_req_lock
488 * and the intr_state are used to protect it.
489 * Software interrupts up to level 10 are supported here; levels higher
490 * than 10 are not supported.
491 */
492 void
493 timer_softintr(int level)
494 {
495 tm_req_t *intr_req;
496 ASSERT(level >= TM_IPL_1 && level <= TM_IPL_10);
497
498 /* Check if we are asked to process the softcall list */
499 mutex_enter(&disp_req_lock);
500 if (!(intr_state & TM_INTR_SET(level))) {
501 mutex_exit(&disp_req_lock);
502 return;
503 }
504
505 /* Indicate that this software interrupt request will be executed soon */
506 intr_state |= TM_INTR_START(level);
507 intr_state &= ~TM_INTR_SET(level);
508
509 /* Loop over the list until there are no requests left */
510 for (intr_req = list_head(&intr_queue); intr_req != NULL;
511 /* Nothing */) {
512
513 /* Check the interrupt level */
514 if (intr_req->level != level) {
515 intr_req = list_next(&intr_queue, intr_req);
516 continue;
517 }
518 list_remove(&intr_queue, intr_req);
519 mutex_exit(&disp_req_lock);
520
521 /* Execute the software interrupt request */
522 timeout_execute(intr_req);
523
524 mutex_enter(&disp_req_lock);
525 /* Restart the loop since new requests might be added */
526 intr_req = list_head(&intr_queue);
527 }
528
529 /* reset the interrupt state */
530 intr_state &= ~TM_INTR_START(level);
531 mutex_exit(&disp_req_lock);
532 }
533
534 /*
535 * void
536 * cyclic_timer(void)
537 *
538 * Overview
539 * cyclic_timer() is a function invoked periodically by the cyclic
540 * subsystem.
541 *
542 * The function dispatches, via timeout_dispatch(), the timeout requests
543 * whose expiration time has already been reached.
544 *
545 * Arguments
546 * Nothing
547 *
548 * Return value
549 * Nothing
550 */
551 void
552 cyclic_timer(void)
553 {
554 tm_req_t *req;
555 timer_tw_t *tw;
556 hrtime_t curr_tick, curr;
557
558 /* If the system is suspended, just return */
559 if (timer_suspended)
560 return;
561
562 /* Get the current time */
563 timer_hrtime = ddi_timer->tick_time = curr = gethrtime();
564 curr_tick = tw_tick(ddi_timer->tick_time);
565
566 restart:
567 /*
568 * Check the timer cogs to see if there are timeout requests
569 * that have reached their expiration time; if so, dispatch them
570 * for execution.
571 */
572 while (curr_tick >= ddi_timer->tick) {
573 tm_req_t *next;
574 tw = &ddi_timer->exhash[TM_HASH(ddi_timer->tick)];
575 mutex_enter(&tw->lock);
576 for (req = list_head(&tw->req); req != NULL; req = next) {
577 next = list_next(&tw->req, req);
578 /*
579 * If this request is already obsolete, free
580 * it here.
581 */
582 if (req->flags & TM_UTMCOMP) {
583 /*
584 * Remove this request from the timer,
585 * then free it.
586 */
587 list_remove(&tw->req, req);
588 kmem_cache_free(req_cache, req);
589 } else if (curr >= req->exp_time) {
590 mutex_enter(&req->lock);
591 /*
592 * Check if this request is canceled, but not
593 * being executed now.
594 */
595 if (req->flags & TM_CANCEL &&
596 !(req->flags & TM_INVOKING)) {
597 mutex_exit(&req->lock);
598 continue;
599 }
600 /*
601 * Record how many times timeout_execute()
602 * must be invoked.
603 */
604 req->cnt++;
605 /*
606 * Invoke timeout_execute() via taskq or
607 * software interrupt.
608 */
609 if (req->flags & TM_INVOKING) {
610 /*
611 * If it's already being invoked,
612 * there is nothing to do.
613 */
614 mutex_exit(&req->lock);
615 } else {
616 req->flags |= TM_INVOKING;
617 mutex_exit(&req->lock);
618 /*
619 * Dispatch this timeout request.
620 * timeout_dispatch() chooses either
621 * a software interrupt or taskq thread
622 * based on the level.
623 */
624 timeout_dispatch(req);
625 }
626 /*
627 * Periodic timeout requests must prepare for
628 * the next firing.
629 */
630 transfer_req(req, tw);
631 }
632 }
633 mutex_exit(&tw->lock);
634 ddi_timer->tick++;
635 }
636
637 /*
638 * Check the current time. If a significant amount of time was spent
639 * above, double-check whether any requests reached their expiration
640 * time while we were working.
641 */
642 curr = gethrtime();
643 curr_tick = tw_tick(curr);
644 if (curr_tick >= ddi_timer->tick) {
645 ddi_timer->tick -= 1;
646 goto restart;
647 }
648 /* Adjustment for the next rolling */
649 ddi_timer->tick -= 1;
650 }
651
652 /*
653 * void
654 * timer_init(void)
655 *
656 * Overview
657 * timer_init() allocates the internal data structures used by
658 * i_timeout(), i_untimeout() and the timer.
659 *
660 * Arguments
661 * Nothing
662 *
663 * Return value
664 * Nothing
665 *
666 * Caller's context
667 * timer_init() can be called in kernel context only.
668 */
669 void
670 timer_init(void)
671 {
672 int i;
673
674 /* Create kmem_cache for timeout requests */
675 req_cache = kmem_cache_create("timeout_request", sizeof (tm_req_t),
676 0, NULL, NULL, NULL, NULL, NULL, 0);
677
678 /* Initialize the timer which is invoked by the cyclic subsystem */
679 ddi_timer = kmem_alloc(sizeof (cyc_timer_t), KM_SLEEP);
680 ddi_timer->res = nsec_per_tick;
681 ddi_timer->tick = tw_tick(gethrtime());
682 ddi_timer->tick_time = 0;
683
684 /* Initialize the timing wheel */
685 bzero((char *)&ddi_timer->idhash[0], TM_HASH_SZ * sizeof (timer_tw_t));
686 bzero((char *)&ddi_timer->exhash[0], TM_HASH_SZ * sizeof (timer_tw_t));
687
688 for (i = 0; i < TM_HASH_SZ; i++) {
689 list_create(&ddi_timer->idhash[i].req, sizeof (tm_req_t),
690 offsetof(tm_req_t, id_req));
691 mutex_init(&ddi_timer->idhash[i].lock, NULL, MUTEX_ADAPTIVE,
692 NULL);
693
694 list_create(&ddi_timer->exhash[i].req, sizeof (tm_req_t),
695 offsetof(tm_req_t, ex_req));
696 mutex_init(&ddi_timer->exhash[i].lock, NULL, MUTEX_ADAPTIVE,
697 NULL);
698 }
699
700 /* Create a taskq thread pool */
701 tm_taskq = taskq_create_instance("timeout_taskq", 0,
702 timer_taskq_num, MAXCLSYSPRI,
703 timer_taskq_min_num, timer_taskq_max_num,
704 TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
705
706 /*
707 * Initialize the taskq queue which is dedicated to this timeout
708 * interface/timer.
709 */
710 list_create(&kern_queue, sizeof (tm_req_t),
711 offsetof(tm_req_t, disp_req));
712
713 /* Create a worker thread to dispatch the taskq thread */
714 tm_work_thread = thread_create(NULL, 0, timeout_taskq_thread, NULL,
715 0, &p0, TS_RUN, MAXCLSYSPRI);
716
717 /*
718 * Initialize the software interrupt queue which is dedicated to
719 * this timeout interface/timer.
720 */
721 list_create(&intr_queue, sizeof (tm_req_t),
722 offsetof(tm_req_t, disp_req));
723
724 /*
725 * Initialize the mutex lock used for both kern_queue and
726 * intr_queue.
727 */
728 mutex_init(&disp_req_lock, NULL, MUTEX_ADAPTIVE, NULL);
729 cv_init(&kern_cv, NULL, CV_DEFAULT, NULL);
730
731 /* Register the callback handler for the system suspend/resume */
732 (void) callb_add(timer_cpr_callb, 0, CB_CL_CPR_CALLOUT, "cyclicTimer");
733 }
734
735 /*
736 * timeout_t
737 * i_timeout(void (*func)(void *), void *arg, hrtime_t interval,
738 * int level, int flags)
739 *
740 * Overview
741 * i_timeout() is an internal function that schedules the passed function
742 * to be invoked at the given interval, in nanoseconds. The callback
743 * keeps being invoked until the request is explicitly canceled by i_untimeout().
744 * This function is used for ddi_periodic_add(9F).
745 *
746 * Arguments
747 *
748 * func: the callback function
749 * the callback function will be invoked in kernel context if
750 * the level passed is zero. Otherwise it will be invoked in interrupt
751 * context at the level specified by the argument "level".
752 *
753 * Note that it is guaranteed by the cyclic subsystem that the
754 * function is invoked on only one CPU at a time and is never executed
755 * simultaneously, even on an MP system.
756 *
757 * arg: the argument passed to the callback function
758 *
759 * interval: interval time in nanoseconds
760 * if the interval is zero, the timer resolution is used.
761 *
762 * level : callback interrupt level
763 * If the value is 0, the callback function is invoked
764 * in kernel context. If the value is greater than 0 but
765 * less than or equal to 10, the callback function is invoked in
766 * interrupt context at the specified interrupt level.
767 * This value must be in the range 0-10.
768 *
769 * Return value
770 * returns a non-zero opaque value (timeout_t) on success.
771 *
772 * Caller's context
773 * i_timeout() can be called in user or kernel context.
774 */
775 timeout_t
776 i_timeout(void (*func)(void *), void *arg, hrtime_t interval, int level)
777 {
778 hrtime_t start_time = gethrtime(), res;
779 tm_req_t *req = NULL;
780
781 /* Allocate and initialize the timeout request */
782 req = kmem_cache_alloc(req_cache, KM_SLEEP);
783 req->handler = func;
784 req->arg = arg;
785 req->h_thread = NULL;
786 req->level = level;
787 req->flags = 0;
788 req->cnt = 0;
789 mutex_init(&req->lock, NULL, MUTEX_ADAPTIVE, NULL);
790 cv_init(&req->cv, NULL, CV_DEFAULT, NULL);
791
792 /*
793 * The resolution must be finer than or equal to
794 * the requested interval. If it's not, round the interval up to
795 * the resolution.
796 * Note: there is currently a restriction. Regardless of the
797 * clock resolution used here, 10ms is used as the timer resolution.
798 * Even on a 1ms resolution timer, the minimum interval is 10ms.
799 */
800 if ((res = i_get_res()) > interval) {
801 uintptr_t pc = (uintptr_t)req->handler;
802 ulong_t off;
803 cmn_err(CE_WARN,
804 "The periodic timeout (handler=%s, interval=%lld) "
805 "requests a finer interval than the supported resolution. "
806 "It rounds up to %lld\n", kobj_getsymname(pc, &off),
807 interval, res);
808 interval = res;
809 }
810
811 /*
812 * If the specified interval is already a multiple of
813 * the resolution, use it as is. Otherwise, round it
814 * up to a multiple of the timer resolution.
815 */
816 req->interval = roundup(interval, i_get_res());
817
818 /*
819 * For the periodic timeout requests, the first expiration time will
820 * be adjusted to the timer tick edge to take advantage of the cyclic
821 * subsystem. In that case, the first firing will likely not occur exactly
822 * when expected, but later firings will be more accurate as a result.
823 */
824 req->exp_time = roundup(start_time + req->interval, i_get_res());
825
826 /* Add the request to the timer */
827 return (add_req(req));
828 }
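
/*
 * Usage sketch: drivers reach i_timeout()/i_untimeout() through the public
 * ddi_periodic_add(9F) and ddi_periodic_delete(9F) wrappers. The handler
 * and state names below are hypothetical, for illustration only.
 *
 *      static void
 *      my_poll(void *arg)
 *      {
 *              my_state_t *sp = arg;
 *
 *              my_check_status(sp);
 *      }
 *
 * To invoke my_poll(sp) every 100ms (100000000ns) in kernel context:
 *
 *      sp->sp_periodic = ddi_periodic_add(my_poll, sp, 100000000, 0);
 *
 * and later, for instance in detach(9E), to cancel it:
 *
 *      ddi_periodic_delete(sp->sp_periodic);
 */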
829
830 /*
831 * void
832 * i_untimeout(timeout_t req)
833 *
834 * Overview
835 * i_untimeout() is an internal function canceling the i_timeout()
836 * request previously issued.
837 * This function is used for ddi_periodic_delete(9F).
838 *
839 * Argument
840 * req: timeout_t opaque value i_timeout() returned previously.
841 *
842 * Return value
843 * Nothing.
844 *
845 * Caller's context
846 * i_untimeout() can be called in user, kernel or interrupt context.
847 * It cannot be called in high interrupt context.
848 *
849 * Note. This function is used by ddi_periodic_delete(), which cannot
850 * be called in interrupt context. As a result, this function is called
851 * in user or kernel context only in practice.
852 */
853 void
854 i_untimeout(timeout_t timeout_req)
855 {
856 timer_tw_t *tid;
857 tm_req_t *req;
858 timeout_t id;
859
860 /* Retrieve the id for this timeout request */
861 id = (timeout_t)timeout_req;
862 tid = &ddi_timer->idhash[TM_HASH((uintptr_t)id)];
863
864 mutex_enter(&tid->lock);
865 for (req = list_head(&tid->req); req != NULL;
866 req = list_next(&tid->req, req)) {
867 if (req->id == id)
868 break;
869 }
870 if (req == NULL) {
871 /* There are no requests with this id after all */
872 mutex_exit(&tid->lock);
873 return;
874 }
875 mutex_enter(&req->lock);
876
877 /* Unregister this request first */
878 list_remove(&tid->req, req);
879
880 /* Notify that this request is canceled */
881 req->flags |= TM_CANCEL;
882
883 /* Check if the handler is currently being invoked */
884 if (req->flags & TM_INVOKING) {
885 /*
886 * This request will be removed by timeout_execute() later,
887 * so there is nothing more to do here.
888 */
889 mutex_exit(&req->lock);
890 mutex_exit(&tid->lock);
891 return;
892 }
893 mutex_exit(&req->lock);
894 mutex_exit(&tid->lock);
895
896 /*
897 * Mark that i_untimeout() processing is complete, so that the timer
898 * can free this request.
899 */
900 atomic_or_uint(&req->flags, TM_UTMCOMP);
901 }
|
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26 /*
27 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
28 */
29
30 #include <sys/cmn_err.h>
31 #include <sys/ddi_timer.h>
32 #include <sys/id_space.h>
33 #include <sys/kobj.h>
34 #include <sys/sysmacros.h>
35 #include <sys/systm.h>
36 #include <sys/taskq.h>
37 #include <sys/taskq_impl.h>
38 #include <sys/time.h>
39 #include <sys/types.h>
40 #include <sys/sdt.h>
41
42 /*
43 * The ddi_periodic_add(9F) Implementation
44 *
45 * This file contains the implementation of the ddi_periodic_add(9F) interface.
46 * It is a thin wrapper around the cyclic subsystem (see documentation in
47 * uts/common/os/cyclic.c), providing a DDI interface for registering
48 * (and unregistering) callbacks for periodic invocation at arbitrary
49 * interrupt levels, or in kernel context.
50 *
51 * Each call to ddi_periodic_add will result in a new opaque handle, as
52 * allocated from an id_space, a new "periodic" object (ddi_periodic_impl_t)
53 * and a registered cyclic.
54 *
55 * Operation
56 *
57 * Whenever the cyclic fires, our cyclic handler checks that the particular
58 * periodic is not dispatched already (we do not support overlapping execution
59 * of the consumer's handler function), and not yet cancelled. If both of
60 * these conditions hold, we mark the periodic as DPF_DISPATCHED and enqueue it
61 * to either the taskq (for DDI_IPL_0) or to one of the soft interrupt queues
62 * (DDI_IPL_1 to DDI_IPL_10).
63 *
64 * While the taskq (or soft interrupt handler) is handling a particular
65 * periodic, we mark it as DPF_EXECUTING. When complete, we reset both
66 * DPF_DISPATCHED and DPF_EXECUTING.
67 *
68 * Cancellation
69 *
70 * ddi_periodic_delete(9F) historically had spectacularly loose semantics with
71 * respect to cancellation concurrent with handler execution. These semantics
72 * are now tighter:
73 *
74 * 1. At most one invocation of ddi_periodic_delete(9F) will actually
75 * perform the deletion; all others will return immediately.
76 * 2. The invocation that performs the deletion will _block_ until
77 * the handler is no longer running, and all resources have been
78 * released.
79 *
80 * We effect this model by removing the periodic being cancelled from the
81 * global list and marking it DPF_CANCELLED. This will prevent further
82 * execution of the handler. We then wait on a CV until the DPF_EXECUTING
83 * and DPF_DISPATCHED flags are clear, which means the periodic is removed
84 * from all request queues, is no longer executing, and may be freed. At this
85 * point we return the opaque ID to the id_space and free the memory.
86 *
87 * NOTE:
88 * The ddi_periodic_add(9F) interface is presently limited to a minimum period
89 * of 10ms between firings.
90 */
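
/*
 * For reference, the flag transitions described above, assuming no
 * cancellation occurs (a sketch of the common path only):
 *
 *      idle                    flags == 0
 *      cyclic fires            DPF_DISPATCHED          (periodic_cyclic_handler)
 *      handler running         DPF_DISPATCHED |
 *                              DPF_EXECUTING           (periodic_execute)
 *      handler returns         flags == 0, dpr_cv broadcast
 *
 * ddi_periodic_delete(9F) may set DPF_CANCELLED at any point; it then waits
 * on dpr_cv until both DPF_DISPATCHED and DPF_EXECUTING are clear.
 */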
91
92 /*
93 * Tuneables:
94 */
95 int ddi_periodic_max_id = 1024;
96 int ddi_periodic_taskq_threadcount = 4;
97 hrtime_t ddi_periodic_resolution = 10000000;
98
99 /*
100 * Globals:
101 */
102 static kmem_cache_t *periodic_cache;
103 static id_space_t *periodic_id_space;
104 static taskq_t *periodic_taskq;
105
106 /*
107 * periodics_lock protects the list of all periodics (periodics), and
108 * each of the soft interrupt request queues (periodic_softint_queue).
109 *
110 * Do not hold an individual periodic's lock while obtaining periodics_lock.
111 * While in the periodic_softint_queue list, the periodic will be marked
112 * DPF_DISPATCHED, and thus safe from frees. Only the invocation of
113 * i_untimeout() that removes the periodic from the global list is allowed
114 * to free it.
115 */
116 static kmutex_t periodics_lock;
117 static list_t periodics;
118 static list_t periodic_softint_queue[10]; /* for IPL1 up to IPL10 */
119
120 typedef enum periodic_ipl {
121 PERI_IPL_0 = 0,
122 PERI_IPL_1,
123 PERI_IPL_2,
124 PERI_IPL_3,
125 PERI_IPL_4,
126 PERI_IPL_5,
127 PERI_IPL_6,
128 PERI_IPL_7,
129 PERI_IPL_8,
130 PERI_IPL_9,
131 PERI_IPL_10,
132 } periodic_ipl_t;
133
134 /*
135 * This function may be called either from a soft interrupt handler
136 * (ddi_periodic_softintr), or as a taskq worker function.
137 */
138 static void
139 periodic_execute(void *arg)
140 {
141 ddi_periodic_impl_t *dpr = arg;
142 mutex_enter(&dpr->dpr_lock);
143
144 /*
145 * We must be DISPATCHED, but not yet EXECUTING:
146 */
147 VERIFY((dpr->dpr_flags & (DPF_DISPATCHED | DPF_EXECUTING)) ==
148 DPF_DISPATCHED);
149
150 if (!(dpr->dpr_flags & DPF_CANCELLED)) {
151 int level = dpr->dpr_level;
152 uint64_t count = dpr->dpr_fire_count;
153 /*
154 * If we have not yet been cancelled, then
155 * mark us executing:
156 */
157 dpr->dpr_flags |= DPF_EXECUTING;
158 mutex_exit(&dpr->dpr_lock);
159
160 /*
161 * Execute the handler, without holding locks:
162 */
163 DTRACE_PROBE4(ddi__periodic__execute, void *, dpr->dpr_handler,
164 void *, dpr->dpr_arg, int, level, uint64_t, count);
165 (*dpr->dpr_handler)(dpr->dpr_arg);
166 DTRACE_PROBE4(ddi__periodic__done, void *, dpr->dpr_handler,
167 void *, dpr->dpr_arg, int, level, uint64_t, count);
168
169 mutex_enter(&dpr->dpr_lock);
170 dpr->dpr_fire_count++;
171 }
172
173 /*
174 * We're done with this periodic for now, so release it and
175 * wake anybody that was waiting for us to be finished:
176 */
177 dpr->dpr_flags &= ~(DPF_DISPATCHED | DPF_EXECUTING);
178 cv_broadcast(&dpr->dpr_cv);
179 mutex_exit(&dpr->dpr_lock);
180 }
181
182 void
183 ddi_periodic_softintr(int level)
184 {
185 ddi_periodic_impl_t *dpr;
186 VERIFY(level >= PERI_IPL_1 && level <= PERI_IPL_10);
187
188 mutex_enter(&periodics_lock);
189 /*
190 * Pull the first scheduled periodic off the queue for this priority
191 * level:
192 */
193 while ((dpr = list_remove_head(&periodic_softint_queue[level - 1]))
194 != NULL) {
195 mutex_exit(&periodics_lock);
196 /*
197 * And execute it:
198 */
199 periodic_execute(dpr);
200 mutex_enter(&periodics_lock);
201 }
202 mutex_exit(&periodics_lock);
203 }
204
205 void
206 ddi_periodic_init(void)
207 {
208 int i;
209
210 /*
211 * Create a kmem_cache for request tracking objects, and a list
212 * to store them in so we can later delete based on opaque handles:
213 */
214 periodic_cache = kmem_cache_create("ddi_periodic",
215 sizeof (ddi_periodic_impl_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
216 list_create(&periodics, sizeof (ddi_periodic_impl_t),
217 offsetof(ddi_periodic_impl_t, dpr_link));
218
219 /*
220 * Initialise the identifier space for ddi_periodic_add(9F):
221 */
222 periodic_id_space = id_space_create("ddi_periodic", 1,
223 ddi_periodic_max_id);
224
225 /*
226 * Initialise the request queue for each soft interrupt level:
227 */
228 for (i = PERI_IPL_1; i <= PERI_IPL_10; i++) {
229 list_create(&periodic_softint_queue[i - 1],
230 sizeof (ddi_periodic_impl_t), offsetof(ddi_periodic_impl_t,
231 dpr_softint_link));
232 }
233
234 /*
235 * Create the taskq for running PERI_IPL_0 handlers. This taskq will
236 * _only_ be used with taskq_dispatch_ent(), and a taskq_ent_t
237 * pre-allocated with the ddi_periodic_impl_t.
238 */
239 periodic_taskq = taskq_create_instance("ddi_periodic_taskq", -1,
240 ddi_periodic_taskq_threadcount, maxclsyspri, 0, 0, 0);
241
242 /*
243 * Initialize the mutex lock used for the soft interrupt request
244 * queues.
245 */
246 mutex_init(&periodics_lock, NULL, MUTEX_ADAPTIVE, NULL);
247 }
248
249 static void
250 periodic_cyclic_handler(void *arg)
251 {
252 extern void sir_on(int);
253 ddi_periodic_impl_t *dpr = arg;
254
255 mutex_enter(&dpr->dpr_lock);
256 /*
257 * If we've been cancelled, or we're already dispatched, then exit
258 * immediately:
259 */
260 if (dpr->dpr_flags & (DPF_CANCELLED | DPF_DISPATCHED)) {
261 mutex_exit(&dpr->dpr_lock);
262 return;
263 }
264 VERIFY(!(dpr->dpr_flags & DPF_EXECUTING));
265
266 /*
267 * This periodic is not presently dispatched, so dispatch it now:
268 */
269 dpr->dpr_flags |= DPF_DISPATCHED;
270 mutex_exit(&dpr->dpr_lock);
271
272 if (dpr->dpr_level == PERI_IPL_0) {
273 /*
274 * DDI_IPL_0 periodics are dispatched onto the taskq:
275 */
276 taskq_dispatch_ent(periodic_taskq, periodic_execute,
277 dpr, 0, &dpr->dpr_taskq_ent);
278 } else {
279 /*
280 * Higher priority periodics are handled by a soft
281 * interrupt handler. Enqueue us for processing and
282 * fire the soft interrupt:
283 */
284 mutex_enter(&periodics_lock);
285 list_insert_tail(&periodic_softint_queue[dpr->dpr_level - 1],
286 dpr);
287 mutex_exit(&periodics_lock);
288
289 /*
290 * Raise the soft interrupt level for this periodic:
291 */
292 sir_on(dpr->dpr_level);
293 }
294 }
295
296 static void
297 periodic_destroy(ddi_periodic_impl_t *dpr)
298 {
299 if (dpr == NULL)
300 return;
301
302 /*
303 * By now, we should have a periodic that is not busy, and has been
304 * cancelled:
305 */
306 VERIFY(dpr->dpr_flags == DPF_CANCELLED);
307
308 id_free(periodic_id_space, dpr->dpr_id);
309 kmem_cache_free(periodic_cache, dpr);
310 }
311
312 static ddi_periodic_impl_t *
313 periodic_create(void)
314 {
315 ddi_periodic_impl_t *dpr;
316
317 dpr = kmem_cache_alloc(periodic_cache, KM_SLEEP);
318 bzero(dpr, sizeof (*dpr));
319 dpr->dpr_id = id_alloc(periodic_id_space);
320 mutex_init(&dpr->dpr_lock, NULL, MUTEX_ADAPTIVE, NULL);
321 cv_init(&dpr->dpr_cv, NULL, CV_DEFAULT, NULL);
322
323 return (dpr);
324 }
325
326 timeout_t
327 i_timeout(void (*func)(void *), void *arg, hrtime_t interval, int level)
328 {
329 cyc_handler_t cyh;
330 cyc_time_t cyt;
331 ddi_periodic_impl_t *dpr;
332
333 VERIFY(func != NULL);
334 VERIFY(level >= 0 && level <= 10);
335
336 /*
337 * Allocate object to track this periodic:
338 */
339 dpr = periodic_create();
340 dpr->dpr_level = level;
341 dpr->dpr_handler = func;
342 dpr->dpr_arg = arg;
343
344 /*
345 * The resolution must be finer than or equal to
346 * the requested interval. If it's not, round the interval up to
347 * the resolution.
348 * Note: there is currently a restriction. Regardless of the
349 * clock resolution used here, 10ms is used as the timer resolution.
350 * Even on a 1ms resolution timer, the minimum interval is 10ms.
351 */
352 if (ddi_periodic_resolution > interval) {
353 uintptr_t pc = (uintptr_t)dpr->dpr_handler;
354 ulong_t off;
355 cmn_err(CE_WARN,
356 "The periodic timeout (handler=%s, interval=%lld) "
357 "requests a finer interval than the supported resolution. "
358 "It rounds up to %lld\n", kobj_getsymname(pc, &off),
359 interval, ddi_periodic_resolution);
360 interval = ddi_periodic_resolution;
361 }
362
363 /*
364 * If the specified interval is already a multiple of
365 * the resolution, use it as is. Otherwise, round it
366 * up to a multiple of the timer resolution.
367 */
368 dpr->dpr_interval = roundup(interval, ddi_periodic_resolution);
369
370 /*
371 * Create the underlying cyclic:
372 */
373 cyh.cyh_func = periodic_cyclic_handler;
374 cyh.cyh_arg = dpr;
375 cyh.cyh_level = CY_LOCK_LEVEL;
376
377 cyt.cyt_when = roundup(gethrtime() + dpr->dpr_interval,
378 ddi_periodic_resolution);
379 cyt.cyt_interval = dpr->dpr_interval;
380
381 mutex_enter(&cpu_lock);
382 dpr->dpr_cyclic_id = cyclic_add(&cyh, &cyt);
383 mutex_exit(&cpu_lock);
384
385 /*
386 * Make the id visible to ddi_periodic_delete(9F) before we
387 * return it:
388 */
389 mutex_enter(&periodics_lock);
390 list_insert_tail(&periodics, dpr);
391 mutex_exit(&periodics_lock);
392
393 return ((timeout_t)(uintptr_t)dpr->dpr_id);
394 }
395
396 /*
397 * void
398 * i_untimeout(timeout_t req)
399 *
400 * Overview
401 * i_untimeout() is an internal function canceling the i_timeout()
402 * request previously issued.
403 * This function is used for ddi_periodic_delete(9F).
404 *
405 * Argument
406 * req: timeout_t opaque value i_timeout() returned previously.
407 *
408 * Return value
409 * Nothing.
410 *
411 * Caller's context
412 * i_untimeout() can be called in user, kernel or interrupt context.
413 * It cannot be called in high interrupt context.
414 *
415 * Note. This function is used by ddi_periodic_delete(), which cannot
416 * be called in interrupt context. As a result, this function is called
417 * in user or kernel context only in practice.
418 */
419 void
420 i_untimeout(timeout_t id)
421 {
422 ddi_periodic_impl_t *dpr;
423
424 /*
425 * Find the periodic in the list of all periodics and remove it.
426 * If we find it in (and remove it from) the global list, we have
427 * license to free it once it is no longer busy.
428 */
429 mutex_enter(&periodics_lock);
430 for (dpr = list_head(&periodics); dpr != NULL; dpr =
431 list_next(&periodics, dpr)) {
432 if (dpr->dpr_id == (id_t)(uintptr_t)id) {
433 list_remove(&periodics, dpr);
434 break;
435 }
436 }
437 mutex_exit(&periodics_lock);
438
439 /*
440 * We could not find a periodic for this id, so bail out:
441 */
442 if (dpr == NULL)
443 return;
444
445 mutex_enter(&dpr->dpr_lock);
446 /*
447 * We should be the only one trying to cancel this periodic:
448 */
449 VERIFY(!(dpr->dpr_flags & DPF_CANCELLED));
450 /*
451 * Mark the periodic as cancelled:
452 */
453 dpr->dpr_flags |= DPF_CANCELLED;
454 mutex_exit(&dpr->dpr_lock);
455
456 /*
457 * Cancel our cyclic. cyclic_remove() guarantees that the cyclic
458 * handler will not run again after it returns. Note that the cyclic
459 * handler merely _dispatches_ the periodic, so this does _not_ mean
460 * the periodic handler is also finished running.
461 */
462 mutex_enter(&cpu_lock);
463 cyclic_remove(dpr->dpr_cyclic_id);
464 mutex_exit(&cpu_lock);
465
466 /*
467 * Wait until the periodic handler is no longer running:
468 */
469 mutex_enter(&dpr->dpr_lock);
470 while (dpr->dpr_flags & (DPF_DISPATCHED | DPF_EXECUTING)) {
471 cv_wait(&dpr->dpr_cv, &dpr->dpr_lock);
472 }
473 mutex_exit(&dpr->dpr_lock);
474
475 periodic_destroy(dpr);
476 }
|