Print this page
OS-2366 ddi_periodic_add(9F) is entirely rubbish
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/sun4/os/intr.c
+++ new/usr/src/uts/sun4/os/intr.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
↓ open down ↓ |
14 lines elided |
↑ open up ↑ |
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 +/*
26 + * Copyright (c) 2013, Joyent, Inc. All rights reserved.
27 + */
25 28
26 29 #include <sys/sysmacros.h>
27 30 #include <sys/stack.h>
28 31 #include <sys/cpuvar.h>
29 32 #include <sys/ivintr.h>
30 33 #include <sys/intreg.h>
31 34 #include <sys/membar.h>
32 35 #include <sys/kmem.h>
33 36 #include <sys/intr.h>
34 37 #include <sys/sunddi.h>
35 38 #include <sys/sunndi.h>
↓ open down ↓ |
1 lines elided |
↑ open up ↑ |
36 39 #include <sys/cmn_err.h>
37 40 #include <sys/privregs.h>
38 41 #include <sys/systm.h>
39 42 #include <sys/archsystm.h>
40 43 #include <sys/machsystm.h>
41 44 #include <sys/x_call.h>
42 45 #include <vm/seg_kp.h>
43 46 #include <sys/debug.h>
44 47 #include <sys/cyclic.h>
45 48 #include <sys/kdi_impl.h>
46 -#include <sys/ddi_timer.h>
49 +#include <sys/ddi_periodic.h>
47 50
48 51 #include <sys/cpu_sgnblk_defs.h>
49 52
50 53 /* Global locks which protect the interrupt distribution lists */
51 54 static kmutex_t intr_dist_lock;
52 55 static kmutex_t intr_dist_cpu_lock;
53 56
54 57 /* Head of the interrupt distribution lists */
55 58 static struct intr_dist *intr_dist_head = NULL;
56 59 static struct intr_dist *intr_dist_whead = NULL;
57 60
58 61 static uint64_t siron_inum[DDI_IPL_10]; /* software interrupt numbers */
59 62 uint64_t *siron_cpu_inum = NULL;
60 63 uint64_t siron_poke_cpu_inum;
61 64 static int siron_cpu_setup(cpu_setup_t, int, void *);
62 65 extern uint_t softlevel1();
63 66
64 67 static uint64_t siron1_inum; /* backward compatibility */
65 68 uint64_t poke_cpu_inum;
66 69 uint_t poke_cpu_intr(caddr_t arg1, caddr_t arg2);
67 70 uint_t siron_poke_cpu_intr(caddr_t arg1, caddr_t arg2);
68 71
69 72 /*
70 73 * Variable to enable/disable printing a message when an invalid vecintr
71 74 * is received.
72 75 */
73 76 uint_t ignore_invalid_vecintr = 0;
74 77
75 78 /*
76 79 * Note:-
77 80 * siron_pending was originally created to prevent a resource over consumption
78 81 * bug in setsoftint(exhaustion of interrupt pool free list).
79 82 * Its original intention is obsolete with the use of iv_pending in
80 83 * setsoftint. However, siron_pending stayed around, acting as a second
81 84 * gatekeeper preventing soft interrupts from being queued. In this capacity,
82 85 * it can lead to hangs on MP systems, where due to global visibility issues
83 86 * it can end up set while iv_pending is reset, preventing soft interrupts from
84 87 * ever being processed. In addition to its gatekeeper role, intr_init also
85 88 * uses it to flag the situation where siron() was called before siron_inum has
86 89 * been defined.
87 90 *
88 91 * siron() does not need an extra gatekeeper; any cpu that wishes should be
89 92 * allowed to queue a soft interrupt. It is softint()'s job to ensure
90 93 * correct handling of the queues. Therefore, siron_pending has been
91 94 * stripped of its gatekeeper task, retaining only its intr_init job, where
92 95 * it indicates that there is a pending need to call siron().
93 96 */
94 97 static int siron_pending[DDI_IPL_10]; /* software interrupt pending flags */
95 98 static int siron1_pending; /* backward compatibility */
96 99
97 100 int intr_policy = INTR_WEIGHTED_DIST; /* interrupt distribution policy */
98 101 int intr_dist_debug = 0;
99 102 int32_t intr_dist_weight_max = 1;
100 103 int32_t intr_dist_weight_maxmax = 1000;
101 104 int intr_dist_weight_maxfactor = 2;
102 105 #define INTR_DEBUG(args) if (intr_dist_debug) cmn_err args
103 106
104 107 /*
105 108 * intr_init() - Interrupt initialization
106 109 * Initialize the system's interrupt vector table.
107 110 */
108 111 void
109 112 intr_init(cpu_t *cp)
110 113 {
111 114 int i;
↓ open down ↓ |
55 lines elided |
↑ open up ↑ |
112 115 extern uint_t softlevel1();
113 116
114 117 init_ivintr();
115 118 REGISTER_BBUS_INTR();
116 119
117 120 /*
118 121 * Register these software interrupts for ddi timer.
119 122 * Software interrupts up to the level 10 are supported.
120 123 */
121 124 for (i = DDI_IPL_1; i <= DDI_IPL_10; i++) {
122 - siron_inum[i-1] = add_softintr(i, (softintrfunc)timer_softintr,
125 + siron_inum[i - 1] = add_softintr(i,
126 + (softintrfunc)ddi_periodic_softintr,
123 127 (caddr_t)(uintptr_t)(i), SOFTINT_ST);
124 128 }
125 129
126 130 siron1_inum = add_softintr(PIL_1, softlevel1, 0, SOFTINT_ST);
127 131 poke_cpu_inum = add_softintr(PIL_13, poke_cpu_intr, 0, SOFTINT_MT);
128 132 siron_poke_cpu_inum = add_softintr(PIL_13,
129 133 siron_poke_cpu_intr, 0, SOFTINT_MT);
130 134 cp->cpu_m.poke_cpu_outstanding = B_FALSE;
131 135
132 136 mutex_init(&intr_dist_lock, NULL, MUTEX_DEFAULT, NULL);
133 137 mutex_init(&intr_dist_cpu_lock, NULL, MUTEX_DEFAULT, NULL);
134 138
135 139 /*
136 140 * A soft interrupt may have been requested prior to the initialization
137 141 * of soft interrupts. Soft interrupts can't be dispatched until after
138 142 * init_intr(), so we have to wait until now before we can dispatch the
139 143 * pending soft interrupt (if any).
140 144 */
141 145 for (i = DDI_IPL_1; i <= DDI_IPL_10; i++) {
142 146 if (siron_pending[i-1]) {
143 147 siron_pending[i-1] = 0;
144 148 sir_on(i);
145 149 }
146 150 }
147 151 if (siron1_pending) {
148 152 siron1_pending = 0;
149 153 siron();
150 154 }
151 155 }
152 156
153 157 /*
154 158 * poke_cpu_intr - fall through when poke_cpu calls
155 159 */
156 160 /* ARGSUSED */
157 161 uint_t
158 162 poke_cpu_intr(caddr_t arg1, caddr_t arg2)
159 163 {
160 164 CPU->cpu_m.poke_cpu_outstanding = B_FALSE;
161 165 membar_stld_stst();
162 166 return (1);
163 167 }
164 168
165 169 /*
166 170 * Trigger software interrupts dedicated to ddi timer.
167 171 */
168 172 void
169 173 sir_on(int level)
170 174 {
171 175 ASSERT(level >= DDI_IPL_1 && level <= DDI_IPL_10);
172 176 if (siron_inum[level-1])
173 177 setsoftint(siron_inum[level-1]);
174 178 else
175 179 siron_pending[level-1] = 1;
176 180 }
177 181
178 182 /*
179 183 * kmdb uses siron (and thus setsoftint) while the world is stopped in order to
180 184 * inform its driver component that there's work to be done. We need to keep
181 185 * DTrace from instrumenting kmdb's siron and setsoftint. We duplicate siron,
182 186 * giving kmdb's version a kdi_ prefix to keep DTrace at bay. The
183 187 * implementation of setsoftint is complicated enough that we don't want to
184 188 * duplicate it, but at the same time we don't want to preclude tracing either.
185 189 * The meat of setsoftint() therefore goes into kdi_setsoftint, with
186 190 * setsoftint() implemented as a wrapper. This allows tracing, while still
187 191 * providing a way for kmdb to sneak in unmolested.
188 192 */
189 193 void
190 194 kdi_siron(void)
191 195 {
192 196 if (siron1_inum != 0)
193 197 kdi_setsoftint(siron1_inum);
194 198 else
195 199 siron1_pending = 1;
196 200 }
197 201
198 202 void
199 203 setsoftint(uint64_t inum)
200 204 {
201 205 kdi_setsoftint(inum);
202 206 }
203 207
204 208 /*
205 209 * Generates softlevel1 interrupt on current CPU if it
206 210 * is not pending already.
207 211 */
208 212 void
209 213 siron(void)
210 214 {
211 215 uint64_t inum;
212 216
213 217 if (siron1_inum != 0) {
214 218 /*
215 219 * Once siron_cpu_inum has been allocated, we can
216 220 * use per-CPU siron inum.
217 221 */
218 222 if (siron_cpu_inum && siron_cpu_inum[CPU->cpu_id] != 0)
219 223 inum = siron_cpu_inum[CPU->cpu_id];
220 224 else
221 225 inum = siron1_inum;
222 226
223 227 setsoftint(inum);
224 228 } else
225 229 siron1_pending = 1;
226 230 }
227 231
228 232
229 233 static void
230 234 siron_init(void)
231 235 {
232 236 /*
233 237 * We just allocate memory for per-cpu siron right now. Rest of
234 238 * the work is done when CPU is configured.
235 239 */
236 240 siron_cpu_inum = kmem_zalloc(sizeof (uint64_t) * NCPU, KM_SLEEP);
237 241 }
238 242
239 243 /*
240 244 * This routine creates per-CPU siron inum for CPUs which are
241 245 * configured during boot.
242 246 */
243 247 void
244 248 siron_mp_init()
245 249 {
246 250 cpu_t *c;
247 251
248 252 /*
249 253 * Get the memory for per-CPU siron inums
250 254 */
251 255 siron_init();
252 256
253 257 mutex_enter(&cpu_lock);
254 258 c = cpu_list;
255 259 do {
256 260 (void) siron_cpu_setup(CPU_CONFIG, c->cpu_id, NULL);
257 261 } while ((c = c->cpu_next) != cpu_list);
258 262
259 263 register_cpu_setup_func(siron_cpu_setup, NULL);
260 264 mutex_exit(&cpu_lock);
261 265 }
262 266
263 267 /*
264 268 * siron_poke_cpu_intr - cross-call handler.
265 269 */
266 270 /* ARGSUSED */
267 271 uint_t
268 272 siron_poke_cpu_intr(caddr_t arg1, caddr_t arg2)
269 273 {
270 274 /* generate level1 softint */
271 275 siron();
272 276 return (1);
273 277 }
274 278
275 279 /*
276 280 * This routine generates a cross-call on target CPU(s).
277 281 */
278 282 void
279 283 siron_poke_cpu(cpuset_t poke)
280 284 {
281 285 int cpuid = CPU->cpu_id;
282 286
283 287 if (CPU_IN_SET(poke, cpuid)) {
284 288 siron();
285 289 CPUSET_DEL(poke, cpuid);
286 290 if (CPUSET_ISNULL(poke))
287 291 return;
288 292 }
289 293
290 294 xt_some(poke, setsoftint_tl1, siron_poke_cpu_inum, 0);
291 295 }
292 296
293 297 /*
294 298 * This callback function allows us to create per-CPU siron inum.
295 299 */
296 300 /* ARGSUSED */
297 301 static int
298 302 siron_cpu_setup(cpu_setup_t what, int id, void *arg)
299 303 {
300 304 cpu_t *cp = cpu[id];
301 305
302 306 ASSERT(MUTEX_HELD(&cpu_lock));
303 307 ASSERT(cp != NULL);
304 308
305 309 switch (what) {
306 310 case CPU_CONFIG:
307 311 siron_cpu_inum[cp->cpu_id] = add_softintr(PIL_1,
308 312 (softintrfunc)softlevel1, 0, SOFTINT_ST);
309 313 break;
310 314 case CPU_UNCONFIG:
311 315 (void) rem_softintr(siron_cpu_inum[cp->cpu_id]);
312 316 siron_cpu_inum[cp->cpu_id] = 0;
313 317 break;
314 318 default:
315 319 break;
316 320 }
317 321
318 322 return (0);
319 323 }
320 324
321 325 /*
322 326 * no_ivintr()
323 327 * called by setvecint_tl1() through sys_trap()
324 328 * vector interrupt received but not valid or not
325 329 * registered in intr_vec_table
326 330 * considered as a spurious mondo interrupt
327 331 */
328 332 /* ARGSUSED */
329 333 void
330 334 no_ivintr(struct regs *rp, int inum, int pil)
331 335 {
332 336 if (!ignore_invalid_vecintr)
333 337 cmn_err(CE_WARN, "invalid vector intr: number 0x%x, pil 0x%x",
334 338 inum, pil);
335 339
336 340 #ifdef DEBUG_VEC_INTR
337 341 prom_enter_mon();
338 342 #endif /* DEBUG_VEC_INTR */
339 343 }
340 344
341 345 void
342 346 intr_dequeue_req(uint_t pil, uint64_t inum)
343 347 {
344 348 intr_vec_t *iv, *next, *prev;
345 349 struct machcpu *mcpu;
346 350 uint32_t clr;
347 351 processorid_t cpu_id;
348 352 extern uint_t getpstate(void);
349 353
350 354 ASSERT((getpstate() & PSTATE_IE) == 0);
351 355
352 356 mcpu = &CPU->cpu_m;
353 357 cpu_id = CPU->cpu_id;
354 358
355 359 iv = (intr_vec_t *)inum;
356 360 prev = NULL;
357 361 next = mcpu->intr_head[pil];
358 362
359 363 /* Find a matching entry in the list */
360 364 while (next != NULL) {
361 365 if (next == iv)
362 366 break;
363 367 prev = next;
364 368 next = IV_GET_PIL_NEXT(next, cpu_id);
365 369 }
366 370
367 371 if (next != NULL) {
368 372 intr_vec_t *next_iv = IV_GET_PIL_NEXT(next, cpu_id);
369 373
370 374 /* Remove entry from list */
371 375 if (prev != NULL)
372 376 IV_SET_PIL_NEXT(prev, cpu_id, next_iv); /* non-head */
373 377 else
374 378 mcpu->intr_head[pil] = next_iv; /* head */
375 379
376 380 if (next_iv == NULL)
377 381 mcpu->intr_tail[pil] = prev; /* tail */
378 382 }
379 383
380 384 /* Clear pending interrupts at this level if the list is empty */
381 385 if (mcpu->intr_head[pil] == NULL) {
382 386 clr = 1 << pil;
383 387 if (pil == PIL_14)
384 388 clr |= (TICK_INT_MASK | STICK_INT_MASK);
385 389 wr_clr_softint(clr);
386 390 }
387 391 }
388 392
389 393
390 394 /*
391 395 * Send a directed interrupt of specified interrupt number id to a cpu.
392 396 */
393 397 void
394 398 send_dirint(
395 399 int cpuix, /* cpu to be interrupted */
396 400 int intr_id) /* interrupt number id */
397 401 {
398 402 xt_one(cpuix, setsoftint_tl1, intr_id, 0);
399 403 }
400 404
401 405 /*
402 406 * Take the specified CPU out of participation in interrupts.
403 407 * Called by p_online(2) when a processor is being taken off-line.
404 408 * This allows interrupt threads being handled on the processor to
405 409 * complete before the processor is idled.
406 410 */
407 411 int
408 412 cpu_disable_intr(struct cpu *cp)
409 413 {
410 414 ASSERT(MUTEX_HELD(&cpu_lock));
411 415
412 416 /*
413 417 * Turn off the CPU_ENABLE flag before calling the redistribution
414 418 * function, since it checks for this in the cpu flags.
415 419 */
416 420 cp->cpu_flags &= ~CPU_ENABLE;
417 421
418 422 intr_redist_all_cpus();
419 423
420 424 return (0);
421 425 }
422 426
423 427 /*
424 428 * Allow the specified CPU to participate in interrupts.
425 429 * Called by p_online(2) if a processor could not be taken off-line
426 430 * because of bound threads, in order to resume processing interrupts.
427 431 * Also called after starting a processor.
428 432 */
429 433 void
430 434 cpu_enable_intr(struct cpu *cp)
431 435 {
432 436 ASSERT(MUTEX_HELD(&cpu_lock));
433 437
434 438 cp->cpu_flags |= CPU_ENABLE;
435 439
436 440 intr_redist_all_cpus();
437 441 }
438 442
439 443 /*
440 444 * Add function to callback list for intr_redist_all_cpus. We keep two lists,
441 445 * one for weighted callbacks and one for normal callbacks. Weighted callbacks
442 446 * are issued to redirect interrupts of a specified weight, from heavy to
443 447 * light. This allows all the interrupts of a given weight to be redistributed
444 448 * for all weighted nexus drivers prior to those of less weight.
445 449 */
446 450 static void
447 451 intr_dist_add_list(struct intr_dist **phead, void (*func)(void *), void *arg)
448 452 {
449 453 struct intr_dist *new = kmem_alloc(sizeof (*new), KM_SLEEP);
450 454 struct intr_dist *iptr;
451 455 struct intr_dist **pptr;
452 456
453 457 ASSERT(func);
454 458 new->func = func;
455 459 new->arg = arg;
456 460 new->next = NULL;
457 461
458 462 /* Add to tail so that redistribution occurs in original order. */
459 463 mutex_enter(&intr_dist_lock);
460 464 for (iptr = *phead, pptr = phead; iptr != NULL;
461 465 pptr = &iptr->next, iptr = iptr->next) {
462 466 /* check for problems as we locate the tail */
463 467 if ((iptr->func == func) && (iptr->arg == arg)) {
464 468 cmn_err(CE_PANIC, "intr_dist_add_list(): duplicate");
465 469 /*NOTREACHED*/
466 470 }
467 471 }
468 472 *pptr = new;
469 473
470 474 mutex_exit(&intr_dist_lock);
471 475 }
472 476
473 477 void
474 478 intr_dist_add(void (*func)(void *), void *arg)
475 479 {
476 480 intr_dist_add_list(&intr_dist_head, (void (*)(void *))func, arg);
477 481 }
478 482
479 483 void
480 484 intr_dist_add_weighted(void (*func)(void *, int32_t, int32_t), void *arg)
481 485 {
482 486 intr_dist_add_list(&intr_dist_whead, (void (*)(void *))func, arg);
483 487 }
484 488
485 489 /*
486 490 * Search for the interrupt distribution structure with the specified
487 491 * mondo vec reg in the interrupt distribution list. If a match is found,
488 492 * then delete the entry from the list. The caller is responsible for
489 493 * modifying the mondo vector registers.
490 494 */
491 495 static void
492 496 intr_dist_rem_list(struct intr_dist **headp, void (*func)(void *), void *arg)
493 497 {
494 498 struct intr_dist *iptr;
495 499 struct intr_dist **vect;
496 500
497 501 mutex_enter(&intr_dist_lock);
498 502 for (iptr = *headp, vect = headp;
499 503 iptr != NULL; vect = &iptr->next, iptr = iptr->next) {
500 504 if ((iptr->func == func) && (iptr->arg == arg)) {
501 505 *vect = iptr->next;
502 506 kmem_free(iptr, sizeof (struct intr_dist));
503 507 mutex_exit(&intr_dist_lock);
504 508 return;
505 509 }
506 510 }
507 511
508 512 if (!panicstr)
509 513 cmn_err(CE_PANIC, "intr_dist_rem_list: not found");
510 514 mutex_exit(&intr_dist_lock);
511 515 }
512 516
513 517 void
514 518 intr_dist_rem(void (*func)(void *), void *arg)
515 519 {
516 520 intr_dist_rem_list(&intr_dist_head, (void (*)(void *))func, arg);
517 521 }
518 522
519 523 void
520 524 intr_dist_rem_weighted(void (*func)(void *, int32_t, int32_t), void *arg)
521 525 {
522 526 intr_dist_rem_list(&intr_dist_whead, (void (*)(void *))func, arg);
523 527 }
524 528
525 529 /*
526 530 * Initiate interrupt redistribution. Redistribution improves the isolation
527 531 * associated with interrupt weights by ordering operations from heavy weight
528 532 * to light weight. When a CPU's orientation changes relative to interrupts,
529 533 * there is *always* a redistribution to accommodate this change (call to
530 534 * intr_redist_all_cpus()). As devices (not CPUs) attach/detach it is possible
531 535 * that a redistribution could improve the quality of an initialization. For
532 536 * example, if you are not using a NIC it may not be attached with s10 (devfs).
533 537 * If you then configure the NIC (ifconfig), this may cause the NIC to attach
534 538 * and plumb interrupts. The CPU assignment for the NIC's interrupts is
535 539 * occurring late, so optimal "isolation" relative to weight is not occurring.
536 540 * The same applies to detach, although in this case doing the redistribution
537 541 * might improve "spread" for medium weight devices since the "isolation" of
538 542 * a higher weight device may no longer be present.
539 543 *
540 544 * NB: We should provide a utility to trigger redistribution (ala "intradm -r").
541 545 *
542 546 * NB: There is risk associated with automatically triggering execution of the
543 547 * redistribution code at arbitrary times. The risk comes from the fact that
544 548 * there is a lot of low-level hardware interaction associated with a
545 549 * redistribution. At some point we may want this code to perform automatic
546 550 * redistribution (redistribution thread; trigger timeout when add/remove
547 551 * weight delta is large enough, and call cv_signal from timeout - causing
548 552 * thread to call i_ddi_intr_redist_all_cpus()) but this is considered too
549 553 * risky at this time.
550 554 */
551 555 void
552 556 i_ddi_intr_redist_all_cpus()
553 557 {
554 558 mutex_enter(&cpu_lock);
555 559 INTR_DEBUG((CE_CONT, "intr_dist: i_ddi_intr_redist_all_cpus\n"));
556 560 intr_redist_all_cpus();
557 561 mutex_exit(&cpu_lock);
558 562 }
559 563
560 564 /*
561 565 * Redistribute all interrupts
562 566 *
563 567 * This function redistributes all interrupting devices, running the
564 568 * parent callback functions for each node.
565 569 */
566 570 void
567 571 intr_redist_all_cpus(void)
568 572 {
569 573 struct cpu *cp;
570 574 struct intr_dist *iptr;
571 575 int32_t weight, max_weight;
572 576
573 577 ASSERT(MUTEX_HELD(&cpu_lock));
574 578 mutex_enter(&intr_dist_lock);
575 579
576 580 /*
577 581 * zero cpu_intr_weight on all cpus - it is safe to traverse
578 582 * cpu_list since we hold cpu_lock.
579 583 */
580 584 cp = cpu_list;
581 585 do {
582 586 cp->cpu_intr_weight = 0;
583 587 } while ((cp = cp->cpu_next) != cpu_list);
584 588
585 589 /*
586 590 * Assume that this redistribution may encounter a device weight
587 591 * via driver.conf tuning of "ddi-intr-weight" that is at most
588 592 * intr_dist_weight_maxfactor times larger.
589 593 */
590 594 max_weight = intr_dist_weight_max * intr_dist_weight_maxfactor;
591 595 if (max_weight > intr_dist_weight_maxmax)
592 596 max_weight = intr_dist_weight_maxmax;
593 597 intr_dist_weight_max = 1;
594 598
595 599 INTR_DEBUG((CE_CONT, "intr_dist: "
596 600 "intr_redist_all_cpus: %d-0\n", max_weight));
597 601
598 602 /*
599 603 * Redistribute weighted, from heavy to light. The callback that
600 604 * specifies a weight equal to weight_max should redirect all
601 605 * interrupts of weight weight_max or greater [weight_max, inf.).
602 606 * Interrupts of lesser weight should be processed on the call with
603 607 * the matching weight. This allows all the heavier weight interrupts
604 608 * on all weighted busses (multiple pci busses) to be redirected prior
605 609 * to any lesser weight interrupts.
606 610 */
607 611 for (weight = max_weight; weight >= 0; weight--)
608 612 for (iptr = intr_dist_whead; iptr != NULL; iptr = iptr->next)
609 613 ((void (*)(void *, int32_t, int32_t))iptr->func)
610 614 (iptr->arg, max_weight, weight);
611 615
612 616 /* redistribute normal (non-weighted) interrupts */
613 617 for (iptr = intr_dist_head; iptr != NULL; iptr = iptr->next)
614 618 ((void (*)(void *))iptr->func)(iptr->arg);
615 619 mutex_exit(&intr_dist_lock);
616 620 }
617 621
618 622 void
619 623 intr_redist_all_cpus_shutdown(void)
620 624 {
621 625 intr_policy = INTR_CURRENT_CPU;
622 626 intr_redist_all_cpus();
623 627 }
624 628
625 629 /*
626 630 * Determine what CPU to target, based on interrupt policy.
627 631 *
628 632 * INTR_FLAT_DIST: hold a current CPU pointer in a static variable and
629 633 * advance through interrupt enabled cpus (round-robin).
630 634 *
631 635 * INTR_WEIGHTED_DIST: search for an enabled CPU with the lowest
632 636 * cpu_intr_weight, round robin when all equal.
633 637 *
634 638 * Weighted interrupt distribution provides two things: "spread" of weight
635 639 * (associated with algorithm itself) and "isolation" (associated with a
636 640 * particular device weight). A redistribution is what provides optimal
637 641 * "isolation" of heavy weight interrupts, optimal "spread" of weight
638 642 * (relative to what came before) is always occurring.
639 643 *
640 644 * An interrupt weight is a subjective number that represents the
641 645 * percentage of a CPU required to service a device's interrupts: the
642 646 * default weight is 0% (however the algorithm still maintains
643 647 * round-robin), a network interface controller (NIC) may have a large
644 648 * weight (35%). Interrupt weight only has meaning relative to the
645 649 * interrupt weight of other devices: a CPU can be weighted more than
646 650 * 100%, and a single device might consume more than 100% of a CPU.
647 651 *
648 652 * A coarse interrupt weight can be defined by the parent nexus driver
649 653 * based on bus specific information, like pci class codes. A nexus
650 654 * driver that supports device interrupt weighting for its children
651 655 * should call intr_dist_cpuid_add/rem_device_weight(), which adds
652 656 * and removes the weight of a device from the CPU that an interrupt
653 657 * is directed at. The quality of initialization improves when the
654 658 * device interrupt weights more accurately reflect actual run-time weights,
655 659 * and as the assignments are ordered from heavy to light.
656 660 *
657 661 * The implementation also supports interrupt weight being specified in
658 662 * driver.conf files via the property "ddi-intr-weight", which takes
659 663 * precedence over the nexus supplied weight. This support is added to
660 664 * permit possible tweaking in the product in response to customer
661 665 * problems. This is not a formal or committed interface.
662 666 *
663 667 * While a weighted approach chooses the CPU providing the best spread
664 668 * given past weights, less than optimal isolation can result in cases
665 669 * where heavy weight devices show up last. The nexus driver's interrupt
666 670 * redistribution logic should use intr_dist_add/rem_weighted so that
667 671 * interrupts can be redistributed heavy first for optimal isolation.
668 672 */
669 673 uint32_t
670 674 intr_dist_cpuid(void)
671 675 {
672 676 static struct cpu *curr_cpu;
673 677 struct cpu *start_cpu;
674 678 struct cpu *new_cpu;
675 679 struct cpu *cp;
676 680 int cpuid = -1;
677 681
678 682 /* Establish exclusion for curr_cpu and cpu_intr_weight manipulation */
679 683 mutex_enter(&intr_dist_cpu_lock);
680 684
681 685 switch (intr_policy) {
682 686 case INTR_CURRENT_CPU:
683 687 cpuid = CPU->cpu_id;
684 688 break;
685 689
686 690 case INTR_BOOT_CPU:
687 691 panic("INTR_BOOT_CPU no longer supported.");
688 692 /*NOTREACHED*/
689 693
690 694 case INTR_FLAT_DIST:
691 695 case INTR_WEIGHTED_DIST:
692 696 default:
693 697 /*
694 698 * Ensure that curr_cpu is valid - cpu_next will be NULL if
695 699 * the cpu has been deleted (cpu structs are never freed).
696 700 */
697 701 if (curr_cpu == NULL || curr_cpu->cpu_next == NULL)
698 702 curr_cpu = CPU;
699 703
700 704 /*
701 705 * Advance to online CPU after curr_cpu (round-robin). For
702 706 * INTR_WEIGHTED_DIST we choose the cpu with the lightest
703 707 * weight. For a nexus that does not support weight the
704 708 * default weight of zero is used. We degrade to round-robin
705 709 * behavior among equal weights. The default weight is zero
706 710 * and round-robin behavior continues.
707 711 *
708 712 * Disable preemption while traversing cpu_next_onln to
709 713 * ensure the list does not change. This works because
710 714 * modifiers of this list and other lists in a struct cpu
711 715 * call pause_cpus() before making changes.
712 716 */
713 717 kpreempt_disable();
714 718 cp = start_cpu = curr_cpu->cpu_next_onln;
715 719 new_cpu = NULL;
716 720 do {
717 721 /* Skip CPUs with interrupts disabled */
718 722 if ((cp->cpu_flags & CPU_ENABLE) == 0)
719 723 continue;
720 724
721 725 if (intr_policy == INTR_FLAT_DIST) {
722 726 /* select CPU */
723 727 new_cpu = cp;
724 728 break;
725 729 } else if ((new_cpu == NULL) ||
726 730 (cp->cpu_intr_weight < new_cpu->cpu_intr_weight)) {
727 731 /* Choose if lighter weight */
728 732 new_cpu = cp;
729 733 }
730 734 } while ((cp = cp->cpu_next_onln) != start_cpu);
731 735 ASSERT(new_cpu);
732 736 cpuid = new_cpu->cpu_id;
733 737
734 738 INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: "
735 739 "targeted\n", cpuid, new_cpu->cpu_intr_weight));
736 740
737 741 /* update static pointer for next round-robin */
738 742 curr_cpu = new_cpu;
739 743 kpreempt_enable();
740 744 break;
741 745 }
742 746 mutex_exit(&intr_dist_cpu_lock);
743 747 return (cpuid);
744 748 }
745 749
746 750 /*
747 751 * Add or remove the weight of a device from a CPU's interrupt weight.
748 752 *
749 753 * We expect nexus drivers to call intr_dist_cpuid_add/rem_device_weight for
750 754 * their children to improve the overall quality of interrupt initialization.
751 755 *
752 756 * If a nexus shares the CPU returned by a single intr_dist_cpuid() call
753 757 * among multiple devices (sharing ino) then the nexus should call
754 758 * intr_dist_cpuid_add/rem_device_weight for each device separately. Devices
755 759 * that share must specify the same cpuid.
756 760 *
757 761 * If a nexus driver is unable to determine the cpu at remove_intr time
758 762 * for some of its interrupts, then it should not call add_device_weight -
759 763 * intr_dist_cpuid will still provide round-robin.
760 764 *
761 765 * An established device weight (from dev_info node) takes precedence over
762 766 * the weight passed in. If a device weight is not already established
763 767 * then the passed in nexus weight is established.
764 768 */
765 769 void
766 770 intr_dist_cpuid_add_device_weight(uint32_t cpuid,
767 771 dev_info_t *dip, int32_t nweight)
768 772 {
769 773 int32_t eweight;
770 774
771 775 /*
772 776 * For non-weighted policy everything has weight of zero (and we get
773 777 * round-robin distribution from intr_dist_cpuid).
774 778 * NB: intr_policy is limited to this file. A weighted nexus driver
775 779 * calls this routine even if intr_policy has been patched to
776 780 * INTR_FLAT_DIST.
777 781 */
778 782 ASSERT(dip);
779 783 if (intr_policy != INTR_WEIGHTED_DIST)
780 784 return;
781 785
782 786 eweight = i_ddi_get_intr_weight(dip);
783 787 INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: +%2d/%2d for "
784 788 "%s#%d/%s#%d\n", cpuid, cpu[cpuid]->cpu_intr_weight,
785 789 nweight, eweight, ddi_driver_name(ddi_get_parent(dip)),
786 790 ddi_get_instance(ddi_get_parent(dip)),
787 791 ddi_driver_name(dip), ddi_get_instance(dip)));
788 792
789 793 /* if no established weight, establish nexus weight */
790 794 if (eweight < 0) {
791 795 if (nweight > 0)
792 796 (void) i_ddi_set_intr_weight(dip, nweight);
793 797 else
794 798 nweight = 0;
795 799 } else
796 800 nweight = eweight; /* use established weight */
797 801
798 802 /* Establish exclusion for cpu_intr_weight manipulation */
799 803 mutex_enter(&intr_dist_cpu_lock);
800 804 cpu[cpuid]->cpu_intr_weight += nweight;
801 805
802 806 /* update intr_dist_weight_max */
803 807 if (nweight > intr_dist_weight_max)
804 808 intr_dist_weight_max = nweight;
805 809 mutex_exit(&intr_dist_cpu_lock);
806 810 }
807 811
808 812 void
809 813 intr_dist_cpuid_rem_device_weight(uint32_t cpuid, dev_info_t *dip)
810 814 {
811 815 struct cpu *cp;
812 816 int32_t weight;
813 817
814 818 ASSERT(dip);
815 819 if (intr_policy != INTR_WEIGHTED_DIST)
816 820 return;
817 821
818 822 /* remove weight of device from cpu */
819 823 weight = i_ddi_get_intr_weight(dip);
820 824 if (weight < 0)
821 825 weight = 0;
822 826 INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: -%2d for "
823 827 "%s#%d/%s#%d\n", cpuid, cpu[cpuid]->cpu_intr_weight, weight,
824 828 ddi_driver_name(ddi_get_parent(dip)),
825 829 ddi_get_instance(ddi_get_parent(dip)),
826 830 ddi_driver_name(dip), ddi_get_instance(dip)));
827 831
828 832 /* Establish exclusion for cpu_intr_weight manipulation */
829 833 mutex_enter(&intr_dist_cpu_lock);
830 834 cp = cpu[cpuid];
831 835 cp->cpu_intr_weight -= weight;
832 836 if (cp->cpu_intr_weight < 0)
833 837 cp->cpu_intr_weight = 0; /* sanity */
834 838 mutex_exit(&intr_dist_cpu_lock);
835 839 }
836 840
837 841 ulong_t
838 842 create_softint(uint_t pil, uint_t (*func)(caddr_t, caddr_t), caddr_t arg1)
839 843 {
840 844 uint64_t inum;
841 845
842 846 inum = add_softintr(pil, func, arg1, SOFTINT_MT);
843 847 return ((ulong_t)inum);
844 848 }
845 849
846 850 void
847 851 invoke_softint(processorid_t cpuid, ulong_t hdl)
848 852 {
849 853 uint64_t inum = hdl;
850 854
851 855 if (cpuid == CPU->cpu_id)
852 856 setsoftint(inum);
853 857 else
854 858 xt_one(cpuid, setsoftint_tl1, inum, 0);
855 859 }
856 860
857 861 void
858 862 remove_softint(ulong_t hdl)
859 863 {
860 864 uint64_t inum = hdl;
861 865
862 866 (void) rem_softintr(inum);
863 867 }
864 868
865 869 void
866 870 sync_softint(cpuset_t set)
867 871 {
868 872 xt_sync(set);
869 873 }
↓ open down ↓ |
737 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX