1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 2013, Joyent, Inc. All rights reserved. 
27 */ 28 29 #include <sys/sysmacros.h> 30 #include <sys/stack.h> 31 #include <sys/cpuvar.h> 32 #include <sys/ivintr.h> 33 #include <sys/intreg.h> 34 #include <sys/membar.h> 35 #include <sys/kmem.h> 36 #include <sys/intr.h> 37 #include <sys/sunddi.h> 38 #include <sys/sunndi.h> 39 #include <sys/cmn_err.h> 40 #include <sys/privregs.h> 41 #include <sys/systm.h> 42 #include <sys/archsystm.h> 43 #include <sys/machsystm.h> 44 #include <sys/x_call.h> 45 #include <vm/seg_kp.h> 46 #include <sys/debug.h> 47 #include <sys/cyclic.h> 48 #include <sys/kdi_impl.h> 49 #include <sys/ddi_periodic.h> 50 51 #include <sys/cpu_sgnblk_defs.h> 52 53 /* Global locks which protect the interrupt distribution lists */ 54 static kmutex_t intr_dist_lock; 55 static kmutex_t intr_dist_cpu_lock; 56 57 /* Head of the interrupt distribution lists */ 58 static struct intr_dist *intr_dist_head = NULL; 59 static struct intr_dist *intr_dist_whead = NULL; 60 61 static uint64_t siron_inum[DDI_IPL_10]; /* software interrupt numbers */ 62 uint64_t *siron_cpu_inum = NULL; 63 uint64_t siron_poke_cpu_inum; 64 static int siron_cpu_setup(cpu_setup_t, int, void *); 65 extern uint_t softlevel1(); 66 67 static uint64_t siron1_inum; /* backward compatibility */ 68 uint64_t poke_cpu_inum; 69 uint_t poke_cpu_intr(caddr_t arg1, caddr_t arg2); 70 uint_t siron_poke_cpu_intr(caddr_t arg1, caddr_t arg2); 71 72 /* 73 * Variable to enable/disable printing a message when an invalid vecintr 74 * is received. 75 */ 76 uint_t ignore_invalid_vecintr = 0; 77 78 /* 79 * Note:- 80 * siron_pending was originally created to prevent a resource over consumption 81 * bug in setsoftint(exhaustion of interrupt pool free list). 82 * It's original intention is obsolete with the use of iv_pending in 83 * setsoftint. However, siron_pending stayed around, acting as a second 84 * gatekeeper preventing soft interrupts from being queued. 
In this capacity, 85 * it can lead to hangs on MP systems, where due to global visibility issues 86 * it can end up set while iv_pending is reset, preventing soft interrupts from 87 * ever being processed. In addition to its gatekeeper role, init_intr also 88 * uses it to flag the situation where siron() was called before siron_inum has 89 * been defined. 90 * 91 * siron() does not need an extra gatekeeper; any cpu that wishes should be 92 * allowed to queue a soft interrupt. It is softint()'s job to ensure 93 * correct handling of the queues. Therefore, siron_pending has been 94 * stripped of its gatekeeper task, retaining only its intr_init job, where 95 * it indicates that there is a pending need to call siron(). 96 */ 97 static int siron_pending[DDI_IPL_10]; /* software interrupt pending flags */ 98 static int siron1_pending; /* backward compatibility */ 99 100 int intr_policy = INTR_WEIGHTED_DIST; /* interrupt distribution policy */ 101 int intr_dist_debug = 0; 102 int32_t intr_dist_weight_max = 1; 103 int32_t intr_dist_weight_maxmax = 1000; 104 int intr_dist_weight_maxfactor = 2; 105 #define INTR_DEBUG(args) if (intr_dist_debug) cmn_err args 106 107 /* 108 * intr_init() - Interrupt initialization 109 * Initialize the system's interrupt vector table. 110 */ 111 void 112 intr_init(cpu_t *cp) 113 { 114 int i; 115 extern uint_t softlevel1(); 116 117 init_ivintr(); 118 REGISTER_BBUS_INTR(); 119 120 /* 121 * Register these software interrupts for ddi timer. 122 * Software interrupts up to the level 10 are supported. 
123 */ 124 for (i = DDI_IPL_1; i <= DDI_IPL_10; i++) { 125 siron_inum[i - 1] = add_softintr(i, 126 (softintrfunc)ddi_periodic_softintr, 127 (caddr_t)(uintptr_t)(i), SOFTINT_ST); 128 } 129 130 siron1_inum = add_softintr(PIL_1, softlevel1, 0, SOFTINT_ST); 131 poke_cpu_inum = add_softintr(PIL_13, poke_cpu_intr, 0, SOFTINT_MT); 132 siron_poke_cpu_inum = add_softintr(PIL_13, 133 siron_poke_cpu_intr, 0, SOFTINT_MT); 134 cp->cpu_m.poke_cpu_outstanding = B_FALSE; 135 136 mutex_init(&intr_dist_lock, NULL, MUTEX_DEFAULT, NULL); 137 mutex_init(&intr_dist_cpu_lock, NULL, MUTEX_DEFAULT, NULL); 138 139 /* 140 * A soft interrupt may have been requested prior to the initialization 141 * of soft interrupts. Soft interrupts can't be dispatched until after 142 * init_intr(), so we have to wait until now before we can dispatch the 143 * pending soft interrupt (if any). 144 */ 145 for (i = DDI_IPL_1; i <= DDI_IPL_10; i++) { 146 if (siron_pending[i-1]) { 147 siron_pending[i-1] = 0; 148 sir_on(i); 149 } 150 } 151 if (siron1_pending) { 152 siron1_pending = 0; 153 siron(); 154 } 155 } 156 157 /* 158 * poke_cpu_intr - fall through when poke_cpu calls 159 */ 160 /* ARGSUSED */ 161 uint_t 162 poke_cpu_intr(caddr_t arg1, caddr_t arg2) 163 { 164 CPU->cpu_m.poke_cpu_outstanding = B_FALSE; 165 membar_stld_stst(); 166 return (1); 167 } 168 169 /* 170 * Trigger software interrupts dedicated to ddi timer. 171 */ 172 void 173 sir_on(int level) 174 { 175 ASSERT(level >= DDI_IPL_1 && level <= DDI_IPL_10); 176 if (siron_inum[level-1]) 177 setsoftint(siron_inum[level-1]); 178 else 179 siron_pending[level-1] = 1; 180 } 181 182 /* 183 * kmdb uses siron (and thus setsoftint) while the world is stopped in order to 184 * inform its driver component that there's work to be done. We need to keep 185 * DTrace from instrumenting kmdb's siron and setsoftint. We duplicate siron, 186 * giving kmdb's version a kdi_ prefix to keep DTrace at bay. 
The 187 * implementation of setsoftint is complicated enough that we don't want to 188 * duplicate it, but at the same time we don't want to preclude tracing either. 189 * The meat of setsoftint() therefore goes into kdi_setsoftint, with 190 * setsoftint() implemented as a wrapper. This allows tracing, while still 191 * providing a way for kmdb to sneak in unmolested. 192 */ 193 void 194 kdi_siron(void) 195 { 196 if (siron1_inum != 0) 197 kdi_setsoftint(siron1_inum); 198 else 199 siron1_pending = 1; 200 } 201 202 void 203 setsoftint(uint64_t inum) 204 { 205 kdi_setsoftint(inum); 206 } 207 208 /* 209 * Generates softlevel1 interrupt on current CPU if it 210 * is not pending already. 211 */ 212 void 213 siron(void) 214 { 215 uint64_t inum; 216 217 if (siron1_inum != 0) { 218 /* 219 * Once siron_cpu_inum has been allocated, we can 220 * use per-CPU siron inum. 221 */ 222 if (siron_cpu_inum && siron_cpu_inum[CPU->cpu_id] != 0) 223 inum = siron_cpu_inum[CPU->cpu_id]; 224 else 225 inum = siron1_inum; 226 227 setsoftint(inum); 228 } else 229 siron1_pending = 1; 230 } 231 232 233 static void 234 siron_init(void) 235 { 236 /* 237 * We just allocate memory for per-cpu siron right now. Rest of 238 * the work is done when CPU is configured. 239 */ 240 siron_cpu_inum = kmem_zalloc(sizeof (uint64_t) * NCPU, KM_SLEEP); 241 } 242 243 /* 244 * This routine creates per-CPU siron inum for CPUs which are 245 * configured during boot. 246 */ 247 void 248 siron_mp_init() 249 { 250 cpu_t *c; 251 252 /* 253 * Get the memory for per-CPU siron inums 254 */ 255 siron_init(); 256 257 mutex_enter(&cpu_lock); 258 c = cpu_list; 259 do { 260 (void) siron_cpu_setup(CPU_CONFIG, c->cpu_id, NULL); 261 } while ((c = c->cpu_next) != cpu_list); 262 263 register_cpu_setup_func(siron_cpu_setup, NULL); 264 mutex_exit(&cpu_lock); 265 } 266 267 /* 268 * siron_poke_cpu_intr - cross-call handler. 
269 */ 270 /* ARGSUSED */ 271 uint_t 272 siron_poke_cpu_intr(caddr_t arg1, caddr_t arg2) 273 { 274 /* generate level1 softint */ 275 siron(); 276 return (1); 277 } 278 279 /* 280 * This routine generates a cross-call on target CPU(s). 281 */ 282 void 283 siron_poke_cpu(cpuset_t poke) 284 { 285 int cpuid = CPU->cpu_id; 286 287 if (CPU_IN_SET(poke, cpuid)) { 288 siron(); 289 CPUSET_DEL(poke, cpuid); 290 if (CPUSET_ISNULL(poke)) 291 return; 292 } 293 294 xt_some(poke, setsoftint_tl1, siron_poke_cpu_inum, 0); 295 } 296 297 /* 298 * This callback function allows us to create per-CPU siron inum. 299 */ 300 /* ARGSUSED */ 301 static int 302 siron_cpu_setup(cpu_setup_t what, int id, void *arg) 303 { 304 cpu_t *cp = cpu[id]; 305 306 ASSERT(MUTEX_HELD(&cpu_lock)); 307 ASSERT(cp != NULL); 308 309 switch (what) { 310 case CPU_CONFIG: 311 siron_cpu_inum[cp->cpu_id] = add_softintr(PIL_1, 312 (softintrfunc)softlevel1, 0, SOFTINT_ST); 313 break; 314 case CPU_UNCONFIG: 315 (void) rem_softintr(siron_cpu_inum[cp->cpu_id]); 316 siron_cpu_inum[cp->cpu_id] = 0; 317 break; 318 default: 319 break; 320 } 321 322 return (0); 323 } 324 325 /* 326 * no_ivintr() 327 * called by setvecint_tl1() through sys_trap() 328 * vector interrupt received but not valid or not 329 * registered in intr_vec_table 330 * considered as a spurious mondo interrupt 331 */ 332 /* ARGSUSED */ 333 void 334 no_ivintr(struct regs *rp, int inum, int pil) 335 { 336 if (!ignore_invalid_vecintr) 337 cmn_err(CE_WARN, "invalid vector intr: number 0x%x, pil 0x%x", 338 inum, pil); 339 340 #ifdef DEBUG_VEC_INTR 341 prom_enter_mon(); 342 #endif /* DEBUG_VEC_INTR */ 343 } 344 345 void 346 intr_dequeue_req(uint_t pil, uint64_t inum) 347 { 348 intr_vec_t *iv, *next, *prev; 349 struct machcpu *mcpu; 350 uint32_t clr; 351 processorid_t cpu_id; 352 extern uint_t getpstate(void); 353 354 ASSERT((getpstate() & PSTATE_IE) == 0); 355 356 mcpu = &CPU->cpu_m; 357 cpu_id = CPU->cpu_id; 358 359 iv = (intr_vec_t *)inum; 360 prev = NULL; 361 
next = mcpu->intr_head[pil]; 362 363 /* Find a matching entry in the list */ 364 while (next != NULL) { 365 if (next == iv) 366 break; 367 prev = next; 368 next = IV_GET_PIL_NEXT(next, cpu_id); 369 } 370 371 if (next != NULL) { 372 intr_vec_t *next_iv = IV_GET_PIL_NEXT(next, cpu_id); 373 374 /* Remove entry from list */ 375 if (prev != NULL) 376 IV_SET_PIL_NEXT(prev, cpu_id, next_iv); /* non-head */ 377 else 378 mcpu->intr_head[pil] = next_iv; /* head */ 379 380 if (next_iv == NULL) 381 mcpu->intr_tail[pil] = prev; /* tail */ 382 } 383 384 /* Clear pending interrupts at this level if the list is empty */ 385 if (mcpu->intr_head[pil] == NULL) { 386 clr = 1 << pil; 387 if (pil == PIL_14) 388 clr |= (TICK_INT_MASK | STICK_INT_MASK); 389 wr_clr_softint(clr); 390 } 391 } 392 393 394 /* 395 * Send a directed interrupt of specified interrupt number id to a cpu. 396 */ 397 void 398 send_dirint( 399 int cpuix, /* cpu to be interrupted */ 400 int intr_id) /* interrupt number id */ 401 { 402 xt_one(cpuix, setsoftint_tl1, intr_id, 0); 403 } 404 405 /* 406 * Take the specified CPU out of participation in interrupts. 407 * Called by p_online(2) when a processor is being taken off-line. 408 * This allows interrupt threads being handled on the processor to 409 * complete before the processor is idled. 410 */ 411 int 412 cpu_disable_intr(struct cpu *cp) 413 { 414 ASSERT(MUTEX_HELD(&cpu_lock)); 415 416 /* 417 * Turn off the CPU_ENABLE flag before calling the redistribution 418 * function, since it checks for this in the cpu flags. 419 */ 420 cp->cpu_flags &= ~CPU_ENABLE; 421 422 intr_redist_all_cpus(); 423 424 return (0); 425 } 426 427 /* 428 * Allow the specified CPU to participate in interrupts. 429 * Called by p_online(2) if a processor could not be taken off-line 430 * because of bound threads, in order to resume processing interrupts. 431 * Also called after starting a processor. 
432 */ 433 void 434 cpu_enable_intr(struct cpu *cp) 435 { 436 ASSERT(MUTEX_HELD(&cpu_lock)); 437 438 cp->cpu_flags |= CPU_ENABLE; 439 440 intr_redist_all_cpus(); 441 } 442 443 /* 444 * Add function to callback list for intr_redist_all_cpus. We keep two lists, 445 * one for weighted callbacks and one for normal callbacks. Weighted callbacks 446 * are issued to redirect interrupts of a specified weight, from heavy to 447 * light. This allows all the interrupts of a given weight to be redistributed 448 * for all weighted nexus drivers prior to those of less weight. 449 */ 450 static void 451 intr_dist_add_list(struct intr_dist **phead, void (*func)(void *), void *arg) 452 { 453 struct intr_dist *new = kmem_alloc(sizeof (*new), KM_SLEEP); 454 struct intr_dist *iptr; 455 struct intr_dist **pptr; 456 457 ASSERT(func); 458 new->func = func; 459 new->arg = arg; 460 new->next = NULL; 461 462 /* Add to tail so that redistribution occurs in original order. */ 463 mutex_enter(&intr_dist_lock); 464 for (iptr = *phead, pptr = phead; iptr != NULL; 465 pptr = &iptr->next, iptr = iptr->next) { 466 /* check for problems as we locate the tail */ 467 if ((iptr->func == func) && (iptr->arg == arg)) { 468 cmn_err(CE_PANIC, "intr_dist_add_list(): duplicate"); 469 /*NOTREACHED*/ 470 } 471 } 472 *pptr = new; 473 474 mutex_exit(&intr_dist_lock); 475 } 476 477 void 478 intr_dist_add(void (*func)(void *), void *arg) 479 { 480 intr_dist_add_list(&intr_dist_head, (void (*)(void *))func, arg); 481 } 482 483 void 484 intr_dist_add_weighted(void (*func)(void *, int32_t, int32_t), void *arg) 485 { 486 intr_dist_add_list(&intr_dist_whead, (void (*)(void *))func, arg); 487 } 488 489 /* 490 * Search for the interrupt distribution structure with the specified 491 * mondo vec reg in the interrupt distribution list. If a match is found, 492 * then delete the entry from the list. The caller is responsible for 493 * modifying the mondo vector registers. 
494 */ 495 static void 496 intr_dist_rem_list(struct intr_dist **headp, void (*func)(void *), void *arg) 497 { 498 struct intr_dist *iptr; 499 struct intr_dist **vect; 500 501 mutex_enter(&intr_dist_lock); 502 for (iptr = *headp, vect = headp; 503 iptr != NULL; vect = &iptr->next, iptr = iptr->next) { 504 if ((iptr->func == func) && (iptr->arg == arg)) { 505 *vect = iptr->next; 506 kmem_free(iptr, sizeof (struct intr_dist)); 507 mutex_exit(&intr_dist_lock); 508 return; 509 } 510 } 511 512 if (!panicstr) 513 cmn_err(CE_PANIC, "intr_dist_rem_list: not found"); 514 mutex_exit(&intr_dist_lock); 515 } 516 517 void 518 intr_dist_rem(void (*func)(void *), void *arg) 519 { 520 intr_dist_rem_list(&intr_dist_head, (void (*)(void *))func, arg); 521 } 522 523 void 524 intr_dist_rem_weighted(void (*func)(void *, int32_t, int32_t), void *arg) 525 { 526 intr_dist_rem_list(&intr_dist_whead, (void (*)(void *))func, arg); 527 } 528 529 /* 530 * Initiate interrupt redistribution. Redistribution improves the isolation 531 * associated with interrupt weights by ordering operations from heavy weight 532 * to light weight. When a CPUs orientation changes relative to interrupts, 533 * there is *always* a redistribution to accommodate this change (call to 534 * intr_redist_all_cpus()). As devices (not CPUs) attach/detach it is possible 535 * that a redistribution could improve the quality of an initialization. For 536 * example, if you are not using a NIC it may not be attached with s10 (devfs). 537 * If you then configure the NIC (ifconfig), this may cause the NIC to attach 538 * and plumb interrupts. The CPU assignment for the NIC's interrupts is 539 * occurring late, so optimal "isolation" relative to weight is not occurring. 540 * The same applies to detach, although in this case doing the redistribution 541 * might improve "spread" for medium weight devices since the "isolation" of 542 * a higher weight device may no longer be present. 
543 * 544 * NB: We should provide a utility to trigger redistribution (ala "intradm -r"). 545 * 546 * NB: There is risk associated with automatically triggering execution of the 547 * redistribution code at arbitrary times. The risk comes from the fact that 548 * there is a lot of low-level hardware interaction associated with a 549 * redistribution. At some point we may want this code to perform automatic 550 * redistribution (redistribution thread; trigger timeout when add/remove 551 * weight delta is large enough, and call cv_signal from timeout - causing 552 * thead to call i_ddi_intr_redist_all_cpus()) but this is considered too 553 * risky at this time. 554 */ 555 void 556 i_ddi_intr_redist_all_cpus() 557 { 558 mutex_enter(&cpu_lock); 559 INTR_DEBUG((CE_CONT, "intr_dist: i_ddi_intr_redist_all_cpus\n")); 560 intr_redist_all_cpus(); 561 mutex_exit(&cpu_lock); 562 } 563 564 /* 565 * Redistribute all interrupts 566 * 567 * This function redistributes all interrupting devices, running the 568 * parent callback functions for each node. 569 */ 570 void 571 intr_redist_all_cpus(void) 572 { 573 struct cpu *cp; 574 struct intr_dist *iptr; 575 int32_t weight, max_weight; 576 577 ASSERT(MUTEX_HELD(&cpu_lock)); 578 mutex_enter(&intr_dist_lock); 579 580 /* 581 * zero cpu_intr_weight on all cpus - it is safe to traverse 582 * cpu_list since we hold cpu_lock. 583 */ 584 cp = cpu_list; 585 do { 586 cp->cpu_intr_weight = 0; 587 } while ((cp = cp->cpu_next) != cpu_list); 588 589 /* 590 * Assume that this redistribution may encounter a device weight 591 * via driver.conf tuning of "ddi-intr-weight" that is at most 592 * intr_dist_weight_maxfactor times larger. 
593 */ 594 max_weight = intr_dist_weight_max * intr_dist_weight_maxfactor; 595 if (max_weight > intr_dist_weight_maxmax) 596 max_weight = intr_dist_weight_maxmax; 597 intr_dist_weight_max = 1; 598 599 INTR_DEBUG((CE_CONT, "intr_dist: " 600 "intr_redist_all_cpus: %d-0\n", max_weight)); 601 602 /* 603 * Redistribute weighted, from heavy to light. The callback that 604 * specifies a weight equal to weight_max should redirect all 605 * interrupts of weight weight_max or greater [weight_max, inf.). 606 * Interrupts of lesser weight should be processed on the call with 607 * the matching weight. This allows all the heaver weight interrupts 608 * on all weighted busses (multiple pci busses) to be redirected prior 609 * to any lesser weight interrupts. 610 */ 611 for (weight = max_weight; weight >= 0; weight--) 612 for (iptr = intr_dist_whead; iptr != NULL; iptr = iptr->next) 613 ((void (*)(void *, int32_t, int32_t))iptr->func) 614 (iptr->arg, max_weight, weight); 615 616 /* redistribute normal (non-weighted) interrupts */ 617 for (iptr = intr_dist_head; iptr != NULL; iptr = iptr->next) 618 ((void (*)(void *))iptr->func)(iptr->arg); 619 mutex_exit(&intr_dist_lock); 620 } 621 622 void 623 intr_redist_all_cpus_shutdown(void) 624 { 625 intr_policy = INTR_CURRENT_CPU; 626 intr_redist_all_cpus(); 627 } 628 629 /* 630 * Determine what CPU to target, based on interrupt policy. 631 * 632 * INTR_FLAT_DIST: hold a current CPU pointer in a static variable and 633 * advance through interrupt enabled cpus (round-robin). 634 * 635 * INTR_WEIGHTED_DIST: search for an enabled CPU with the lowest 636 * cpu_intr_weight, round robin when all equal. 637 * 638 * Weighted interrupt distribution provides two things: "spread" of weight 639 * (associated with algorithm itself) and "isolation" (associated with a 640 * particular device weight). 
 * A redistribution is what provides optimal "isolation" of heavy weight
 * interrupts; optimal "spread" of weight (relative to what came before) is
 * always occurring.
 *
 * An interrupt weight is a subjective number that represents the
 * percentage of a CPU required to service a device's interrupts: the
 * default weight is 0% (however the algorithm still maintains
 * round-robin), a network interface controller (NIC) may have a large
 * weight (35%). Interrupt weight only has meaning relative to the
 * interrupt weight of other devices: a CPU can be weighted more than
 * 100%, and a single device might consume more than 100% of a CPU.
 *
 * A coarse interrupt weight can be defined by the parent nexus driver
 * based on bus specific information, like pci class codes. A nexus
 * driver that supports device interrupt weighting for its children
 * should call intr_dist_cpuid_add/rem_device_weight(), which adds
 * and removes the weight of a device from the CPU that an interrupt
 * is directed at. The quality of initialization improves when the
 * device interrupt weights more accurately reflect actual run-time weights,
 * and as the assignments are ordered from heavy to light.
 *
 * The implementation also supports interrupt weight being specified in
 * driver.conf files via the property "ddi-intr-weight", which takes
 * precedence over the nexus supplied weight. This support is added to
 * permit possible tweaking in the product in response to customer
 * problems. This is not a formal or committed interface.
 *
 * While a weighted approach chooses the CPU providing the best spread
 * given past weights, less than optimal isolation can result in cases
 * where heavy weight devices show up last. The nexus driver's interrupt
 * redistribution logic should use intr_dist_add/rem_weighted so that
 * interrupts can be redistributed heavy first for optimal isolation.
672 */ 673 uint32_t 674 intr_dist_cpuid(void) 675 { 676 static struct cpu *curr_cpu; 677 struct cpu *start_cpu; 678 struct cpu *new_cpu; 679 struct cpu *cp; 680 int cpuid = -1; 681 682 /* Establish exclusion for curr_cpu and cpu_intr_weight manipulation */ 683 mutex_enter(&intr_dist_cpu_lock); 684 685 switch (intr_policy) { 686 case INTR_CURRENT_CPU: 687 cpuid = CPU->cpu_id; 688 break; 689 690 case INTR_BOOT_CPU: 691 panic("INTR_BOOT_CPU no longer supported."); 692 /*NOTREACHED*/ 693 694 case INTR_FLAT_DIST: 695 case INTR_WEIGHTED_DIST: 696 default: 697 /* 698 * Ensure that curr_cpu is valid - cpu_next will be NULL if 699 * the cpu has been deleted (cpu structs are never freed). 700 */ 701 if (curr_cpu == NULL || curr_cpu->cpu_next == NULL) 702 curr_cpu = CPU; 703 704 /* 705 * Advance to online CPU after curr_cpu (round-robin). For 706 * INTR_WEIGHTED_DIST we choose the cpu with the lightest 707 * weight. For a nexus that does not support weight the 708 * default weight of zero is used. We degrade to round-robin 709 * behavior among equal weightes. The default weight is zero 710 * and round-robin behavior continues. 711 * 712 * Disable preemption while traversing cpu_next_onln to 713 * ensure the list does not change. This works because 714 * modifiers of this list and other lists in a struct cpu 715 * call pause_cpus() before making changes. 
716 */ 717 kpreempt_disable(); 718 cp = start_cpu = curr_cpu->cpu_next_onln; 719 new_cpu = NULL; 720 do { 721 /* Skip CPUs with interrupts disabled */ 722 if ((cp->cpu_flags & CPU_ENABLE) == 0) 723 continue; 724 725 if (intr_policy == INTR_FLAT_DIST) { 726 /* select CPU */ 727 new_cpu = cp; 728 break; 729 } else if ((new_cpu == NULL) || 730 (cp->cpu_intr_weight < new_cpu->cpu_intr_weight)) { 731 /* Choose if lighter weight */ 732 new_cpu = cp; 733 } 734 } while ((cp = cp->cpu_next_onln) != start_cpu); 735 ASSERT(new_cpu); 736 cpuid = new_cpu->cpu_id; 737 738 INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: " 739 "targeted\n", cpuid, new_cpu->cpu_intr_weight)); 740 741 /* update static pointer for next round-robin */ 742 curr_cpu = new_cpu; 743 kpreempt_enable(); 744 break; 745 } 746 mutex_exit(&intr_dist_cpu_lock); 747 return (cpuid); 748 } 749 750 /* 751 * Add or remove the the weight of a device from a CPUs interrupt weight. 752 * 753 * We expect nexus drivers to call intr_dist_cpuid_add/rem_device_weight for 754 * their children to improve the overall quality of interrupt initialization. 755 * 756 * If a nexues shares the CPU returned by a single intr_dist_cpuid() call 757 * among multiple devices (sharing ino) then the nexus should call 758 * intr_dist_cpuid_add/rem_device_weight for each device separately. Devices 759 * that share must specify the same cpuid. 760 * 761 * If a nexus driver is unable to determine the cpu at remove_intr time 762 * for some of its interrupts, then it should not call add_device_weight - 763 * intr_dist_cpuid will still provide round-robin. 764 * 765 * An established device weight (from dev_info node) takes precedence over 766 * the weight passed in. If a device weight is not already established 767 * then the passed in nexus weight is established. 
768 */ 769 void 770 intr_dist_cpuid_add_device_weight(uint32_t cpuid, 771 dev_info_t *dip, int32_t nweight) 772 { 773 int32_t eweight; 774 775 /* 776 * For non-weighted policy everything has weight of zero (and we get 777 * round-robin distribution from intr_dist_cpuid). 778 * NB: intr_policy is limited to this file. A weighted nexus driver is 779 * calls this rouitne even if intr_policy has been patched to 780 * INTR_FLAG_DIST. 781 */ 782 ASSERT(dip); 783 if (intr_policy != INTR_WEIGHTED_DIST) 784 return; 785 786 eweight = i_ddi_get_intr_weight(dip); 787 INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: +%2d/%2d for " 788 "%s#%d/%s#%d\n", cpuid, cpu[cpuid]->cpu_intr_weight, 789 nweight, eweight, ddi_driver_name(ddi_get_parent(dip)), 790 ddi_get_instance(ddi_get_parent(dip)), 791 ddi_driver_name(dip), ddi_get_instance(dip))); 792 793 /* if no establish weight, establish nexus weight */ 794 if (eweight < 0) { 795 if (nweight > 0) 796 (void) i_ddi_set_intr_weight(dip, nweight); 797 else 798 nweight = 0; 799 } else 800 nweight = eweight; /* use established weight */ 801 802 /* Establish exclusion for cpu_intr_weight manipulation */ 803 mutex_enter(&intr_dist_cpu_lock); 804 cpu[cpuid]->cpu_intr_weight += nweight; 805 806 /* update intr_dist_weight_max */ 807 if (nweight > intr_dist_weight_max) 808 intr_dist_weight_max = nweight; 809 mutex_exit(&intr_dist_cpu_lock); 810 } 811 812 void 813 intr_dist_cpuid_rem_device_weight(uint32_t cpuid, dev_info_t *dip) 814 { 815 struct cpu *cp; 816 int32_t weight; 817 818 ASSERT(dip); 819 if (intr_policy != INTR_WEIGHTED_DIST) 820 return; 821 822 /* remove weight of device from cpu */ 823 weight = i_ddi_get_intr_weight(dip); 824 if (weight < 0) 825 weight = 0; 826 INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: -%2d for " 827 "%s#%d/%s#%d\n", cpuid, cpu[cpuid]->cpu_intr_weight, weight, 828 ddi_driver_name(ddi_get_parent(dip)), 829 ddi_get_instance(ddi_get_parent(dip)), 830 ddi_driver_name(dip), ddi_get_instance(dip))); 831 832 
/* Establish exclusion for cpu_intr_weight manipulation */ 833 mutex_enter(&intr_dist_cpu_lock); 834 cp = cpu[cpuid]; 835 cp->cpu_intr_weight -= weight; 836 if (cp->cpu_intr_weight < 0) 837 cp->cpu_intr_weight = 0; /* sanity */ 838 mutex_exit(&intr_dist_cpu_lock); 839 } 840 841 ulong_t 842 create_softint(uint_t pil, uint_t (*func)(caddr_t, caddr_t), caddr_t arg1) 843 { 844 uint64_t inum; 845 846 inum = add_softintr(pil, func, arg1, SOFTINT_MT); 847 return ((ulong_t)inum); 848 } 849 850 void 851 invoke_softint(processorid_t cpuid, ulong_t hdl) 852 { 853 uint64_t inum = hdl; 854 855 if (cpuid == CPU->cpu_id) 856 setsoftint(inum); 857 else 858 xt_one(cpuid, setsoftint_tl1, inum, 0); 859 } 860 861 void 862 remove_softint(ulong_t hdl) 863 { 864 uint64_t inum = hdl; 865 866 (void) rem_softintr(inum); 867 } 868 869 void 870 sync_softint(cpuset_t set) 871 { 872 xt_sync(set); 873 }