/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2009-2010, Intel Corporation.
 * All rights reserved.
 */

#include <sys/x86_archext.h>
#include <sys/machsystm.h>
#include <sys/x_call.h>
#include <sys/stat.h>
#include <acpica/include/acpi.h>
#include <sys/acpica.h>
#include <sys/cpu_acpi.h>
#include <sys/cpu_idle.h>
#include <sys/cpupm.h>
#include <sys/cpu_event.h>
#include <sys/hpet.h>
#include <sys/archsystm.h>
#include <vm/hat_i86.h>
#include <sys/dtrace.h>
#include <sys/sdt.h>
#include <sys/callb.h>

#define	CSTATE_USING_HPET	1
#define	CSTATE_USING_LAT	2

#define	CPU_IDLE_STOP_TIMEOUT	1000

extern void cpu_idle_adaptive(void);
extern uint32_t cpupm_next_cstate(cma_c_state_t *cs_data,
    cpu_acpi_cstate_t *cstates, uint32_t cs_count, hrtime_t start);

static int cpu_idle_init(cpu_t *);
static void cpu_idle_fini(cpu_t *);
static void cpu_idle_stop(cpu_t *);
static boolean_t cpu_deep_idle_callb(void *arg, int code);
static boolean_t cpu_idle_cpr_callb(void *arg, int code);
static void acpi_cpu_cstate(cpu_acpi_cstate_t *cstate);

static boolean_t cstate_use_timer(hrtime_t *lapic_expire, int timer);

/*
 * cpu_cstate_arat is set when the local APIC timer is always running,
 * even in deep C-states (ARAT); cpu_cstate_hpet is set when the HPET is
 * used as the deep C-state timer instead.  Both are established in
 * cpu_deep_cstates_supported().
 */
static boolean_t cpu_cstate_arat = B_FALSE;
static boolean_t cpu_cstate_hpet = B_FALSE;

/*
 * Interfaces for modules implementing Intel's deep c-state.
 */
cpupm_state_ops_t cpu_idle_ops = {
	"Generic ACPI C-state Support",
	cpu_idle_init,
	cpu_idle_fini,
	NULL,
	cpu_idle_stop
};

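/*
 * cpu_idle_callb_mutex protects cpu_idle_cfg_state and serializes
 * registration of the deep-idle/CPR callbacks and the switching of the
 * idle_cpu and disp_enq_thread hooks; cpu_idle_mutex is the kstat lock
 * for the shared C-state kstat template below.
 */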
static kmutex_t		cpu_idle_callb_mutex;
static callb_id_t	cpu_deep_idle_callb_id;
static callb_id_t	cpu_idle_cpr_callb_id;
static uint_t		cpu_idle_cfg_state;

static kmutex_t cpu_idle_mutex;

cpu_idle_kstat_t cpu_idle_kstat = {
	{ "address_space_id",	KSTAT_DATA_STRING },
	{ "latency",		KSTAT_DATA_UINT32 },
	{ "power",		KSTAT_DATA_UINT32 },
};

/*
 * kstat update function of the c-state info
 */
static int
cpu_idle_kstat_update(kstat_t *ksp, int flag)
{
	cpu_acpi_cstate_t *cstate = ksp->ks_private;

	if (flag == KSTAT_WRITE) {
		return (EACCES);
	}

	if (cstate->cs_addrspace_id == ACPI_ADR_SPACE_FIXED_HARDWARE) {
		kstat_named_setstr(&cpu_idle_kstat.addr_space_id,
		    "FFixedHW");
	} else if (cstate->cs_addrspace_id == ACPI_ADR_SPACE_SYSTEM_IO) {
		kstat_named_setstr(&cpu_idle_kstat.addr_space_id,
		    "SystemIO");
	} else {
		kstat_named_setstr(&cpu_idle_kstat.addr_space_id,
		    "Unsupported");
	}

	cpu_idle_kstat.cs_latency.value.ui32 = cstate->cs_latency;
	cpu_idle_kstat.cs_power.value.ui32 = cstate->cs_power;

	return (0);
}

/*
 * Used during configuration callbacks to manage implementation-specific
 * details of the hardware timer used during deep C-states.
 */
boolean_t
cstate_timer_callback(int code)
{
	if (cpu_cstate_arat) {
		return (B_TRUE);
	} else if (cpu_cstate_hpet) {
		return (hpet.callback(code));
	}
	return (B_FALSE);
}

/*
 * Some Local APIC Timers do not work during Deep C-states.
 * The Deep C-state idle function uses this function to ensure it is using a
 * hardware timer that works during Deep C-states.  This function also
 * switches the timer back to the LAPIC Timer after Deep C-state.
 */
static boolean_t
cstate_use_timer(hrtime_t *lapic_expire, int timer)
{
	if (cpu_cstate_arat)
		return (B_TRUE);

	/*
	 * We have to return B_FALSE if there is no ARAT or HPET support.
	 */
	if (!cpu_cstate_hpet)
		return (B_FALSE);

	switch (timer) {
	case CSTATE_USING_HPET:
		return (hpet.use_hpet_timer(lapic_expire));
	case CSTATE_USING_LAT:
		hpet.use_lapic_timer(*lapic_expire);
		return (B_TRUE);
	default:
		return (B_FALSE);
	}
}

/*
 * c-state wakeup function.
 * Similar to cpu_wakeup and cpu_wakeup_mwait except this function deals
 * with CPUs asleep in MWAIT, HLT, or ACPI Deep C-State.
 */
void
cstate_wakeup(cpu_t *cp, int bound)
{
	struct machcpu	*mcpu = &(cp->cpu_m);
	volatile uint32_t *mcpu_mwait = mcpu->mcpu_mwait;
	cpupart_t	*cpu_part;
	uint_t		cpu_found;
	processorid_t	cpu_sid;

	cpu_part = cp->cpu_part;
	cpu_sid = cp->cpu_seqid;
	/*
	 * If the CPU is in the halted set, clear its halted bit since it
	 * will be woken up in a moment.
	 */
	if (bitset_in_set(&cpu_part->cp_haltset, cpu_sid)) {
		bitset_atomic_del(&cpu_part->cp_haltset, cpu_sid);

		/*
		 * We may find the current CPU present in the halted cpuset
		 * if we're in the context of an interrupt that occurred
		 * before we had a chance to clear our bit in cpu_idle().
		 * Waking ourself is obviously unnecessary, since if
		 * we're here, we're not halted.
		 */
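		/*
		 * A CPU sleeping in MWAIT monitors its mcpu_mwait word, so a
		 * store to that word (MWAIT_WAKEUP) is enough to wake it.  A
		 * CPU idling in HLT, or in a SystemIO C-state where it waits
		 * for an IPI, must be woken with poke_cpu() instead.
		 */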
		if (cp != CPU) {
			/*
			 * Use correct wakeup mechanism
			 */
			if ((mcpu_mwait != NULL) &&
			    (*mcpu_mwait == MWAIT_HALTED))
				MWAIT_WAKEUP(cp);
			else
				poke_cpu(cp->cpu_id);
		}
		return;
	} else {
		/*
		 * This cpu isn't halted, but it's idle or undergoing a
		 * context switch.  No need to awaken anyone else.
		 */
		if (cp->cpu_thread == cp->cpu_idle_thread ||
		    cp->cpu_disp_flags & CPU_DISP_DONTSTEAL)
			return;
	}

	/*
	 * No need to wake up other CPUs if the thread we just enqueued
	 * is bound.
	 */
	if (bound)
		return;

	/*
	 * See if there are any other halted CPUs.  If there are, then
	 * select one, and awaken it.
	 * It's possible that after we find a CPU, somebody else
	 * will awaken it before we get the chance.
	 * In that case, look again.
	 */
	do {
		cpu_found = bitset_find(&cpu_part->cp_haltset);
		if (cpu_found == (uint_t)-1)
			return;

	} while (bitset_atomic_test_and_del(&cpu_part->cp_haltset,
	    cpu_found) < 0);

	/*
	 * Must use correct wakeup mechanism to avoid lost wakeup of
	 * alternate cpu.
	 */
	if (cpu_found != CPU->cpu_seqid) {
		mcpu_mwait = cpu_seq[cpu_found]->cpu_m.mcpu_mwait;
		if ((mcpu_mwait != NULL) && (*mcpu_mwait == MWAIT_HALTED))
			MWAIT_WAKEUP(cpu_seq[cpu_found]);
		else
			poke_cpu(cpu_seq[cpu_found]->cpu_id);
	}
}

/*
 * Function called by the CPU idle notification framework to check whether
 * the CPU has been awakened.  It is called with interrupts disabled.
 * If the CPU has been awakened, call cpu_idle_exit() to notify the CPU idle
 * notification framework.
 */
static void
acpi_cpu_mwait_check_wakeup(void *arg)
{
	volatile uint32_t *mcpu_mwait = (volatile uint32_t *)arg;

	ASSERT(arg != NULL);
	if (*mcpu_mwait != MWAIT_HALTED) {
		/*
		 * CPU has been awakened, notify CPU idle notification system.
		 */
		cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
	} else {
		/*
		 * Toggle interrupt flag to detect pending interrupts.
		 * If an interrupt happened, do_interrupt() will notify the
		 * CPU idle notification framework, so there is no need to
		 * call cpu_idle_exit() here.
		 */
		sti();
		SMT_PAUSE();
		cli();
	}
}

static void
acpi_cpu_mwait_ipi_check_wakeup(void *arg)
{
	volatile uint32_t *mcpu_mwait = (volatile uint32_t *)arg;

	ASSERT(arg != NULL);
	if (*mcpu_mwait != MWAIT_WAKEUP_IPI) {
		/*
		 * CPU has been awakened, notify CPU idle notification system.
		 */
		cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
	} else {
		/*
		 * Toggle interrupt flag to detect pending interrupts.
		 * If an interrupt happened, do_interrupt() will notify the
		 * CPU idle notification framework, so there is no need to
		 * call cpu_idle_exit() here.
		 */
		sti();
		SMT_PAUSE();
		cli();
	}
}

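/*
 * Check-wakeup callback used when this CPU has no usable mcpu_mwait word
 * (MWAIT is not being used to idle).  There is nothing to poll, so simply
 * give any pending interrupt a brief window in which to be delivered.
 */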
/*ARGSUSED*/
static void
acpi_cpu_check_wakeup(void *arg)
{
	/*
	 * Toggle interrupt flag to detect pending interrupts.
	 * If an interrupt happened, do_interrupt() will notify the CPU idle
	 * notification framework, so there is no need to call
	 * cpu_idle_exit() here.
	 */
	sti();
	SMT_PAUSE();
	cli();
}

/*
 * enter deep c-state handler
 */
static void
acpi_cpu_cstate(cpu_acpi_cstate_t *cstate)
{
	volatile uint32_t *mcpu_mwait = CPU->cpu_m.mcpu_mwait;
	cpu_t			*cpup = CPU;
	processorid_t		cpu_sid = cpup->cpu_seqid;
	cpupart_t		*cp = cpup->cpu_part;
	hrtime_t		lapic_expire;
	uint8_t			type = cstate->cs_addrspace_id;
	uint32_t		cs_type = cstate->cs_type;
	int			hset_update = 1;
	boolean_t		using_timer;
	cpu_idle_check_wakeup_t check_func = &acpi_cpu_check_wakeup;

	/*
	 * Set our mcpu_mwait here, so we can tell if anyone tries to
	 * wake us between now and when we call mwait.  No other cpu will
	 * attempt to set our mcpu_mwait until we add ourself to the haltset.
	 */
	if (mcpu_mwait) {
		if (type == ACPI_ADR_SPACE_SYSTEM_IO) {
			*mcpu_mwait = MWAIT_WAKEUP_IPI;
			check_func = &acpi_cpu_mwait_ipi_check_wakeup;
		} else {
			*mcpu_mwait = MWAIT_HALTED;
			check_func = &acpi_cpu_mwait_check_wakeup;
		}
	}

	/*
	 * If this CPU is online, and there are multiple CPUs
	 * in the system, then we should note our halting
	 * by adding ourselves to the partition's halted CPU
	 * bitmap.  This allows other CPUs to find/awaken us when
	 * work becomes available.
	 */
	if (cpup->cpu_flags & CPU_OFFLINE || ncpus == 1)
		hset_update = 0;

	/*
	 * Add ourselves to the partition's halted CPUs bitmask
	 * and set our HALTED flag, if necessary.
	 *
	 * When a thread becomes runnable, it is placed on the queue
	 * and then the halted cpuset is checked to determine who
	 * (if anyone) should be awakened.  We therefore need to first
	 * add ourselves to the halted cpuset, and then check if there
	 * is any work available.
	 *
	 * Note that memory barriers after updating the HALTED flag
	 * are not necessary since an atomic operation (updating the bitmap)
	 * immediately follows.  On x86 the atomic operation acts as a
	 * memory barrier for the update of cpu_disp_flags.
	 */
	if (hset_update) {
		cpup->cpu_disp_flags |= CPU_DISP_HALTED;
		bitset_atomic_add(&cp->cp_haltset, cpu_sid);
	}

	/*
	 * Check to make sure there's really nothing to do.
	 * Work destined for this CPU may become available after
	 * this check.  We'll be notified through the clearing of our
	 * bit in the halted CPU bitmask, and a write to our mcpu_mwait.
	 *
	 * disp_anywork() checks disp_nrunnable, so we do not have to later.
	 */
	if (disp_anywork()) {
		if (hset_update) {
			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
			bitset_atomic_del(&cp->cp_haltset, cpu_sid);
		}
		return;
	}

	/*
	 * We're on our way to being halted.
	 *
	 * The local APIC timer can stop in ACPI C2 and deeper c-states.
	 * Try to program the HPET hardware to substitute for this CPU's
	 * LAPIC timer.
	 * cstate_use_timer() could disable the LAPIC Timer.  Make sure
	 * to start the LAPIC Timer again before leaving this function.
	 *
	 * Disable interrupts here so we will awaken immediately after halting
	 * if someone tries to poke us between now and the time we actually
	 * halt.
	 */
	cli();
	using_timer = cstate_use_timer(&lapic_expire, CSTATE_USING_HPET);
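
	/*
	 * using_timer is B_TRUE when a timer that keeps running in deep
	 * C-states is available: either the LAPIC timer itself never stops
	 * (ARAT) or the HPET has just been programmed to act as this CPU's
	 * proxy timer.  If neither is available we limit ourselves to C1
	 * further below.
	 */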

	/*
	 * We check for the presence of our bit after disabling interrupts.
	 * If it's cleared, we'll return.  If the bit is cleared after
	 * we check, then cstate_wakeup() will pop us out of the halted
	 * state.
	 *
	 * This means that the ordering of the clearing of the bit and the
	 * wakeup is important.
	 * cstate_wakeup() must clear our cp_haltset bit, and then wake us.
	 * acpi_cpu_cstate() must disable interrupts, then check for the bit.
	 */
	if (hset_update && bitset_in_set(&cp->cp_haltset, cpu_sid) == 0) {
		(void) cstate_use_timer(&lapic_expire,
		    CSTATE_USING_LAT);
		sti();
		cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
		return;
	}

	/*
	 * The check for anything locally runnable is here for performance
	 * and isn't needed for correctness.  disp_nrunnable ought to be
	 * in our cache still, so it's inexpensive to check, and if there
	 * is anything runnable we won't have to wait for the poke.
	 */
	if (cpup->cpu_disp->disp_nrunnable != 0) {
		(void) cstate_use_timer(&lapic_expire,
		    CSTATE_USING_LAT);
		sti();
		if (hset_update) {
			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
			bitset_atomic_del(&cp->cp_haltset, cpu_sid);
		}
		return;
	}

	if (using_timer == B_FALSE) {

		(void) cstate_use_timer(&lapic_expire,
		    CSTATE_USING_LAT);
		sti();

		/*
		 * We are currently unable to program the HPET to act as this
		 * CPU's proxy LAPIC timer.  This CPU cannot enter C2 or deeper
		 * because no timer is set to wake it up while its LAPIC timer
		 * stalls in deep C-States.
		 * Enter C1 instead.
		 *
		 * cstate_wakeup() will wake this CPU with an IPI, which
		 * works with MWAIT.
		 */
		i86_monitor(mcpu_mwait, 0, 0);
		if ((*mcpu_mwait & ~MWAIT_WAKEUP_IPI) == MWAIT_HALTED) {
			if (cpu_idle_enter(IDLE_STATE_C1, 0,
			    check_func, (void *)mcpu_mwait) == 0) {
				if ((*mcpu_mwait & ~MWAIT_WAKEUP_IPI) ==
				    MWAIT_HALTED) {
					i86_mwait(0, 0);
				}
				cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
			}
		}

		/*
		 * We're no longer halted
		 */
		if (hset_update) {
			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
			bitset_atomic_del(&cp->cp_haltset, cpu_sid);
		}
		return;
	}

	if (type == ACPI_ADR_SPACE_FIXED_HARDWARE) {
		/*
		 * We're on our way to being halted.
		 * To avoid a lost wakeup, arm the monitor before checking
		 * if another cpu wrote to mcpu_mwait to wake us up.
		 */
		i86_monitor(mcpu_mwait, 0, 0);
		if (*mcpu_mwait == MWAIT_HALTED) {
			if (cpu_idle_enter((uint_t)cs_type, 0,
			    check_func, (void *)mcpu_mwait) == 0) {
				if (*mcpu_mwait == MWAIT_HALTED) {
					i86_mwait(cstate->cs_address, 1);
				}
				cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
			}
		}
	} else if (type == ACPI_ADR_SPACE_SYSTEM_IO) {
		uint32_t value;
		ACPI_TABLE_FADT *gbl_FADT;
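
		/*
		 * A SystemIO C-state is entered by reading the state's I/O
		 * port (cs_address).  The follow-up read of the ACPI PM
		 * timer is a dummy operation that gives the chipset time to
		 * complete the C-state transition before execution resumes.
		 */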
		if (*mcpu_mwait == MWAIT_WAKEUP_IPI) {
			if (cpu_idle_enter((uint_t)cs_type, 0,
			    check_func, (void *)mcpu_mwait) == 0) {
				if (*mcpu_mwait == MWAIT_WAKEUP_IPI) {
					(void) cpu_acpi_read_port(
					    cstate->cs_address, &value, 8);
					acpica_get_global_FADT(&gbl_FADT);
					(void) cpu_acpi_read_port(
					    gbl_FADT->XPmTimerBlock.Address,
					    &value, 32);
				}
				cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
			}
		}
	}

	/*
	 * The LAPIC timer may have stopped in deep c-state.
	 * Reprogram this CPU's LAPIC here before enabling interrupts.
	 */
	(void) cstate_use_timer(&lapic_expire, CSTATE_USING_LAT);
	sti();

	/*
	 * We're no longer halted
	 */
	if (hset_update) {
		cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
		bitset_atomic_del(&cp->cp_haltset, cpu_sid);
	}
}

/*
 * Idle the present CPU; deep C-states are supported.
 */
void
cpu_acpi_idle(void)
{
	cpu_t *cp = CPU;
	cpu_acpi_handle_t handle;
	cma_c_state_t *cs_data;
	cpu_acpi_cstate_t *cstates;
	hrtime_t start, end;
	int cpu_max_cstates;
	uint32_t cs_indx;
	uint16_t cs_type;

	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	handle = mach_state->ms_acpi_handle;
	ASSERT(CPU_ACPI_CSTATES(handle) != NULL);

	cs_data = mach_state->ms_cstate.cma_state.cstate;
	cstates = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
	ASSERT(cstates != NULL);
	cpu_max_cstates = cpu_acpi_get_max_cstates(handle);
	if (cpu_max_cstates > CPU_MAX_CSTATES)
		cpu_max_cstates = CPU_MAX_CSTATES;
	if (cpu_max_cstates == 1) {	/* no ACPI c-state data */
		(*non_deep_idle_cpu)();
		return;
	}

	start = gethrtime_unscaled();

	cs_indx = cpupm_next_cstate(cs_data, cstates, cpu_max_cstates, start);

	cs_type = cstates[cs_indx].cs_type;

	switch (cs_type) {
	default:
		/* FALLTHROUGH */
	case CPU_ACPI_C1:
		(*non_deep_idle_cpu)();
		break;

	case CPU_ACPI_C2:
		acpi_cpu_cstate(&cstates[cs_indx]);
		break;

	case CPU_ACPI_C3:
		/*
		 * All supported Intel processors maintain cache coherency
		 * during C3.  Currently when entering C3 processors flush
		 * core caches to higher level shared cache.  The shared cache
		 * maintains state and supports probes during C3.
		 * Consequently there is no need to handle cache coherency
		 * and Bus Master activity here with the cache flush, BM_RLD
		 * bit, BM_STS bit, nor PM2_CNT.ARB_DIS mechanisms described
		 * in section 8.1.4 of the ACPI Specification 4.0.
		 */
		acpi_cpu_cstate(&cstates[cs_indx]);
		break;
	}

	end = gethrtime_unscaled();

	/*
	 * Update statistics
	 */
	cpupm_wakeup_cstate_data(cs_data, end);
}

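/*
 * Report whether deep C-states (C2 and deeper) may be used on this system.
 * They are enabled only when a timer that keeps running in those states is
 * available: either the local APIC timer is always running (ARAT) or the
 * HPET can act as a proxy.  The cpu_cstate_arat and cpu_cstate_hpet flags
 * are set here as a side effect; the idle_cpu_no_deep_c tunable disables
 * deep C-states outright.
 */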
boolean_t
cpu_deep_cstates_supported(void)
{
	extern int	idle_cpu_no_deep_c;

	if (idle_cpu_no_deep_c)
		return (B_FALSE);

	if (!cpuid_deep_cstates_supported())
		return (B_FALSE);

	if (cpuid_arat_supported()) {
		cpu_cstate_arat = B_TRUE;
		return (B_TRUE);
	}

	if ((hpet.supported == HPET_FULL_SUPPORT) &&
	    hpet.install_proxy()) {
		cpu_cstate_hpet = B_TRUE;
		return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * Validate that this processor supports deep cstate and if so,
 * get the c-state data from ACPI and cache it.
 */
static int
cpu_idle_init(cpu_t *cp)
{
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
	cpu_acpi_cstate_t *cstate;
	char name[KSTAT_STRLEN];
	int cpu_max_cstates, i;
	int ret;

	/*
	 * Cache the C-state specific ACPI data.
	 */
	if ((ret = cpu_acpi_cache_cstate_data(handle)) != 0) {
		if (ret < 0)
			cmn_err(CE_NOTE,
			    "!Support for CPU deep idle states is being "
			    "disabled due to errors parsing ACPI C-state "
			    "objects exported by BIOS.");
		cpu_idle_fini(cp);
		return (-1);
	}

	cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);

	cpu_max_cstates = cpu_acpi_get_max_cstates(handle);

	for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) {
		(void) snprintf(name, KSTAT_STRLEN - 1, "c%d",
		    cstate->cs_type);
		/*
		 * Allocate, initialize and install cstate kstat
		 */
		cstate->cs_ksp = kstat_create("cstate", cp->cpu_id,
		    name, "misc",
		    KSTAT_TYPE_NAMED,
		    sizeof (cpu_idle_kstat) / sizeof (kstat_named_t),
		    KSTAT_FLAG_VIRTUAL);

		if (cstate->cs_ksp == NULL) {
			cmn_err(CE_NOTE, "kstat_create(c_state) fail");
		} else {
			cstate->cs_ksp->ks_data = &cpu_idle_kstat;
			cstate->cs_ksp->ks_lock = &cpu_idle_mutex;
			cstate->cs_ksp->ks_update = cpu_idle_kstat_update;
			cstate->cs_ksp->ks_data_size += MAXNAMELEN;
			cstate->cs_ksp->ks_private = cstate;
			kstat_install(cstate->cs_ksp);
		}
		cstate++;
	}

	cpupm_alloc_domains(cp, CPUPM_C_STATES);
	cpupm_alloc_ms_cstate(cp);

	if (cpu_deep_cstates_supported()) {
		uint32_t value;

		mutex_enter(&cpu_idle_callb_mutex);
		if (cpu_deep_idle_callb_id == (callb_id_t)0)
			cpu_deep_idle_callb_id = callb_add(&cpu_deep_idle_callb,
			    (void *)NULL, CB_CL_CPU_DEEP_IDLE, "cpu_deep_idle");
		if (cpu_idle_cpr_callb_id == (callb_id_t)0)
			cpu_idle_cpr_callb_id = callb_add(&cpu_idle_cpr_callb,
			    (void *)NULL, CB_CL_CPR_PM, "cpu_idle_cpr");
		mutex_exit(&cpu_idle_callb_mutex);

		/*
		 * All supported CPUs (Nehalem and later) will remain in C3
		 * during Bus Master activity.
		 * Clear ACPI_BITREG_BUS_MASTER_RLD here, if it is not
		 * already 0, before enabling deeper C-states.
		 */
		cpu_acpi_get_register(ACPI_BITREG_BUS_MASTER_RLD, &value);
		if (value & 1)
			cpu_acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
	}

	return (0);
}

/*
 * Free resources allocated by cpu_idle_init().
 */
static void
cpu_idle_fini(cpu_t *cp)
{
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
	cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
	cpu_acpi_cstate_t *cstate;
	uint_t	cpu_max_cstates, i;

	/*
	 * The idle cpu function points back to the generic one.
	 */
	idle_cpu = cp->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
	disp_enq_thread = non_deep_idle_disp_enq_thread;

	cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
	if (cstate) {
		cpu_max_cstates = cpu_acpi_get_max_cstates(handle);

		for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) {
			if (cstate->cs_ksp != NULL)
				kstat_delete(cstate->cs_ksp);
			cstate++;
		}
	}

	cpupm_free_ms_cstate(cp);
	cpupm_free_domains(&cpupm_cstate_domains);
	cpu_acpi_free_cstate_data(handle);

	mutex_enter(&cpu_idle_callb_mutex);
	if (cpu_deep_idle_callb_id != (callb_id_t)0) {
		(void) callb_delete(cpu_deep_idle_callb_id);
		cpu_deep_idle_callb_id = (callb_id_t)0;
	}
	if (cpu_idle_cpr_callb_id != (callb_id_t)0) {
		(void) callb_delete(cpu_idle_cpr_callb_id);
		cpu_idle_cpr_callb_id = (callb_id_t)0;
	}
	mutex_exit(&cpu_idle_callb_mutex);
}

/*
 * This function is introduced here to solve a race condition between the
 * master and the slave touching the c-state data structures.  After the
 * slave calls this idle function and switches to the non-deep idle
 * function, the master can go on to reclaim the resources.
 */
static void
cpu_idle_stop_sync(void)
{
	/* switch to the non deep idle function */
	CPU->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
}

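/*
 * cpu_idle_stop() is the "stop" entry point of cpu_idle_ops.  It forces the
 * target CPU off the deep-idle path (via cpu_idle_stop_sync()) and then
 * tears down that CPU's C-state kstats, domain state and cached ACPI data.
 */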
static void
cpu_idle_stop(cpu_t *cp)
{
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
	cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
	cpu_acpi_cstate_t *cstate;
	uint_t cpu_max_cstates, i = 0;

	mutex_enter(&cpu_idle_callb_mutex);
	if (idle_cpu == cpu_idle_adaptive) {
		/*
		 * Invoke the slave to call the synchronous idle function.
		 */
		cp->cpu_m.mcpu_idle_cpu = cpu_idle_stop_sync;
		poke_cpu(cp->cpu_id);

		/*
		 * Wait until the slave switches to the non-deep idle
		 * function, so that the master is safe to go on and reclaim
		 * the resources.
		 */
		while (cp->cpu_m.mcpu_idle_cpu != non_deep_idle_cpu) {
			drv_usecwait(10);
			if ((++i % CPU_IDLE_STOP_TIMEOUT) == 0)
				cmn_err(CE_NOTE, "!cpu_idle_stop: the slave"
				    " idle stop timeout");
		}
	}
	mutex_exit(&cpu_idle_callb_mutex);

	cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
	if (cstate) {
		cpu_max_cstates = cpu_acpi_get_max_cstates(handle);

		for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) {
			if (cstate->cs_ksp != NULL)
				kstat_delete(cstate->cs_ksp);
			cstate++;
		}
	}
	cpupm_free_ms_cstate(cp);
	cpupm_remove_domains(cp, CPUPM_C_STATES, &cpupm_cstate_domains);
	cpu_acpi_free_cstate_data(handle);
}

/*ARGSUSED*/
static boolean_t
cpu_deep_idle_callb(void *arg, int code)
{
	boolean_t rslt = B_TRUE;

	mutex_enter(&cpu_idle_callb_mutex);
	switch (code) {
	case PM_DEFAULT_CPU_DEEP_IDLE:
		/*
		 * Default policy is same as enable
		 */
		/*FALLTHROUGH*/
	case PM_ENABLE_CPU_DEEP_IDLE:
		if ((cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG) == 0)
			break;

		if (cstate_timer_callback(PM_ENABLE_CPU_DEEP_IDLE)) {
			disp_enq_thread = cstate_wakeup;
			idle_cpu = cpu_idle_adaptive;
			cpu_idle_cfg_state &= ~CPU_IDLE_DEEP_CFG;
		} else {
			rslt = B_FALSE;
		}
		break;

	case PM_DISABLE_CPU_DEEP_IDLE:
		if (cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG)
			break;

		idle_cpu = non_deep_idle_cpu;
		if (cstate_timer_callback(PM_DISABLE_CPU_DEEP_IDLE)) {
			disp_enq_thread = non_deep_idle_disp_enq_thread;
			cpu_idle_cfg_state |= CPU_IDLE_DEEP_CFG;
		}
		break;

	default:
		cmn_err(CE_NOTE, "!cpu deep_idle_callb: invalid code %d\n",
		    code);
		break;
	}
	mutex_exit(&cpu_idle_callb_mutex);
	return (rslt);
}

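/*
 * CPR (suspend/resume) callback.  Before a checkpoint, the idle and
 * dispatcher hooks are reverted to their non-deep versions and the C-state
 * timer is notified; on resume, the deep-idle hooks are re-installed unless
 * deep idle has been disabled by the administrator (CPU_IDLE_DEEP_CFG).
 */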
/*ARGSUSED*/
static boolean_t
cpu_idle_cpr_callb(void *arg, int code)
{
	boolean_t rslt = B_TRUE;

	mutex_enter(&cpu_idle_callb_mutex);
	switch (code) {
	case CB_CODE_CPR_RESUME:
		if (cstate_timer_callback(CB_CODE_CPR_RESUME)) {
			/*
			 * Do not enable dispatcher hooks if disabled by user.
			 */
			if (cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG)
				break;

			disp_enq_thread = cstate_wakeup;
			idle_cpu = cpu_idle_adaptive;
		} else {
			rslt = B_FALSE;
		}
		break;

	case CB_CODE_CPR_CHKPT:
		idle_cpu = non_deep_idle_cpu;
		disp_enq_thread = non_deep_idle_disp_enq_thread;
		(void) cstate_timer_callback(CB_CODE_CPR_CHKPT);
		break;

	default:
		cmn_err(CE_NOTE, "!cpudvr cpr_callb: invalid code %d\n", code);
		break;
	}
	mutex_exit(&cpu_idle_callb_mutex);
	return (rslt);
}

/*
 * Handle a _CST change notification.
 */
void
cpuidle_cstate_instance(cpu_t *cp)
{
#ifndef	__xpv
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	cpu_acpi_handle_t	handle;
	struct machcpu		*mcpu;
	cpuset_t		dom_cpu_set;
	kmutex_t		*pm_lock;
	int			result = 0;
	processorid_t		cpu_id;

	if (mach_state == NULL) {
		return;
	}

	ASSERT(mach_state->ms_cstate.cma_domain != NULL);
	dom_cpu_set = mach_state->ms_cstate.cma_domain->pm_cpus;
	pm_lock = &mach_state->ms_cstate.cma_domain->pm_lock;

	/*
	 * Do this for all the CPUs in the same domain.
	 */
	mutex_enter(pm_lock);
	do {
		CPUSET_FIND(dom_cpu_set, cpu_id);
		if (cpu_id == CPUSET_NOTINSET)
			break;

		ASSERT(cpu_id >= 0 && cpu_id < NCPU);
		cp = cpu[cpu_id];
		mach_state = (cpupm_mach_state_t *)
		    cp->cpu_m.mcpu_pm_mach_state;
		if (!(mach_state->ms_caps & CPUPM_C_STATES)) {
			mutex_exit(pm_lock);
			return;
		}
		handle = mach_state->ms_acpi_handle;
		ASSERT(handle != NULL);

		/*
		 * Re-evaluate the c-state object.
		 */
		if (cpu_acpi_cache_cstate_data(handle) != 0) {
			cmn_err(CE_WARN, "Cannot re-evaluate the cpu c-state"
			    " object Instance: %d", cpu_id);
		}
		mcpu = &(cp->cpu_m);
		mcpu->max_cstates = cpu_acpi_get_max_cstates(handle);
		if (mcpu->max_cstates > CPU_ACPI_C1) {
			(void) cstate_timer_callback(
			    CST_EVENT_MULTIPLE_CSTATES);
			disp_enq_thread = cstate_wakeup;
			cp->cpu_m.mcpu_idle_cpu = cpu_acpi_idle;
		} else if (mcpu->max_cstates == CPU_ACPI_C1) {
			disp_enq_thread = non_deep_idle_disp_enq_thread;
			cp->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
			(void) cstate_timer_callback(CST_EVENT_ONE_CSTATE);
		}

		CPUSET_ATOMIC_XDEL(dom_cpu_set, cpu_id, result);
	} while (result < 0);
	mutex_exit(pm_lock);
#endif
}

/*
 * Handle a change in the number or the type of the available processor
 * power states.
 */
void
cpuidle_manage_cstates(void *ctx)
{
	cpu_t *cp = ctx;
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	boolean_t is_ready;

	if (mach_state == NULL) {
		return;
	}

	/*
	 * We currently refuse to power manage if the CPU is not ready to
	 * take cross calls (cross calls fail silently if the CPU is not
	 * ready for them).
	 *
	 * Additionally, for x86 platforms we cannot power manage an
	 * instance until it has been initialized.
	 */
	is_ready = (cp->cpu_flags & CPU_READY) && cpupm_cstate_ready(cp);
	if (!is_ready)
		return;

	cpuidle_cstate_instance(cp);
}