1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 2009, Intel Corporation. 27 * All rights reserved. 28 */ 29 30 #include <sys/cpu_pm.h> 31 #include <sys/x86_archext.h> 32 #include <sys/sdt.h> 33 #include <sys/spl.h> 34 #include <sys/machsystm.h> 35 #include <sys/archsystm.h> 36 #include <sys/hpet.h> 37 #include <sys/acpi/acpi.h> 38 #include <sys/acpica.h> 39 #include <sys/cpupm.h> 40 #include <sys/cpu_idle.h> 41 #include <sys/cpu_acpi.h> 42 #include <sys/cpupm_throttle.h> 43 #include <sys/dtrace.h> 44 #include <sys/note.h> 45 46 /* 47 * This callback is used to build the PPM CPU domains once 48 * a CPU device has been started. The callback is initialized 49 * by the PPM driver to point to a routine that will build the 50 * domains. 51 */ 52 void (*cpupm_ppm_alloc_pstate_domains)(cpu_t *); 53 54 /* 55 * This callback is used to remove CPU from the PPM CPU domains 56 * when the cpu driver is detached. The callback is initialized 57 * by the PPM driver to point to a routine that will remove CPU 58 * from the domains. 59 */ 60 void (*cpupm_ppm_free_pstate_domains)(cpu_t *); 61 62 /* 63 * This callback is used to redefine the topspeed for a CPU device. 64 * Since all CPUs in a domain should have identical properties, this 65 * callback is initialized by the PPM driver to point to a routine 66 * that will redefine the topspeed for all devices in a CPU domain. 67 * This callback is exercised whenever an ACPI _PPC change notification 68 * is received by the CPU driver. 69 */ 70 void (*cpupm_redefine_topspeed)(void *); 71 72 /* 73 * This callback is used by the PPM driver to call into the CPU driver 74 * to find a CPU's current topspeed (i.e., it's current ACPI _PPC value). 75 */ 76 void (*cpupm_set_topspeed_callb)(void *, int); 77 78 /* 79 * This callback is used by the PPM driver to call into the CPU driver 80 * to set a new topspeed for a CPU. 81 */ 82 int (*cpupm_get_topspeed_callb)(void *); 83 84 static void cpupm_event_notify_handler(ACPI_HANDLE, UINT32, void *); 85 static void cpupm_free_notify_handlers(cpu_t *); 86 static void cpupm_power_manage_notifications(void *); 87 88 /* 89 * Until proven otherwise, all power states are manageable. 90 */ 91 static uint32_t cpupm_enabled = CPUPM_ALL_STATES; 92 93 cpupm_state_domains_t *cpupm_pstate_domains = NULL; 94 cpupm_state_domains_t *cpupm_tstate_domains = NULL; 95 cpupm_state_domains_t *cpupm_cstate_domains = NULL; 96 97 /* 98 * c-state tunables 99 * 100 * cpupm_cs_sample_interval is the length of time we wait before 101 * recalculating c-state statistics. When a CPU goes idle it checks 102 * to see if it has been longer than cpupm_cs_sample_interval since it last 103 * caculated which C-state to go to. 104 * 105 * cpupm_cs_idle_cost_tunable is the ratio of time CPU spends executing + idle 106 * divided by time spent in the idle state transitions. 107 * A value of 10 means the CPU will not spend more than 1/10 of its time 108 * in idle latency. The worst case performance will be 90% of non Deep C-state 109 * kernel. 110 * 111 * cpupm_cs_idle_save_tunable is how long we must stay in a deeper C-state 112 * before it is worth going there. Expressed as a multiple of latency. 113 */ 114 uint32_t cpupm_cs_sample_interval = 100*1000*1000; /* 100 milliseconds */ 115 uint32_t cpupm_cs_idle_cost_tunable = 10; /* work time / latency cost */ 116 uint32_t cpupm_cs_idle_save_tunable = 2; /* idle power savings */ 117 uint16_t cpupm_C2_idle_pct_tunable = 70; 118 uint16_t cpupm_C3_idle_pct_tunable = 80; 119 120 #ifndef __xpv 121 extern boolean_t cpupm_intel_init(cpu_t *); 122 extern boolean_t cpupm_amd_init(cpu_t *); 123 124 typedef struct cpupm_vendor { 125 boolean_t (*cpuv_init)(cpu_t *); 126 } cpupm_vendor_t; 127 128 /* 129 * Table of supported vendors. 130 */ 131 static cpupm_vendor_t cpupm_vendors[] = { 132 cpupm_intel_init, 133 cpupm_amd_init, 134 NULL 135 }; 136 #endif 137 138 /* 139 * Initialize the machine. 140 * See if a module exists for managing power for this CPU. 141 */ 142 /*ARGSUSED*/ 143 void 144 cpupm_init(cpu_t *cp) 145 { 146 #ifndef __xpv 147 cpupm_vendor_t *vendors; 148 cpupm_mach_state_t *mach_state; 149 struct machcpu *mcpu = &(cp->cpu_m); 150 static boolean_t first = B_TRUE; 151 int *speeds; 152 uint_t nspeeds; 153 int ret; 154 155 mach_state = cp->cpu_m.mcpu_pm_mach_state = 156 kmem_zalloc(sizeof (cpupm_mach_state_t), KM_SLEEP); 157 mach_state->ms_caps = CPUPM_NO_STATES; 158 mutex_init(&mach_state->ms_lock, NULL, MUTEX_DRIVER, NULL); 159 160 mach_state->ms_acpi_handle = cpu_acpi_init(cp); 161 if (mach_state->ms_acpi_handle == NULL) { 162 cpupm_fini(cp); 163 cmn_err(CE_WARN, "!cpupm_init: processor %d: " 164 "unable to get ACPI handle", cp->cpu_id); 165 cmn_err(CE_NOTE, "!CPU power management will not function."); 166 CPUPM_DISABLE(); 167 first = B_FALSE; 168 return; 169 } 170 171 /* 172 * Loop through the CPU management module table and see if 173 * any of the modules implement CPU power management 174 * for this CPU. 175 */ 176 for (vendors = cpupm_vendors; vendors->cpuv_init != NULL; vendors++) { 177 if (vendors->cpuv_init(cp)) 178 break; 179 } 180 181 /* 182 * Nope, we can't power manage this CPU. 183 */ 184 if (vendors == NULL) { 185 cpupm_fini(cp); 186 CPUPM_DISABLE(); 187 first = B_FALSE; 188 return; 189 } 190 191 /* 192 * If P-state support exists for this system, then initialize it. 193 */ 194 if (mach_state->ms_pstate.cma_ops != NULL) { 195 ret = mach_state->ms_pstate.cma_ops->cpus_init(cp); 196 if (ret != 0) { 197 mach_state->ms_pstate.cma_ops = NULL; 198 cpupm_disable(CPUPM_P_STATES); 199 } else { 200 nspeeds = cpupm_get_speeds(cp, &speeds); 201 if (nspeeds == 0) { 202 cmn_err(CE_NOTE, "!cpupm_init: processor %d:" 203 " no speeds to manage", cp->cpu_id); 204 } else { 205 cpupm_set_supp_freqs(cp, speeds, nspeeds); 206 cpupm_free_speeds(speeds, nspeeds); 207 mach_state->ms_caps |= CPUPM_P_STATES; 208 } 209 } 210 } else { 211 cpupm_disable(CPUPM_P_STATES); 212 } 213 214 if (mach_state->ms_tstate.cma_ops != NULL) { 215 ret = mach_state->ms_tstate.cma_ops->cpus_init(cp); 216 if (ret != 0) { 217 mach_state->ms_tstate.cma_ops = NULL; 218 cpupm_disable(CPUPM_T_STATES); 219 } else { 220 mach_state->ms_caps |= CPUPM_T_STATES; 221 } 222 } else { 223 cpupm_disable(CPUPM_T_STATES); 224 } 225 226 /* 227 * If C-states support exists for this system, then initialize it. 228 */ 229 if (mach_state->ms_cstate.cma_ops != NULL) { 230 ret = mach_state->ms_cstate.cma_ops->cpus_init(cp); 231 if (ret != 0) { 232 mach_state->ms_cstate.cma_ops = NULL; 233 mcpu->max_cstates = CPU_ACPI_C1; 234 cpupm_disable(CPUPM_C_STATES); 235 idle_cpu = non_deep_idle_cpu; 236 disp_enq_thread = non_deep_idle_disp_enq_thread; 237 } else if (cpu_deep_cstates_supported()) { 238 mcpu->max_cstates = cpu_acpi_get_max_cstates( 239 mach_state->ms_acpi_handle); 240 if (mcpu->max_cstates > CPU_ACPI_C1) { 241 (void) cstate_timer_callback( 242 CST_EVENT_MULTIPLE_CSTATES); 243 cp->cpu_m.mcpu_idle_cpu = cpu_acpi_idle; 244 mcpu->mcpu_idle_type = CPU_ACPI_C1; 245 disp_enq_thread = cstate_wakeup; 246 } else { 247 (void) cstate_timer_callback( 248 CST_EVENT_ONE_CSTATE); 249 } 250 mach_state->ms_caps |= CPUPM_C_STATES; 251 } else { 252 mcpu->max_cstates = CPU_ACPI_C1; 253 idle_cpu = non_deep_idle_cpu; 254 disp_enq_thread = non_deep_idle_disp_enq_thread; 255 } 256 } else { 257 cpupm_disable(CPUPM_C_STATES); 258 } 259 260 261 if (mach_state->ms_caps == CPUPM_NO_STATES) { 262 cpupm_fini(cp); 263 CPUPM_DISABLE(); 264 first = B_FALSE; 265 return; 266 } 267 268 if ((mach_state->ms_caps & CPUPM_T_STATES) || 269 (mach_state->ms_caps & CPUPM_P_STATES) || 270 (mach_state->ms_caps & CPUPM_C_STATES)) { 271 if (first) { 272 acpica_write_cpupm_capabilities( 273 mach_state->ms_caps & CPUPM_P_STATES, 274 mach_state->ms_caps & CPUPM_C_STATES); 275 } 276 if (mach_state->ms_caps & CPUPM_T_STATES) { 277 cpupm_throttle_manage_notification(cp); 278 } 279 if (mach_state->ms_caps & CPUPM_C_STATES) { 280 cpuidle_manage_cstates(cp); 281 } 282 if (mach_state->ms_caps & CPUPM_P_STATES) { 283 cpupm_power_manage_notifications(cp); 284 } 285 cpupm_add_notify_handler(cp, cpupm_event_notify_handler, cp); 286 } 287 first = B_FALSE; 288 #endif 289 } 290 291 /* 292 * Free any resources allocated during cpupm initialization or cpupm start. 293 */ 294 /*ARGSUSED*/ 295 void 296 cpupm_free(cpu_t *cp, boolean_t cpupm_stop) 297 { 298 #ifndef __xpv 299 cpupm_mach_state_t *mach_state = 300 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 301 302 if (mach_state == NULL) 303 return; 304 305 if (mach_state->ms_pstate.cma_ops != NULL) { 306 if (cpupm_stop) 307 mach_state->ms_pstate.cma_ops->cpus_stop(cp); 308 else 309 mach_state->ms_pstate.cma_ops->cpus_fini(cp); 310 mach_state->ms_pstate.cma_ops = NULL; 311 } 312 313 if (mach_state->ms_tstate.cma_ops != NULL) { 314 if (cpupm_stop) 315 mach_state->ms_tstate.cma_ops->cpus_stop(cp); 316 else 317 mach_state->ms_tstate.cma_ops->cpus_fini(cp); 318 mach_state->ms_tstate.cma_ops = NULL; 319 } 320 321 if (mach_state->ms_cstate.cma_ops != NULL) { 322 if (cpupm_stop) 323 mach_state->ms_cstate.cma_ops->cpus_stop(cp); 324 else 325 mach_state->ms_cstate.cma_ops->cpus_fini(cp); 326 327 mach_state->ms_cstate.cma_ops = NULL; 328 } 329 330 cpupm_free_notify_handlers(cp); 331 332 if (mach_state->ms_acpi_handle != NULL) { 333 cpu_acpi_fini(mach_state->ms_acpi_handle); 334 mach_state->ms_acpi_handle = NULL; 335 } 336 337 mutex_destroy(&mach_state->ms_lock); 338 kmem_free(mach_state, sizeof (cpupm_mach_state_t)); 339 cp->cpu_m.mcpu_pm_mach_state = NULL; 340 #endif 341 } 342 343 void 344 cpupm_fini(cpu_t *cp) 345 { 346 /* 347 * call (*cpus_fini)() ops to release the cpupm resource 348 * in the P/C/T-state driver 349 */ 350 cpupm_free(cp, B_FALSE); 351 } 352 353 void 354 cpupm_start(cpu_t *cp) 355 { 356 cpupm_init(cp); 357 } 358 359 void 360 cpupm_stop(cpu_t *cp) 361 { 362 /* 363 * call (*cpus_stop)() ops to reclaim the cpupm resource 364 * in the P/C/T-state driver 365 */ 366 cpupm_free(cp, B_TRUE); 367 } 368 369 /* 370 * If A CPU has started and at least one power state is manageable, 371 * then the CPU is ready for power management. 372 */ 373 boolean_t 374 cpupm_is_ready(cpu_t *cp) 375 { 376 #ifndef __xpv 377 cpupm_mach_state_t *mach_state = 378 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 379 uint32_t cpupm_caps = mach_state->ms_caps; 380 381 if (cpupm_enabled == CPUPM_NO_STATES) 382 return (B_FALSE); 383 384 if ((cpupm_caps & CPUPM_T_STATES) || 385 (cpupm_caps & CPUPM_P_STATES) || 386 (cpupm_caps & CPUPM_C_STATES)) 387 388 return (B_TRUE); 389 return (B_FALSE); 390 #else 391 _NOTE(ARGUNUSED(cp)); 392 return (B_FALSE); 393 #endif 394 } 395 396 boolean_t 397 cpupm_is_enabled(uint32_t state) 398 { 399 return ((cpupm_enabled & state) == state); 400 } 401 402 /* 403 * By default, all states are enabled. 404 */ 405 void 406 cpupm_disable(uint32_t state) 407 { 408 409 if (state & CPUPM_P_STATES) { 410 cpupm_free_domains(&cpupm_pstate_domains); 411 } 412 if (state & CPUPM_T_STATES) { 413 cpupm_free_domains(&cpupm_tstate_domains); 414 } 415 if (state & CPUPM_C_STATES) { 416 cpupm_free_domains(&cpupm_cstate_domains); 417 } 418 cpupm_enabled &= ~state; 419 } 420 421 /* 422 * Allocate power domains for C,P and T States 423 */ 424 void 425 cpupm_alloc_domains(cpu_t *cp, int state) 426 { 427 cpupm_mach_state_t *mach_state = 428 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 429 cpu_acpi_handle_t handle = mach_state->ms_acpi_handle; 430 cpupm_state_domains_t **dom_ptr; 431 cpupm_state_domains_t *dptr; 432 cpupm_state_domains_t **mach_dom_state_ptr; 433 uint32_t domain; 434 uint32_t type; 435 436 switch (state) { 437 case CPUPM_P_STATES: 438 if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_PSD_CACHED)) { 439 domain = CPU_ACPI_PSD(handle).sd_domain; 440 type = CPU_ACPI_PSD(handle).sd_type; 441 } else { 442 if (MUTEX_HELD(&cpu_lock)) { 443 domain = cpuid_get_chipid(cp); 444 } else { 445 mutex_enter(&cpu_lock); 446 domain = cpuid_get_chipid(cp); 447 mutex_exit(&cpu_lock); 448 } 449 type = CPU_ACPI_HW_ALL; 450 } 451 dom_ptr = &cpupm_pstate_domains; 452 mach_dom_state_ptr = &mach_state->ms_pstate.cma_domain; 453 break; 454 case CPUPM_T_STATES: 455 if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_TSD_CACHED)) { 456 domain = CPU_ACPI_TSD(handle).sd_domain; 457 type = CPU_ACPI_TSD(handle).sd_type; 458 } else { 459 if (MUTEX_HELD(&cpu_lock)) { 460 domain = cpuid_get_chipid(cp); 461 } else { 462 mutex_enter(&cpu_lock); 463 domain = cpuid_get_chipid(cp); 464 mutex_exit(&cpu_lock); 465 } 466 type = CPU_ACPI_HW_ALL; 467 } 468 dom_ptr = &cpupm_tstate_domains; 469 mach_dom_state_ptr = &mach_state->ms_tstate.cma_domain; 470 break; 471 case CPUPM_C_STATES: 472 if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_CSD_CACHED)) { 473 domain = CPU_ACPI_CSD(handle).sd_domain; 474 type = CPU_ACPI_CSD(handle).sd_type; 475 } else { 476 if (MUTEX_HELD(&cpu_lock)) { 477 domain = cpuid_get_coreid(cp); 478 } else { 479 mutex_enter(&cpu_lock); 480 domain = cpuid_get_coreid(cp); 481 mutex_exit(&cpu_lock); 482 } 483 type = CPU_ACPI_HW_ALL; 484 } 485 dom_ptr = &cpupm_cstate_domains; 486 mach_dom_state_ptr = &mach_state->ms_cstate.cma_domain; 487 break; 488 default: 489 return; 490 } 491 492 for (dptr = *dom_ptr; dptr != NULL; dptr = dptr->pm_next) { 493 if (dptr->pm_domain == domain) 494 break; 495 } 496 497 /* new domain is created and linked at the head */ 498 if (dptr == NULL) { 499 dptr = kmem_zalloc(sizeof (cpupm_state_domains_t), KM_SLEEP); 500 dptr->pm_domain = domain; 501 dptr->pm_type = type; 502 dptr->pm_next = *dom_ptr; 503 mutex_init(&dptr->pm_lock, NULL, MUTEX_SPIN, 504 (void *)ipltospl(DISP_LEVEL)); 505 CPUSET_ZERO(dptr->pm_cpus); 506 *dom_ptr = dptr; 507 } 508 CPUSET_ADD(dptr->pm_cpus, cp->cpu_id); 509 *mach_dom_state_ptr = dptr; 510 } 511 512 /* 513 * Free C, P or T state power domains 514 */ 515 void 516 cpupm_free_domains(cpupm_state_domains_t **dom_ptr) 517 { 518 cpupm_state_domains_t *this_domain, *next_domain; 519 520 this_domain = *dom_ptr; 521 while (this_domain != NULL) { 522 next_domain = this_domain->pm_next; 523 mutex_destroy(&this_domain->pm_lock); 524 kmem_free((void *)this_domain, 525 sizeof (cpupm_state_domains_t)); 526 this_domain = next_domain; 527 } 528 *dom_ptr = NULL; 529 } 530 531 /* 532 * Remove CPU from C, P or T state power domains 533 */ 534 void 535 cpupm_remove_domains(cpu_t *cp, int state, cpupm_state_domains_t **dom_ptr) 536 { 537 cpupm_mach_state_t *mach_state = 538 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 539 cpupm_state_domains_t *dptr; 540 uint32_t pm_domain; 541 542 ASSERT(mach_state); 543 544 switch (state) { 545 case CPUPM_P_STATES: 546 pm_domain = mach_state->ms_pstate.cma_domain->pm_domain; 547 break; 548 case CPUPM_T_STATES: 549 pm_domain = mach_state->ms_tstate.cma_domain->pm_domain; 550 break; 551 case CPUPM_C_STATES: 552 pm_domain = mach_state->ms_cstate.cma_domain->pm_domain; 553 break; 554 default: 555 return; 556 } 557 558 /* 559 * Find the CPU C, P or T state power domain 560 */ 561 for (dptr = *dom_ptr; dptr != NULL; dptr = dptr->pm_next) { 562 if (dptr->pm_domain == pm_domain) 563 break; 564 } 565 566 /* 567 * return if no matched domain found 568 */ 569 if (dptr == NULL) 570 return; 571 572 /* 573 * We found one matched power domain, remove CPU from its cpuset. 574 * pm_lock(spin lock) here to avoid the race conditions between 575 * event change notification and cpu remove. 576 */ 577 mutex_enter(&dptr->pm_lock); 578 if (CPU_IN_SET(dptr->pm_cpus, cp->cpu_id)) 579 CPUSET_DEL(dptr->pm_cpus, cp->cpu_id); 580 mutex_exit(&dptr->pm_lock); 581 } 582 583 void 584 cpupm_alloc_ms_cstate(cpu_t *cp) 585 { 586 cpupm_mach_state_t *mach_state; 587 cpupm_mach_acpi_state_t *ms_cstate; 588 589 mach_state = (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 590 ms_cstate = &mach_state->ms_cstate; 591 ASSERT(ms_cstate->cma_state.cstate == NULL); 592 ms_cstate->cma_state.cstate = kmem_zalloc(sizeof (cma_c_state_t), 593 KM_SLEEP); 594 ms_cstate->cma_state.cstate->cs_next_cstate = CPU_ACPI_C1; 595 } 596 597 void 598 cpupm_free_ms_cstate(cpu_t *cp) 599 { 600 cpupm_mach_state_t *mach_state = 601 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 602 cpupm_mach_acpi_state_t *ms_cstate = &mach_state->ms_cstate; 603 604 if (ms_cstate->cma_state.cstate != NULL) { 605 kmem_free(ms_cstate->cma_state.cstate, sizeof (cma_c_state_t)); 606 ms_cstate->cma_state.cstate = NULL; 607 } 608 } 609 610 void 611 cpupm_state_change(cpu_t *cp, int level, int state) 612 { 613 cpupm_mach_state_t *mach_state = 614 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 615 cpupm_state_ops_t *state_ops; 616 cpupm_state_domains_t *state_domain; 617 cpuset_t set; 618 619 DTRACE_PROBE2(cpupm__state__change, cpu_t *, cp, int, level); 620 621 if (mach_state == NULL) { 622 return; 623 } 624 625 switch (state) { 626 case CPUPM_P_STATES: 627 state_ops = mach_state->ms_pstate.cma_ops; 628 state_domain = mach_state->ms_pstate.cma_domain; 629 break; 630 case CPUPM_T_STATES: 631 state_ops = mach_state->ms_tstate.cma_ops; 632 state_domain = mach_state->ms_tstate.cma_domain; 633 break; 634 default: 635 break; 636 } 637 638 switch (state_domain->pm_type) { 639 case CPU_ACPI_SW_ANY: 640 /* 641 * A request on any CPU in the domain transitions the domain 642 */ 643 CPUSET_ONLY(set, cp->cpu_id); 644 state_ops->cpus_change(set, level); 645 break; 646 case CPU_ACPI_SW_ALL: 647 /* 648 * All CPUs in the domain must request the transition 649 */ 650 case CPU_ACPI_HW_ALL: 651 /* 652 * P/T-state transitions are coordinated by the hardware 653 * For now, request the transition on all CPUs in the domain, 654 * but looking ahead we can probably be smarter about this. 655 */ 656 mutex_enter(&state_domain->pm_lock); 657 state_ops->cpus_change(state_domain->pm_cpus, level); 658 mutex_exit(&state_domain->pm_lock); 659 break; 660 default: 661 cmn_err(CE_NOTE, "Unknown domain coordination type: %d", 662 state_domain->pm_type); 663 } 664 } 665 666 /* 667 * CPU PM interfaces exposed to the CPU power manager 668 */ 669 /*ARGSUSED*/ 670 id_t 671 cpupm_plat_domain_id(cpu_t *cp, cpupm_dtype_t type) 672 { 673 cpupm_mach_state_t *mach_state = 674 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 675 676 if ((mach_state == NULL) || (!cpupm_is_enabled(CPUPM_P_STATES) && 677 !cpupm_is_enabled(CPUPM_C_STATES))) { 678 return (CPUPM_NO_DOMAIN); 679 } 680 if (type == CPUPM_DTYPE_ACTIVE) { 681 /* 682 * Return P-State domain for the specified CPU 683 */ 684 if (mach_state->ms_pstate.cma_domain) { 685 return (mach_state->ms_pstate.cma_domain->pm_domain); 686 } 687 } else if (type == CPUPM_DTYPE_IDLE) { 688 /* 689 * Return C-State domain for the specified CPU 690 */ 691 if (mach_state->ms_cstate.cma_domain) { 692 return (mach_state->ms_cstate.cma_domain->pm_domain); 693 } 694 } 695 return (CPUPM_NO_DOMAIN); 696 } 697 698 /*ARGSUSED*/ 699 uint_t 700 cpupm_plat_state_enumerate(cpu_t *cp, cpupm_dtype_t type, 701 cpupm_state_t *states) 702 { 703 int *speeds; 704 uint_t nspeeds, i; 705 706 /* 707 * Idle domain support unimplemented 708 */ 709 if (type != CPUPM_DTYPE_ACTIVE) { 710 return (0); 711 } 712 nspeeds = cpupm_get_speeds(cp, &speeds); 713 714 /* 715 * If the caller passes NULL for states, just return the 716 * number of states. 717 */ 718 if (states != NULL) { 719 for (i = 0; i < nspeeds; i++) { 720 states[i].cps_speed = speeds[i]; 721 states[i].cps_handle = (cpupm_handle_t)i; 722 } 723 } 724 cpupm_free_speeds(speeds, nspeeds); 725 return (nspeeds); 726 } 727 728 /*ARGSUSED*/ 729 int 730 cpupm_plat_change_state(cpu_t *cp, cpupm_state_t *state) 731 { 732 if (!cpupm_is_ready(cp)) 733 return (-1); 734 735 cpupm_state_change(cp, (int)state->cps_handle, CPUPM_P_STATES); 736 737 return (0); 738 } 739 740 /*ARGSUSED*/ 741 /* 742 * Note: It is the responsibility of the users of 743 * cpupm_get_speeds() to free the memory allocated 744 * for speeds using cpupm_free_speeds() 745 */ 746 uint_t 747 cpupm_get_speeds(cpu_t *cp, int **speeds) 748 { 749 #ifndef __xpv 750 cpupm_mach_state_t *mach_state = 751 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 752 return (cpu_acpi_get_speeds(mach_state->ms_acpi_handle, speeds)); 753 #else 754 return (0); 755 #endif 756 } 757 758 /*ARGSUSED*/ 759 void 760 cpupm_free_speeds(int *speeds, uint_t nspeeds) 761 { 762 #ifndef __xpv 763 cpu_acpi_free_speeds(speeds, nspeeds); 764 #endif 765 } 766 767 /* 768 * All CPU instances have been initialized successfully. 769 */ 770 boolean_t 771 cpupm_power_ready(cpu_t *cp) 772 { 773 return (cpupm_is_enabled(CPUPM_P_STATES) && cpupm_is_ready(cp)); 774 } 775 776 /* 777 * All CPU instances have been initialized successfully. 778 */ 779 boolean_t 780 cpupm_throttle_ready(cpu_t *cp) 781 { 782 return (cpupm_is_enabled(CPUPM_T_STATES) && cpupm_is_ready(cp)); 783 } 784 785 /* 786 * All CPU instances have been initialized successfully. 787 */ 788 boolean_t 789 cpupm_cstate_ready(cpu_t *cp) 790 { 791 return (cpupm_is_enabled(CPUPM_C_STATES) && cpupm_is_ready(cp)); 792 } 793 794 void 795 cpupm_notify_handler(ACPI_HANDLE obj, UINT32 val, void *ctx) 796 { 797 cpu_t *cp = ctx; 798 cpupm_mach_state_t *mach_state = 799 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 800 cpupm_notification_t *entry; 801 802 mutex_enter(&mach_state->ms_lock); 803 for (entry = mach_state->ms_handlers; entry != NULL; 804 entry = entry->nq_next) { 805 entry->nq_handler(obj, val, entry->nq_ctx); 806 } 807 mutex_exit(&mach_state->ms_lock); 808 } 809 810 /*ARGSUSED*/ 811 void 812 cpupm_add_notify_handler(cpu_t *cp, CPUPM_NOTIFY_HANDLER handler, void *ctx) 813 { 814 #ifndef __xpv 815 cpupm_mach_state_t *mach_state = 816 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 817 cpupm_notification_t *entry; 818 819 entry = kmem_zalloc(sizeof (cpupm_notification_t), KM_SLEEP); 820 entry->nq_handler = handler; 821 entry->nq_ctx = ctx; 822 mutex_enter(&mach_state->ms_lock); 823 if (mach_state->ms_handlers == NULL) { 824 entry->nq_next = NULL; 825 mach_state->ms_handlers = entry; 826 cpu_acpi_install_notify_handler(mach_state->ms_acpi_handle, 827 cpupm_notify_handler, cp); 828 829 } else { 830 entry->nq_next = mach_state->ms_handlers; 831 mach_state->ms_handlers = entry; 832 } 833 mutex_exit(&mach_state->ms_lock); 834 #endif 835 } 836 837 /*ARGSUSED*/ 838 static void 839 cpupm_free_notify_handlers(cpu_t *cp) 840 { 841 #ifndef __xpv 842 cpupm_mach_state_t *mach_state = 843 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 844 cpupm_notification_t *entry; 845 cpupm_notification_t *next; 846 847 mutex_enter(&mach_state->ms_lock); 848 if (mach_state->ms_handlers == NULL) { 849 mutex_exit(&mach_state->ms_lock); 850 return; 851 } 852 if (mach_state->ms_acpi_handle != NULL) { 853 cpu_acpi_remove_notify_handler(mach_state->ms_acpi_handle, 854 cpupm_notify_handler); 855 } 856 entry = mach_state->ms_handlers; 857 while (entry != NULL) { 858 next = entry->nq_next; 859 kmem_free(entry, sizeof (cpupm_notification_t)); 860 entry = next; 861 } 862 mach_state->ms_handlers = NULL; 863 mutex_exit(&mach_state->ms_lock); 864 #endif 865 } 866 867 /* 868 * Get the current max speed from the ACPI _PPC object 869 */ 870 /*ARGSUSED*/ 871 int 872 cpupm_get_top_speed(cpu_t *cp) 873 { 874 #ifndef __xpv 875 cpupm_mach_state_t *mach_state; 876 cpu_acpi_handle_t handle; 877 int plat_level; 878 uint_t nspeeds; 879 int max_level; 880 881 mach_state = 882 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 883 handle = mach_state->ms_acpi_handle; 884 885 cpu_acpi_cache_ppc(handle); 886 plat_level = CPU_ACPI_PPC(handle); 887 888 nspeeds = CPU_ACPI_PSTATES_COUNT(handle); 889 890 max_level = nspeeds - 1; 891 if ((plat_level < 0) || (plat_level > max_level)) { 892 cmn_err(CE_NOTE, "!cpupm_get_top_speed: CPU %d: " 893 "_PPC out of range %d", cp->cpu_id, plat_level); 894 plat_level = 0; 895 } 896 897 return (plat_level); 898 #else 899 return (0); 900 #endif 901 } 902 903 /* 904 * This notification handler is called whenever the ACPI _PPC 905 * object changes. The _PPC is a sort of governor on power levels. 906 * It sets an upper threshold on which, _PSS defined, power levels 907 * are usuable. The _PPC value is dynamic and may change as properties 908 * (i.e., thermal or AC source) of the system change. 909 */ 910 911 static void 912 cpupm_power_manage_notifications(void *ctx) 913 { 914 cpu_t *cp = ctx; 915 int top_speed; 916 917 top_speed = cpupm_get_top_speed(cp); 918 cpupm_redefine_max_activepwr_state(cp, top_speed); 919 } 920 921 /* ARGSUSED */ 922 static void 923 cpupm_event_notify_handler(ACPI_HANDLE obj, UINT32 val, void *ctx) 924 { 925 #ifndef __xpv 926 927 cpu_t *cp = ctx; 928 cpupm_mach_state_t *mach_state = 929 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 930 931 if (mach_state == NULL) 932 return; 933 934 /* 935 * Currently, we handle _TPC,_CST and _PPC change notifications. 936 */ 937 if (val == CPUPM_TPC_CHANGE_NOTIFICATION && 938 mach_state->ms_caps & CPUPM_T_STATES) { 939 cpupm_throttle_manage_notification(ctx); 940 } else if (val == CPUPM_CST_CHANGE_NOTIFICATION && 941 mach_state->ms_caps & CPUPM_C_STATES) { 942 cpuidle_manage_cstates(ctx); 943 } else if (val == CPUPM_PPC_CHANGE_NOTIFICATION && 944 mach_state->ms_caps & CPUPM_P_STATES) { 945 cpupm_power_manage_notifications(ctx); 946 } 947 #endif 948 } 949 950 /* 951 * Update cpupm cstate data each time CPU exits idle. 952 */ 953 void 954 cpupm_wakeup_cstate_data(cma_c_state_t *cs_data, hrtime_t end) 955 { 956 cs_data->cs_idle_exit = end; 957 } 958 959 /* 960 * Determine next cstate based on cpupm data. 961 * Update cpupm cstate data each time CPU goes idle. 962 * Do as much as possible in the idle state bookkeeping function because the 963 * performance impact while idle is minimal compared to in the wakeup function 964 * when there is real work to do. 965 */ 966 uint32_t 967 cpupm_next_cstate(cma_c_state_t *cs_data, cpu_acpi_cstate_t *cstates, 968 uint32_t cs_count, hrtime_t start) 969 { 970 hrtime_t duration; 971 hrtime_t ave_interval; 972 hrtime_t ave_idle_time; 973 uint32_t i, smpl_cnt; 974 975 duration = cs_data->cs_idle_exit - cs_data->cs_idle_enter; 976 scalehrtime(&duration); 977 cs_data->cs_idle += duration; 978 cs_data->cs_idle_enter = start; 979 980 smpl_cnt = ++cs_data->cs_cnt; 981 cs_data->cs_smpl_len = start - cs_data->cs_smpl_start; 982 scalehrtime(&cs_data->cs_smpl_len); 983 if (cs_data->cs_smpl_len > cpupm_cs_sample_interval) { 984 cs_data->cs_smpl_idle = cs_data->cs_idle; 985 cs_data->cs_idle = 0; 986 cs_data->cs_smpl_idle_pct = ((100 * cs_data->cs_smpl_idle) / 987 cs_data->cs_smpl_len); 988 989 cs_data->cs_smpl_start = start; 990 cs_data->cs_cnt = 0; 991 992 /* 993 * Strand level C-state policy 994 * The cpu_acpi_cstate_t *cstates array is not required to 995 * have an entry for both CPU_ACPI_C2 and CPU_ACPI_C3. 996 * There are cs_count entries in the cstates array. 997 * cs_data->cs_next_cstate contains the index of the next 998 * C-state this CPU should enter. 999 */ 1000 ASSERT(cstates[0].cs_type == CPU_ACPI_C1); 1001 1002 /* 1003 * Will CPU be idle long enough to save power? 1004 */ 1005 ave_idle_time = (cs_data->cs_smpl_idle / smpl_cnt) / 1000; 1006 for (i = 1; i < cs_count; ++i) { 1007 if (ave_idle_time < (cstates[i].cs_latency * 1008 cpupm_cs_idle_save_tunable)) { 1009 cs_count = i; 1010 DTRACE_PROBE2(cpupm__next__cstate, cpu_t *, 1011 CPU, int, i); 1012 } 1013 } 1014 1015 /* 1016 * Wakeup often (even when non-idle time is very short)? 1017 * Some producer/consumer type loads fall into this category. 1018 */ 1019 ave_interval = (cs_data->cs_smpl_len / smpl_cnt) / 1000; 1020 for (i = 1; i < cs_count; ++i) { 1021 if (ave_interval <= (cstates[i].cs_latency * 1022 cpupm_cs_idle_cost_tunable)) { 1023 cs_count = i; 1024 DTRACE_PROBE2(cpupm__next__cstate, cpu_t *, 1025 CPU, int, (CPU_MAX_CSTATES + i)); 1026 } 1027 } 1028 1029 /* 1030 * Idle percent 1031 */ 1032 for (i = 1; i < cs_count; ++i) { 1033 switch (cstates[i].cs_type) { 1034 case CPU_ACPI_C2: 1035 if (cs_data->cs_smpl_idle_pct < 1036 cpupm_C2_idle_pct_tunable) { 1037 cs_count = i; 1038 DTRACE_PROBE2(cpupm__next__cstate, 1039 cpu_t *, CPU, int, 1040 ((2 * CPU_MAX_CSTATES) + i)); 1041 } 1042 break; 1043 1044 case CPU_ACPI_C3: 1045 if (cs_data->cs_smpl_idle_pct < 1046 cpupm_C3_idle_pct_tunable) { 1047 cs_count = i; 1048 DTRACE_PROBE2(cpupm__next__cstate, 1049 cpu_t *, CPU, int, 1050 ((2 * CPU_MAX_CSTATES) + i)); 1051 } 1052 break; 1053 } 1054 } 1055 1056 cs_data->cs_next_cstate = cs_count - 1; 1057 } 1058 1059 return (cs_data->cs_next_cstate); 1060 }