PANKOVs restructure
--- old/usr/src/uts/i86pc/os/cpupm/cpupm_mach.c
+++ new/usr/src/uts/i86pc/os/cpupm/cpupm_mach.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25 /*
26 26 * Copyright (c) 2009, Intel Corporation.
27 27 * All rights reserved.
28 28 */
29 29
30 30 #include <sys/cpu_pm.h>
31 31 #include <sys/x86_archext.h>
32 32 #include <sys/sdt.h>
33 33 #include <sys/spl.h>
34 34 #include <sys/machsystm.h>
35 35 #include <sys/archsystm.h>
36 36 #include <sys/hpet.h>
37 -#include <sys/acpi/acpi.h>
37 +#include <acpica/include/acpi.h>
38 38 #include <sys/acpica.h>
39 39 #include <sys/cpupm.h>
40 40 #include <sys/cpu_idle.h>
41 41 #include <sys/cpu_acpi.h>
42 42 #include <sys/cpupm_throttle.h>
43 43 #include <sys/dtrace.h>
44 44 #include <sys/note.h>
45 45
46 46 /*
47 47 * This callback is used to build the PPM CPU domains once
48 48 * a CPU device has been started. The callback is initialized
49 49 * by the PPM driver to point to a routine that will build the
50 50 * domains.
51 51 */
52 52 void (*cpupm_ppm_alloc_pstate_domains)(cpu_t *);
53 53
54 54 /*
55 55 * This callback is used to remove CPU from the PPM CPU domains
56 56 * when the cpu driver is detached. The callback is initialized
57 57 * by the PPM driver to point to a routine that will remove CPU
58 58 * from the domains.
59 59 */
60 60 void (*cpupm_ppm_free_pstate_domains)(cpu_t *);
61 61
62 62 /*
63 63 * This callback is used to redefine the topspeed for a CPU device.
64 64 * Since all CPUs in a domain should have identical properties, this
65 65 * callback is initialized by the PPM driver to point to a routine
66 66 * that will redefine the topspeed for all devices in a CPU domain.
67 67 * This callback is exercised whenever an ACPI _PPC change notification
68 68 * is received by the CPU driver.
69 69 */
70 70 void (*cpupm_redefine_topspeed)(void *);
71 71
 72 72 /*
 73 73  * This callback is used by the PPM driver to call into the CPU driver
 74 74  * to set a new topspeed for a CPU.
 75 75  */
 76 76 void (*cpupm_set_topspeed_callb)(void *, int);
 77 77
 78 78 /*
 79 79  * This callback is used by the PPM driver to call into the CPU driver
 80 80  * to find a CPU's current topspeed (i.e., its current ACPI _PPC value).
 81 81  */
 82 82 int (*cpupm_get_topspeed_callb)(void *);
83 83
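/*
 * Illustrative sketch (hypothetical routine names, not from this
 * file): the hooks above are populated by the PPM and CPU drivers at
 * initialization time, along the lines of:
 *
 *	cpupm_ppm_alloc_pstate_domains = my_ppm_alloc_pstate_domains;
 *	cpupm_ppm_free_pstate_domains = my_ppm_free_pstate_domains;
 *	cpupm_redefine_topspeed = my_ppm_redefine_topspeed;
 *	cpupm_set_topspeed_callb = my_cpudrv_set_topspeed;
 *	cpupm_get_topspeed_callb = my_cpudrv_get_topspeed;
 */
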
84 84 static void cpupm_event_notify_handler(ACPI_HANDLE, UINT32, void *);
85 85 static void cpupm_free_notify_handlers(cpu_t *);
86 86 static void cpupm_power_manage_notifications(void *);
87 87
88 88 /*
89 89 * Until proven otherwise, all power states are manageable.
90 90 */
91 91 static uint32_t cpupm_enabled = CPUPM_ALL_STATES;
92 92
93 93 cpupm_state_domains_t *cpupm_pstate_domains = NULL;
94 94 cpupm_state_domains_t *cpupm_tstate_domains = NULL;
95 95 cpupm_state_domains_t *cpupm_cstate_domains = NULL;
96 96
97 97 /*
98 98 * c-state tunables
99 99 *
100 100 * cpupm_cs_sample_interval is the length of time we wait before
101 101 * recalculating c-state statistics. When a CPU goes idle it checks
102 102 * to see if it has been longer than cpupm_cs_sample_interval since it last
103 103 * calculated which C-state to go to.
104 104 *
105 105 * cpupm_cs_idle_cost_tunable is the ratio of the time the CPU spends
106 106 * executing plus idle, divided by the time spent in idle-state
107 107 * transitions. A value of 10 means the CPU will spend no more than
108 108 * 1/10 of its time in idle latency; worst-case performance is 90% of
109 109 * a kernel that uses no deep C-states.
110 110 *
111 111 * cpupm_cs_idle_save_tunable is how long we must stay in a deeper C-state
112 112 * for it to be worth entering, expressed as a multiple of the state's
113 113 * latency (see the worked example below).
113 113 */
114 114 uint32_t cpupm_cs_sample_interval = 100*1000*1000; /* 100 milliseconds */
115 115 uint32_t cpupm_cs_idle_cost_tunable = 10; /* work time / latency cost */
116 116 uint32_t cpupm_cs_idle_save_tunable = 2; /* idle power savings */
117 117 uint16_t cpupm_C2_idle_pct_tunable = 70;
118 118 uint16_t cpupm_C3_idle_pct_tunable = 80;
119 119
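/*
 * Worked example for the tunables above (illustrative numbers only):
 * for a C-state with a latency of 100 usec,
 * cpupm_cs_idle_cost_tunable = 10 requires the average wakeup
 * interval to exceed 100 * 10 = 1000 usec, and
 * cpupm_cs_idle_save_tunable = 2 requires the average idle stretch to
 * be at least 100 * 2 = 200 usec, before that C-state is selected.
 */
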
120 120 #ifndef __xpv
121 121 extern boolean_t cpupm_intel_init(cpu_t *);
122 122 extern boolean_t cpupm_amd_init(cpu_t *);
123 123
124 124 typedef struct cpupm_vendor {
125 125 boolean_t (*cpuv_init)(cpu_t *);
126 126 } cpupm_vendor_t;
127 127
128 128 /*
129 129 * Table of supported vendors.
130 130 */
131 131 static cpupm_vendor_t cpupm_vendors[] = {
132 132 cpupm_intel_init,
133 133 cpupm_amd_init,
134 134 NULL
135 135 };
136 136 #endif
137 137
138 138 /*
139 139 * Initialize the machine.
140 140 * See if a module exists for managing power for this CPU.
141 141 */
142 142 /*ARGSUSED*/
143 143 void
144 144 cpupm_init(cpu_t *cp)
145 145 {
146 146 #ifndef __xpv
147 147 cpupm_vendor_t *vendors;
148 148 cpupm_mach_state_t *mach_state;
149 149 struct machcpu *mcpu = &(cp->cpu_m);
150 150 static boolean_t first = B_TRUE;
151 151 int *speeds;
152 152 uint_t nspeeds;
153 153 int ret;
154 154
155 155 mach_state = cp->cpu_m.mcpu_pm_mach_state =
156 156 kmem_zalloc(sizeof (cpupm_mach_state_t), KM_SLEEP);
157 157 mach_state->ms_caps = CPUPM_NO_STATES;
158 158 mutex_init(&mach_state->ms_lock, NULL, MUTEX_DRIVER, NULL);
159 159
160 160 mach_state->ms_acpi_handle = cpu_acpi_init(cp);
161 161 if (mach_state->ms_acpi_handle == NULL) {
162 162 cpupm_fini(cp);
163 163 cmn_err(CE_WARN, "!cpupm_init: processor %d: "
164 164 "unable to get ACPI handle", cp->cpu_id);
165 165 cmn_err(CE_NOTE, "!CPU power management will not function.");
166 166 CPUPM_DISABLE();
167 167 first = B_FALSE;
168 168 return;
169 169 }
170 170
171 171 /*
172 172 * Loop through the CPU management module table and see if
173 173 * any of the modules implement CPU power management
174 174 * for this CPU.
175 175 */
176 176 for (vendors = cpupm_vendors; vendors->cpuv_init != NULL; vendors++) {
177 177 if (vendors->cpuv_init(cp))
178 178 break;
179 179 }
180 180
181 181 /*
182 182 * Nope, we can't power manage this CPU.
183 183 */
184 184 	if (vendors->cpuv_init == NULL) {
185 185 cpupm_fini(cp);
186 186 CPUPM_DISABLE();
187 187 first = B_FALSE;
188 188 return;
189 189 }
190 190
191 191 /*
192 192 * If P-state support exists for this system, then initialize it.
193 193 */
194 194 if (mach_state->ms_pstate.cma_ops != NULL) {
195 195 ret = mach_state->ms_pstate.cma_ops->cpus_init(cp);
196 196 if (ret != 0) {
197 197 mach_state->ms_pstate.cma_ops = NULL;
198 198 cpupm_disable(CPUPM_P_STATES);
199 199 } else {
200 200 nspeeds = cpupm_get_speeds(cp, &speeds);
201 201 if (nspeeds == 0) {
202 202 cmn_err(CE_NOTE, "!cpupm_init: processor %d:"
203 203 " no speeds to manage", cp->cpu_id);
204 204 } else {
205 205 cpupm_set_supp_freqs(cp, speeds, nspeeds);
206 206 cpupm_free_speeds(speeds, nspeeds);
207 207 mach_state->ms_caps |= CPUPM_P_STATES;
208 208 }
209 209 }
210 210 } else {
211 211 cpupm_disable(CPUPM_P_STATES);
212 212 }
213 213
214 214 if (mach_state->ms_tstate.cma_ops != NULL) {
215 215 ret = mach_state->ms_tstate.cma_ops->cpus_init(cp);
216 216 if (ret != 0) {
217 217 mach_state->ms_tstate.cma_ops = NULL;
218 218 cpupm_disable(CPUPM_T_STATES);
219 219 } else {
220 220 mach_state->ms_caps |= CPUPM_T_STATES;
221 221 }
222 222 } else {
223 223 cpupm_disable(CPUPM_T_STATES);
224 224 }
225 225
226 226 /*
227 227 * If C-state support exists for this system, then initialize it.
228 228 */
229 229 if (mach_state->ms_cstate.cma_ops != NULL) {
230 230 ret = mach_state->ms_cstate.cma_ops->cpus_init(cp);
231 231 if (ret != 0) {
232 232 mach_state->ms_cstate.cma_ops = NULL;
233 233 mcpu->max_cstates = CPU_ACPI_C1;
234 234 cpupm_disable(CPUPM_C_STATES);
235 235 idle_cpu = non_deep_idle_cpu;
236 236 disp_enq_thread = non_deep_idle_disp_enq_thread;
237 237 } else if (cpu_deep_cstates_supported()) {
238 238 mcpu->max_cstates = cpu_acpi_get_max_cstates(
239 239 mach_state->ms_acpi_handle);
240 240 if (mcpu->max_cstates > CPU_ACPI_C1) {
241 241 (void) cstate_timer_callback(
242 242 CST_EVENT_MULTIPLE_CSTATES);
243 243 cp->cpu_m.mcpu_idle_cpu = cpu_acpi_idle;
244 244 mcpu->mcpu_idle_type = CPU_ACPI_C1;
245 245 disp_enq_thread = cstate_wakeup;
246 246 } else {
247 247 (void) cstate_timer_callback(
248 248 CST_EVENT_ONE_CSTATE);
249 249 }
250 250 mach_state->ms_caps |= CPUPM_C_STATES;
251 251 } else {
252 252 mcpu->max_cstates = CPU_ACPI_C1;
253 253 idle_cpu = non_deep_idle_cpu;
254 254 disp_enq_thread = non_deep_idle_disp_enq_thread;
255 255 }
256 256 } else {
257 257 cpupm_disable(CPUPM_C_STATES);
258 258 }
259 259
260 260
261 261 if (mach_state->ms_caps == CPUPM_NO_STATES) {
262 262 cpupm_fini(cp);
263 263 CPUPM_DISABLE();
264 264 first = B_FALSE;
265 265 return;
266 266 }
267 267
268 268 if ((mach_state->ms_caps & CPUPM_T_STATES) ||
269 269 (mach_state->ms_caps & CPUPM_P_STATES) ||
270 270 (mach_state->ms_caps & CPUPM_C_STATES)) {
271 271 if (first) {
272 272 acpica_write_cpupm_capabilities(
273 273 mach_state->ms_caps & CPUPM_P_STATES,
274 274 mach_state->ms_caps & CPUPM_C_STATES);
275 275 }
276 276 if (mach_state->ms_caps & CPUPM_T_STATES) {
277 277 cpupm_throttle_manage_notification(cp);
278 278 }
279 279 if (mach_state->ms_caps & CPUPM_C_STATES) {
280 280 cpuidle_manage_cstates(cp);
281 281 }
282 282 if (mach_state->ms_caps & CPUPM_P_STATES) {
283 283 cpupm_power_manage_notifications(cp);
284 284 }
285 285 cpupm_add_notify_handler(cp, cpupm_event_notify_handler, cp);
286 286 }
287 287 first = B_FALSE;
288 288 #endif
289 289 }
290 290
291 291 /*
292 292 * Free any resources allocated during cpupm initialization or cpupm start.
293 293 */
294 294 /*ARGSUSED*/
295 295 void
296 296 cpupm_free(cpu_t *cp, boolean_t cpupm_stop)
297 297 {
298 298 #ifndef __xpv
299 299 cpupm_mach_state_t *mach_state =
300 300 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
301 301
302 302 if (mach_state == NULL)
303 303 return;
304 304
305 305 if (mach_state->ms_pstate.cma_ops != NULL) {
306 306 if (cpupm_stop)
307 307 mach_state->ms_pstate.cma_ops->cpus_stop(cp);
308 308 else
309 309 mach_state->ms_pstate.cma_ops->cpus_fini(cp);
310 310 mach_state->ms_pstate.cma_ops = NULL;
311 311 }
312 312
313 313 if (mach_state->ms_tstate.cma_ops != NULL) {
314 314 if (cpupm_stop)
315 315 mach_state->ms_tstate.cma_ops->cpus_stop(cp);
316 316 else
317 317 mach_state->ms_tstate.cma_ops->cpus_fini(cp);
318 318 mach_state->ms_tstate.cma_ops = NULL;
319 319 }
320 320
321 321 if (mach_state->ms_cstate.cma_ops != NULL) {
322 322 if (cpupm_stop)
323 323 mach_state->ms_cstate.cma_ops->cpus_stop(cp);
324 324 else
325 325 mach_state->ms_cstate.cma_ops->cpus_fini(cp);
326 326
327 327 mach_state->ms_cstate.cma_ops = NULL;
328 328 }
329 329
330 330 cpupm_free_notify_handlers(cp);
331 331
332 332 if (mach_state->ms_acpi_handle != NULL) {
333 333 cpu_acpi_fini(mach_state->ms_acpi_handle);
334 334 mach_state->ms_acpi_handle = NULL;
335 335 }
336 336
337 337 mutex_destroy(&mach_state->ms_lock);
338 338 kmem_free(mach_state, sizeof (cpupm_mach_state_t));
339 339 cp->cpu_m.mcpu_pm_mach_state = NULL;
340 340 #endif
341 341 }
342 342
343 343 void
344 344 cpupm_fini(cpu_t *cp)
345 345 {
346 346 /*
347 347 * call (*cpus_fini)() ops to release the cpupm resource
348 348 * in the P/C/T-state driver
349 349 */
350 350 cpupm_free(cp, B_FALSE);
351 351 }
352 352
353 353 void
354 354 cpupm_start(cpu_t *cp)
355 355 {
356 356 cpupm_init(cp);
357 357 }
358 358
359 359 void
360 360 cpupm_stop(cpu_t *cp)
361 361 {
362 362 /*
363 363 * call (*cpus_stop)() ops to reclaim the cpupm resource
364 364 * in the P/C/T-state driver
365 365 */
366 366 cpupm_free(cp, B_TRUE);
367 367 }
368 368
369 369 /*
370 370 * If a CPU has started and at least one power state is manageable,
371 371 * then the CPU is ready for power management.
372 372 */
373 373 boolean_t
374 374 cpupm_is_ready(cpu_t *cp)
375 375 {
376 376 #ifndef __xpv
377 377 cpupm_mach_state_t *mach_state =
378 378 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
379 379 uint32_t cpupm_caps = mach_state->ms_caps;
380 380
381 381 if (cpupm_enabled == CPUPM_NO_STATES)
382 382 return (B_FALSE);
383 383
384 384 if ((cpupm_caps & CPUPM_T_STATES) ||
385 385 (cpupm_caps & CPUPM_P_STATES) ||
386 386 (cpupm_caps & CPUPM_C_STATES))
387 387
388 388 return (B_TRUE);
389 389 return (B_FALSE);
390 390 #else
391 391 _NOTE(ARGUNUSED(cp));
392 392 return (B_FALSE);
393 393 #endif
394 394 }
395 395
396 396 boolean_t
397 397 cpupm_is_enabled(uint32_t state)
398 398 {
399 399 return ((cpupm_enabled & state) == state);
400 400 }
401 401
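/*
 * Example (illustrative): because the whole mask must be present,
 * cpupm_is_enabled(CPUPM_P_STATES | CPUPM_C_STATES) returns B_TRUE
 * only while both P-states and C-states remain enabled; disabling
 * either one makes it B_FALSE.
 */
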
402 402 /*
403 403 * By default, all states are enabled.
404 404 */
405 405 void
406 406 cpupm_disable(uint32_t state)
407 407 {
408 408
409 409 if (state & CPUPM_P_STATES) {
410 410 cpupm_free_domains(&cpupm_pstate_domains);
411 411 }
412 412 if (state & CPUPM_T_STATES) {
413 413 cpupm_free_domains(&cpupm_tstate_domains);
414 414 }
415 415 if (state & CPUPM_C_STATES) {
416 416 cpupm_free_domains(&cpupm_cstate_domains);
417 417 }
418 418 cpupm_enabled &= ~state;
419 419 }
420 420
421 421 /*
422 422 * Allocate power domains for C, P and T states
423 423 */
424 424 void
425 425 cpupm_alloc_domains(cpu_t *cp, int state)
426 426 {
427 427 cpupm_mach_state_t *mach_state =
428 428 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
429 429 cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
430 430 cpupm_state_domains_t **dom_ptr;
431 431 cpupm_state_domains_t *dptr;
432 432 cpupm_state_domains_t **mach_dom_state_ptr;
433 433 uint32_t domain;
434 434 uint32_t type;
435 435
436 436 switch (state) {
437 437 case CPUPM_P_STATES:
438 438 if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_PSD_CACHED)) {
439 439 domain = CPU_ACPI_PSD(handle).sd_domain;
440 440 type = CPU_ACPI_PSD(handle).sd_type;
441 441 } else {
442 442 if (MUTEX_HELD(&cpu_lock)) {
443 443 domain = cpuid_get_chipid(cp);
444 444 } else {
445 445 mutex_enter(&cpu_lock);
446 446 domain = cpuid_get_chipid(cp);
447 447 mutex_exit(&cpu_lock);
448 448 }
449 449 type = CPU_ACPI_HW_ALL;
450 450 }
451 451 dom_ptr = &cpupm_pstate_domains;
452 452 mach_dom_state_ptr = &mach_state->ms_pstate.cma_domain;
453 453 break;
454 454 case CPUPM_T_STATES:
455 455 if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_TSD_CACHED)) {
456 456 domain = CPU_ACPI_TSD(handle).sd_domain;
457 457 type = CPU_ACPI_TSD(handle).sd_type;
458 458 } else {
459 459 if (MUTEX_HELD(&cpu_lock)) {
460 460 domain = cpuid_get_chipid(cp);
461 461 } else {
462 462 mutex_enter(&cpu_lock);
463 463 domain = cpuid_get_chipid(cp);
464 464 mutex_exit(&cpu_lock);
465 465 }
466 466 type = CPU_ACPI_HW_ALL;
467 467 }
468 468 dom_ptr = &cpupm_tstate_domains;
469 469 mach_dom_state_ptr = &mach_state->ms_tstate.cma_domain;
470 470 break;
471 471 case CPUPM_C_STATES:
472 472 if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_CSD_CACHED)) {
473 473 domain = CPU_ACPI_CSD(handle).sd_domain;
474 474 type = CPU_ACPI_CSD(handle).sd_type;
475 475 } else {
476 476 if (MUTEX_HELD(&cpu_lock)) {
477 477 domain = cpuid_get_coreid(cp);
478 478 } else {
479 479 mutex_enter(&cpu_lock);
480 480 domain = cpuid_get_coreid(cp);
481 481 mutex_exit(&cpu_lock);
482 482 }
483 483 type = CPU_ACPI_HW_ALL;
484 484 }
485 485 dom_ptr = &cpupm_cstate_domains;
486 486 mach_dom_state_ptr = &mach_state->ms_cstate.cma_domain;
487 487 break;
488 488 default:
489 489 return;
490 490 }
491 491
492 492 for (dptr = *dom_ptr; dptr != NULL; dptr = dptr->pm_next) {
493 493 if (dptr->pm_domain == domain)
494 494 break;
495 495 }
496 496
497 497 /* if no matching domain was found, create one and link it at the head */
498 498 if (dptr == NULL) {
499 499 dptr = kmem_zalloc(sizeof (cpupm_state_domains_t), KM_SLEEP);
500 500 dptr->pm_domain = domain;
501 501 dptr->pm_type = type;
502 502 dptr->pm_next = *dom_ptr;
503 503 mutex_init(&dptr->pm_lock, NULL, MUTEX_SPIN,
504 504 (void *)ipltospl(DISP_LEVEL));
505 505 CPUSET_ZERO(dptr->pm_cpus);
506 506 *dom_ptr = dptr;
507 507 }
508 508 CPUSET_ADD(dptr->pm_cpus, cp->cpu_id);
509 509 *mach_dom_state_ptr = dptr;
510 510 }
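
/*
 * Illustrative example: on a system with no cached _PSD object, every
 * CPU sharing a chipid lands in one P-state domain of type
 * CPU_ACPI_HW_ALL. The first such CPU through cpupm_alloc_domains()
 * allocates the domain; later CPUs find it in the list and are simply
 * added to its pm_cpus cpuset.
 */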
511 511
512 512 /*
513 513 * Free C, P or T state power domains
514 514 */
515 515 void
516 516 cpupm_free_domains(cpupm_state_domains_t **dom_ptr)
517 517 {
518 518 cpupm_state_domains_t *this_domain, *next_domain;
519 519
520 520 this_domain = *dom_ptr;
521 521 while (this_domain != NULL) {
522 522 next_domain = this_domain->pm_next;
523 523 mutex_destroy(&this_domain->pm_lock);
524 524 kmem_free((void *)this_domain,
525 525 sizeof (cpupm_state_domains_t));
526 526 this_domain = next_domain;
527 527 }
528 528 *dom_ptr = NULL;
529 529 }
530 530
531 531 /*
532 532 * Remove CPU from C, P or T state power domains
533 533 */
534 534 void
535 535 cpupm_remove_domains(cpu_t *cp, int state, cpupm_state_domains_t **dom_ptr)
536 536 {
537 537 cpupm_mach_state_t *mach_state =
538 538 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
539 539 cpupm_state_domains_t *dptr;
540 540 uint32_t pm_domain;
541 541
542 542 ASSERT(mach_state);
543 543
544 544 switch (state) {
545 545 case CPUPM_P_STATES:
546 546 pm_domain = mach_state->ms_pstate.cma_domain->pm_domain;
547 547 break;
548 548 case CPUPM_T_STATES:
549 549 pm_domain = mach_state->ms_tstate.cma_domain->pm_domain;
550 550 break;
551 551 case CPUPM_C_STATES:
552 552 pm_domain = mach_state->ms_cstate.cma_domain->pm_domain;
553 553 break;
554 554 default:
555 555 return;
556 556 }
557 557
558 558 /*
559 559 * Find the CPU C, P or T state power domain
560 560 */
561 561 for (dptr = *dom_ptr; dptr != NULL; dptr = dptr->pm_next) {
562 562 if (dptr->pm_domain == pm_domain)
563 563 break;
564 564 }
565 565
566 566 /*
567 567 	 * Return if no matching domain was found.
568 568 */
569 569 if (dptr == NULL)
570 570 return;
571 571
572 572 /*
573 573 	 * We found a matching power domain; remove the CPU from its cpuset.
574 574 	 * Hold pm_lock (a spin lock) here to avoid races between event
575 575 	 * change notification and CPU removal.
576 576 */
577 577 mutex_enter(&dptr->pm_lock);
578 578 if (CPU_IN_SET(dptr->pm_cpus, cp->cpu_id))
579 579 CPUSET_DEL(dptr->pm_cpus, cp->cpu_id);
580 580 mutex_exit(&dptr->pm_lock);
581 581 }
582 582
583 583 void
584 584 cpupm_alloc_ms_cstate(cpu_t *cp)
585 585 {
586 586 cpupm_mach_state_t *mach_state;
587 587 cpupm_mach_acpi_state_t *ms_cstate;
588 588
589 589 mach_state = (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
590 590 ms_cstate = &mach_state->ms_cstate;
591 591 ASSERT(ms_cstate->cma_state.cstate == NULL);
592 592 ms_cstate->cma_state.cstate = kmem_zalloc(sizeof (cma_c_state_t),
593 593 KM_SLEEP);
594 594 ms_cstate->cma_state.cstate->cs_next_cstate = CPU_ACPI_C1;
595 595 }
596 596
597 597 void
598 598 cpupm_free_ms_cstate(cpu_t *cp)
599 599 {
600 600 cpupm_mach_state_t *mach_state =
601 601 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
602 602 cpupm_mach_acpi_state_t *ms_cstate = &mach_state->ms_cstate;
603 603
604 604 if (ms_cstate->cma_state.cstate != NULL) {
605 605 kmem_free(ms_cstate->cma_state.cstate, sizeof (cma_c_state_t));
606 606 ms_cstate->cma_state.cstate = NULL;
607 607 }
608 608 }
609 609
610 610 void
611 611 cpupm_state_change(cpu_t *cp, int level, int state)
612 612 {
613 613 cpupm_mach_state_t *mach_state =
614 614 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
615 615 cpupm_state_ops_t *state_ops;
616 616 cpupm_state_domains_t *state_domain;
617 617 cpuset_t set;
618 618
619 619 DTRACE_PROBE2(cpupm__state__change, cpu_t *, cp, int, level);
620 620
621 621 if (mach_state == NULL) {
622 622 return;
623 623 }
624 624
625 625 switch (state) {
626 626 case CPUPM_P_STATES:
627 627 state_ops = mach_state->ms_pstate.cma_ops;
628 628 state_domain = mach_state->ms_pstate.cma_domain;
629 629 break;
630 630 case CPUPM_T_STATES:
631 631 state_ops = mach_state->ms_tstate.cma_ops;
632 632 state_domain = mach_state->ms_tstate.cma_domain;
633 633 break;
634 634 default:
635 635 		return;
636 636 }
637 637
638 638 switch (state_domain->pm_type) {
639 639 case CPU_ACPI_SW_ANY:
640 640 /*
641 641 * A request on any CPU in the domain transitions the domain
642 642 */
643 643 CPUSET_ONLY(set, cp->cpu_id);
644 644 state_ops->cpus_change(set, level);
645 645 break;
646 646 case CPU_ACPI_SW_ALL:
647 647 /*
648 648 * All CPUs in the domain must request the transition
649 649 */
650 650 case CPU_ACPI_HW_ALL:
651 651 /*
652 652 	 * P/T-state transitions are coordinated by the hardware.
653 653 * For now, request the transition on all CPUs in the domain,
654 654 * but looking ahead we can probably be smarter about this.
655 655 */
656 656 mutex_enter(&state_domain->pm_lock);
657 657 state_ops->cpus_change(state_domain->pm_cpus, level);
658 658 mutex_exit(&state_domain->pm_lock);
659 659 break;
660 660 default:
661 661 cmn_err(CE_NOTE, "Unknown domain coordination type: %d",
662 662 state_domain->pm_type);
663 663 }
664 664 }
665 665
666 666 /*
667 667 * CPU PM interfaces exposed to the CPU power manager
668 668 */
669 669 /*ARGSUSED*/
670 670 id_t
671 671 cpupm_plat_domain_id(cpu_t *cp, cpupm_dtype_t type)
672 672 {
673 673 cpupm_mach_state_t *mach_state =
674 674 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
675 675
676 676 if ((mach_state == NULL) || (!cpupm_is_enabled(CPUPM_P_STATES) &&
677 677 !cpupm_is_enabled(CPUPM_C_STATES))) {
678 678 return (CPUPM_NO_DOMAIN);
679 679 }
680 680 if (type == CPUPM_DTYPE_ACTIVE) {
681 681 /*
682 682 * Return P-State domain for the specified CPU
683 683 */
684 684 if (mach_state->ms_pstate.cma_domain) {
685 685 return (mach_state->ms_pstate.cma_domain->pm_domain);
686 686 }
687 687 } else if (type == CPUPM_DTYPE_IDLE) {
688 688 /*
689 689 * Return C-State domain for the specified CPU
690 690 */
691 691 if (mach_state->ms_cstate.cma_domain) {
692 692 return (mach_state->ms_cstate.cma_domain->pm_domain);
693 693 }
694 694 }
695 695 return (CPUPM_NO_DOMAIN);
696 696 }
697 697
698 698 /*ARGSUSED*/
699 699 uint_t
700 700 cpupm_plat_state_enumerate(cpu_t *cp, cpupm_dtype_t type,
701 701 cpupm_state_t *states)
702 702 {
703 703 int *speeds;
704 704 uint_t nspeeds, i;
705 705
706 706 /*
707 707 * Idle domain support unimplemented
708 708 */
709 709 if (type != CPUPM_DTYPE_ACTIVE) {
710 710 return (0);
711 711 }
712 712 nspeeds = cpupm_get_speeds(cp, &speeds);
713 713
714 714 /*
715 715 * If the caller passes NULL for states, just return the
716 716 * number of states.
717 717 */
718 718 if (states != NULL) {
719 719 for (i = 0; i < nspeeds; i++) {
720 720 states[i].cps_speed = speeds[i];
721 721 states[i].cps_handle = (cpupm_handle_t)i;
722 722 }
723 723 }
724 724 cpupm_free_speeds(speeds, nspeeds);
725 725 return (nspeeds);
726 726 }
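
/*
 * Illustrative caller sketch (hypothetical caller, not from this
 * file): the usual two-call pattern sizes, then fills, the states
 * array:
 *
 *	uint_t n = cpupm_plat_state_enumerate(cp, CPUPM_DTYPE_ACTIVE, NULL);
 *	cpupm_state_t *sp = kmem_zalloc(n * sizeof (cpupm_state_t), KM_SLEEP);
 *	(void) cpupm_plat_state_enumerate(cp, CPUPM_DTYPE_ACTIVE, sp);
 */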
727 727
728 728 /*ARGSUSED*/
729 729 int
730 730 cpupm_plat_change_state(cpu_t *cp, cpupm_state_t *state)
731 731 {
732 732 if (!cpupm_is_ready(cp))
733 733 return (-1);
734 734
735 735 cpupm_state_change(cp, (int)state->cps_handle, CPUPM_P_STATES);
736 736
737 737 return (0);
738 738 }
739 739
740 740 /*ARGSUSED*/
741 741 /*
742 742 * Note: It is the responsibility of the users of
743 743 * cpupm_get_speeds() to free the memory allocated
744 744 * for speeds using cpupm_free_speeds()
745 745 */
746 746 uint_t
747 747 cpupm_get_speeds(cpu_t *cp, int **speeds)
748 748 {
749 749 #ifndef __xpv
750 750 cpupm_mach_state_t *mach_state =
751 751 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
752 752 return (cpu_acpi_get_speeds(mach_state->ms_acpi_handle, speeds));
753 753 #else
754 754 return (0);
755 755 #endif
756 756 }
757 757
758 758 /*ARGSUSED*/
759 759 void
760 760 cpupm_free_speeds(int *speeds, uint_t nspeeds)
761 761 {
762 762 #ifndef __xpv
763 763 cpu_acpi_free_speeds(speeds, nspeeds);
764 764 #endif
765 765 }
766 766
767 767 /*
768 768 * True when P-states are enabled and this CPU is ready for power management.
769 769 */
770 770 boolean_t
771 771 cpupm_power_ready(cpu_t *cp)
772 772 {
773 773 return (cpupm_is_enabled(CPUPM_P_STATES) && cpupm_is_ready(cp));
774 774 }
775 775
776 776 /*
777 777 * True when T-states are enabled and this CPU is ready for power management.
778 778 */
779 779 boolean_t
780 780 cpupm_throttle_ready(cpu_t *cp)
781 781 {
782 782 return (cpupm_is_enabled(CPUPM_T_STATES) && cpupm_is_ready(cp));
783 783 }
784 784
785 785 /*
786 786 * True when C-states are enabled and this CPU is ready for power management.
787 787 */
788 788 boolean_t
789 789 cpupm_cstate_ready(cpu_t *cp)
790 790 {
791 791 return (cpupm_is_enabled(CPUPM_C_STATES) && cpupm_is_ready(cp));
792 792 }
793 793
794 794 void
795 795 cpupm_notify_handler(ACPI_HANDLE obj, UINT32 val, void *ctx)
796 796 {
797 797 cpu_t *cp = ctx;
798 798 cpupm_mach_state_t *mach_state =
799 799 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
800 800 cpupm_notification_t *entry;
801 801
802 802 mutex_enter(&mach_state->ms_lock);
803 803 for (entry = mach_state->ms_handlers; entry != NULL;
804 804 entry = entry->nq_next) {
805 805 entry->nq_handler(obj, val, entry->nq_ctx);
806 806 }
807 807 mutex_exit(&mach_state->ms_lock);
808 808 }
809 809
810 810 /*ARGSUSED*/
811 811 void
812 812 cpupm_add_notify_handler(cpu_t *cp, CPUPM_NOTIFY_HANDLER handler, void *ctx)
813 813 {
814 814 #ifndef __xpv
815 815 cpupm_mach_state_t *mach_state =
816 816 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
817 817 cpupm_notification_t *entry;
818 818
819 819 entry = kmem_zalloc(sizeof (cpupm_notification_t), KM_SLEEP);
820 820 entry->nq_handler = handler;
821 821 entry->nq_ctx = ctx;
822 822 mutex_enter(&mach_state->ms_lock);
823 823 if (mach_state->ms_handlers == NULL) {
824 824 entry->nq_next = NULL;
825 825 mach_state->ms_handlers = entry;
826 826 cpu_acpi_install_notify_handler(mach_state->ms_acpi_handle,
827 827 cpupm_notify_handler, cp);
828 828
829 829 } else {
830 830 entry->nq_next = mach_state->ms_handlers;
831 831 mach_state->ms_handlers = entry;
832 832 }
833 833 mutex_exit(&mach_state->ms_lock);
834 834 #endif
835 835 }
836 836
837 837 /*ARGSUSED*/
838 838 static void
839 839 cpupm_free_notify_handlers(cpu_t *cp)
840 840 {
841 841 #ifndef __xpv
842 842 cpupm_mach_state_t *mach_state =
843 843 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
844 844 cpupm_notification_t *entry;
845 845 cpupm_notification_t *next;
846 846
847 847 mutex_enter(&mach_state->ms_lock);
848 848 if (mach_state->ms_handlers == NULL) {
849 849 mutex_exit(&mach_state->ms_lock);
850 850 return;
851 851 }
852 852 if (mach_state->ms_acpi_handle != NULL) {
853 853 cpu_acpi_remove_notify_handler(mach_state->ms_acpi_handle,
854 854 cpupm_notify_handler);
855 855 }
856 856 entry = mach_state->ms_handlers;
857 857 while (entry != NULL) {
858 858 next = entry->nq_next;
859 859 kmem_free(entry, sizeof (cpupm_notification_t));
860 860 entry = next;
861 861 }
862 862 mach_state->ms_handlers = NULL;
863 863 mutex_exit(&mach_state->ms_lock);
864 864 #endif
865 865 }
866 866
867 867 /*
868 868 * Get the current max speed from the ACPI _PPC object
869 869 */
870 870 /*ARGSUSED*/
871 871 int
872 872 cpupm_get_top_speed(cpu_t *cp)
873 873 {
874 874 #ifndef __xpv
875 875 cpupm_mach_state_t *mach_state;
876 876 cpu_acpi_handle_t handle;
877 877 int plat_level;
878 878 uint_t nspeeds;
879 879 int max_level;
880 880
881 881 mach_state =
882 882 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
883 883 handle = mach_state->ms_acpi_handle;
884 884
885 885 cpu_acpi_cache_ppc(handle);
886 886 plat_level = CPU_ACPI_PPC(handle);
887 887
888 888 nspeeds = CPU_ACPI_PSTATES_COUNT(handle);
889 889
890 890 max_level = nspeeds - 1;
891 891 if ((plat_level < 0) || (plat_level > max_level)) {
892 892 cmn_err(CE_NOTE, "!cpupm_get_top_speed: CPU %d: "
893 893 "_PPC out of range %d", cp->cpu_id, plat_level);
894 894 plat_level = 0;
895 895 }
896 896
897 897 return (plat_level);
898 898 #else
899 899 return (0);
900 900 #endif
901 901 }
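
/*
 * Example (illustrative): with four _PSS entries (indices 0-3, index
 * 0 conventionally the fastest), a _PPC value of 1 makes index 1 the
 * fastest usable P-state; a value outside 0-3 is logged and treated
 * as 0, i.e. no restriction.
 */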
902 902
903 903 /*
904 904 * This notification handler is called whenever the ACPI _PPC
905 905  * object changes. The _PPC acts as a governor on power levels: it
906 906  * sets an upper threshold on which of the _PSS-defined power levels
907 907  * are usable. The _PPC value is dynamic and may change as properties
908 908  * of the system (e.g., thermal state or AC power source) change.
909 909 */
910 910
911 911 static void
912 912 cpupm_power_manage_notifications(void *ctx)
913 913 {
914 914 cpu_t *cp = ctx;
915 915 int top_speed;
916 916
917 917 top_speed = cpupm_get_top_speed(cp);
918 918 cpupm_redefine_max_activepwr_state(cp, top_speed);
919 919 }
920 920
921 921 /* ARGSUSED */
922 922 static void
923 923 cpupm_event_notify_handler(ACPI_HANDLE obj, UINT32 val, void *ctx)
924 924 {
925 925 #ifndef __xpv
926 926
927 927 cpu_t *cp = ctx;
928 928 cpupm_mach_state_t *mach_state =
929 929 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
930 930
931 931 if (mach_state == NULL)
932 932 return;
933 933
934 934 /*
935 935 	 * Currently, we handle _TPC, _CST and _PPC change notifications.
936 936 */
937 937 if (val == CPUPM_TPC_CHANGE_NOTIFICATION &&
938 938 mach_state->ms_caps & CPUPM_T_STATES) {
939 939 cpupm_throttle_manage_notification(ctx);
940 940 } else if (val == CPUPM_CST_CHANGE_NOTIFICATION &&
941 941 mach_state->ms_caps & CPUPM_C_STATES) {
942 942 cpuidle_manage_cstates(ctx);
943 943 } else if (val == CPUPM_PPC_CHANGE_NOTIFICATION &&
944 944 mach_state->ms_caps & CPUPM_P_STATES) {
945 945 cpupm_power_manage_notifications(ctx);
946 946 }
947 947 #endif
948 948 }
949 949
950 950 /*
951 951  * Update cpupm cstate data each time a CPU exits idle.
952 952 */
953 953 void
954 954 cpupm_wakeup_cstate_data(cma_c_state_t *cs_data, hrtime_t end)
955 955 {
956 956 cs_data->cs_idle_exit = end;
957 957 }
958 958
959 959 /*
960 960 * Determine next cstate based on cpupm data.
961 961  * Update cpupm cstate data each time a CPU goes idle.
962 962  * Do as much as possible in the idle-state bookkeeping function, since
963 963  * the performance impact while idle is minimal compared to the wakeup
964 964  * function, where there is real work to do.
965 965 */
966 966 uint32_t
967 967 cpupm_next_cstate(cma_c_state_t *cs_data, cpu_acpi_cstate_t *cstates,
968 968 uint32_t cs_count, hrtime_t start)
969 969 {
970 970 hrtime_t duration;
971 971 hrtime_t ave_interval;
972 972 hrtime_t ave_idle_time;
973 973 uint32_t i, smpl_cnt;
974 974
975 975 duration = cs_data->cs_idle_exit - cs_data->cs_idle_enter;
976 976 scalehrtime(&duration);
977 977 cs_data->cs_idle += duration;
978 978 cs_data->cs_idle_enter = start;
979 979
980 980 smpl_cnt = ++cs_data->cs_cnt;
981 981 cs_data->cs_smpl_len = start - cs_data->cs_smpl_start;
982 982 scalehrtime(&cs_data->cs_smpl_len);
983 983 if (cs_data->cs_smpl_len > cpupm_cs_sample_interval) {
984 984 cs_data->cs_smpl_idle = cs_data->cs_idle;
985 985 cs_data->cs_idle = 0;
986 986 cs_data->cs_smpl_idle_pct = ((100 * cs_data->cs_smpl_idle) /
987 987 cs_data->cs_smpl_len);
988 988
989 989 cs_data->cs_smpl_start = start;
990 990 cs_data->cs_cnt = 0;
991 991
992 992 /*
993 993 * Strand level C-state policy
994 994 * The cpu_acpi_cstate_t *cstates array is not required to
995 995 * have an entry for both CPU_ACPI_C2 and CPU_ACPI_C3.
996 996 * There are cs_count entries in the cstates array.
997 997 * cs_data->cs_next_cstate contains the index of the next
998 998 * C-state this CPU should enter.
999 999 */
1000 1000 ASSERT(cstates[0].cs_type == CPU_ACPI_C1);
1001 1001
1002 1002 /*
1003 1003 		 * Will the CPU be idle long enough to save power?
1004 1004 */
1005 1005 ave_idle_time = (cs_data->cs_smpl_idle / smpl_cnt) / 1000;
1006 1006 for (i = 1; i < cs_count; ++i) {
1007 1007 if (ave_idle_time < (cstates[i].cs_latency *
1008 1008 cpupm_cs_idle_save_tunable)) {
1009 1009 cs_count = i;
1010 1010 DTRACE_PROBE2(cpupm__next__cstate, cpu_t *,
1011 1011 CPU, int, i);
1012 1012 }
1013 1013 }
1014 1014
1015 1015 /*
1016 1016 * Wakeup often (even when non-idle time is very short)?
1017 1017 * Some producer/consumer type loads fall into this category.
1018 1018 */
1019 1019 ave_interval = (cs_data->cs_smpl_len / smpl_cnt) / 1000;
1020 1020 for (i = 1; i < cs_count; ++i) {
1021 1021 if (ave_interval <= (cstates[i].cs_latency *
1022 1022 cpupm_cs_idle_cost_tunable)) {
1023 1023 cs_count = i;
1024 1024 DTRACE_PROBE2(cpupm__next__cstate, cpu_t *,
1025 1025 CPU, int, (CPU_MAX_CSTATES + i));
1026 1026 }
1027 1027 }
1028 1028
1029 1029 /*
1030 1030 * Idle percent
1031 1031 */
1032 1032 for (i = 1; i < cs_count; ++i) {
1033 1033 switch (cstates[i].cs_type) {
1034 1034 case CPU_ACPI_C2:
1035 1035 if (cs_data->cs_smpl_idle_pct <
1036 1036 cpupm_C2_idle_pct_tunable) {
1037 1037 cs_count = i;
1038 1038 DTRACE_PROBE2(cpupm__next__cstate,
1039 1039 cpu_t *, CPU, int,
1040 1040 ((2 * CPU_MAX_CSTATES) + i));
1041 1041 }
1042 1042 break;
1043 1043
1044 1044 case CPU_ACPI_C3:
1045 1045 if (cs_data->cs_smpl_idle_pct <
1046 1046 cpupm_C3_idle_pct_tunable) {
1047 1047 cs_count = i;
1048 1048 DTRACE_PROBE2(cpupm__next__cstate,
1049 1049 cpu_t *, CPU, int,
1050 1050 ((2 * CPU_MAX_CSTATES) + i));
1051 1051 }
1052 1052 break;
1053 1053 }
1054 1054 }
1055 1055
1056 1056 cs_data->cs_next_cstate = cs_count - 1;
1057 1057 }
1058 1058
1059 1059 return (cs_data->cs_next_cstate);
1060 1060 }
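
/*
 * Worked example (illustrative): with cstates[] = {C1, C2, C3},
 * cs_count starts at 3. If the idle-save and idle-cost checks pass
 * for C2 but the average wakeup interval is too short for C3,
 * cs_count is clipped to 2, so cs_next_cstate becomes index 1 and the
 * CPU next enters C2.
 */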
[ 1013 lines elided ]