Print this page
PANKOVs restructure
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/i86pc/os/cpupm/cpu_idle.c
+++ new/usr/src/uts/i86pc/os/cpupm/cpu_idle.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
↓ open down ↓ |
23 lines elided |
↑ open up ↑ |
24 24 */
25 25 /*
26 26 * Copyright (c) 2009-2010, Intel Corporation.
27 27 * All rights reserved.
28 28 */
29 29
30 30 #include <sys/x86_archext.h>
31 31 #include <sys/machsystm.h>
32 32 #include <sys/x_call.h>
33 33 #include <sys/stat.h>
34 -#include <sys/acpi/acpi.h>
34 +#include <acpica/include/acpi.h>
35 35 #include <sys/acpica.h>
36 36 #include <sys/cpu_acpi.h>
37 37 #include <sys/cpu_idle.h>
38 38 #include <sys/cpupm.h>
39 39 #include <sys/cpu_event.h>
40 40 #include <sys/hpet.h>
41 41 #include <sys/archsystm.h>
42 42 #include <vm/hat_i86.h>
43 43 #include <sys/dtrace.h>
44 44 #include <sys/sdt.h>
45 45 #include <sys/callb.h>
46 46
/*
 * Selectors passed as the "timer" argument of cstate_use_timer():
 * switch to the HPET proxy before a deep C-state, or back to the
 * local APIC timer afterwards.
 */
47 47 #define CSTATE_USING_HPET 1
48 48 #define CSTATE_USING_LAT 2
49 49
/*
 * Number of 10us polls cpu_idle_stop() performs between "idle stop
 * timeout" notices while it waits for the target CPU.
 */
50 50 #define CPU_IDLE_STOP_TIMEOUT 1000
51 51
52 52 extern void cpu_idle_adaptive(void);
53 53 extern uint32_t cpupm_next_cstate(cma_c_state_t *cs_data,
54 54 cpu_acpi_cstate_t *cstates, uint32_t cs_count, hrtime_t start);
55 55
56 56 static int cpu_idle_init(cpu_t *);
57 57 static void cpu_idle_fini(cpu_t *);
58 58 static void cpu_idle_stop(cpu_t *);
59 59 static boolean_t cpu_deep_idle_callb(void *arg, int code);
60 60 static boolean_t cpu_idle_cpr_callb(void *arg, int code);
61 61 static void acpi_cpu_cstate(cpu_acpi_cstate_t *cstate);
62 62
63 63 static boolean_t cstate_use_timer(hrtime_t *lapic_expire, int timer);
64 64
65 65 /*
66 66 * cpu_cstate_arat: set when cpuid reports an always-running local APIC
 * timer, in which case no timer switching is done around deep C-states.
67 67 * cpu_cstate_hpet: set when the HPET proxy was installed to stand in for
 * the local APIC timer during deep C-states.
 * Both are set by cpu_deep_cstates_supported().
68 68 */
69 69 static boolean_t cpu_cstate_arat = B_FALSE;
70 70 static boolean_t cpu_cstate_hpet = B_FALSE;
71 71
72 72 /*
73 73 * Interfaces for modules implementing Intel's deep c-state.
 * The initializer is positional: a description string followed by the
 * init, fini, (unused, NULL) and stop entry points.
 * NOTE(review): slot meanings are inferred from the handler names; confirm
 * against the cpupm_state_ops_t declaration.
74 74 */
75 75 cpupm_state_ops_t cpu_idle_ops = {
76 76 "Generic ACPI C-state Support",
77 77 cpu_idle_init,
78 78 cpu_idle_fini,
79 79 NULL,
80 80 cpu_idle_stop
81 81 };
82 82
/*
 * cpu_idle_callb_mutex is held while the two callback ids, idle_cpu /
 * disp_enq_thread switching in the callbacks, and cpu_idle_cfg_state are
 * updated. The CPU_IDLE_DEEP_CFG bit of cpu_idle_cfg_state is set while
 * deep idle is disabled through the PM callback and cleared on enable.
 */
83 83 static kmutex_t cpu_idle_callb_mutex;
84 84 static callb_id_t cpu_deep_idle_callb_id;
85 85 static callb_id_t cpu_idle_cpr_callb_id;
86 86 static uint_t cpu_idle_cfg_state;
87 87
/* Installed as ks_lock on every c-state kstat created by cpu_idle_init(). */
88 88 static kmutex_t cpu_idle_mutex;
89 89
/*
 * Single named-kstat template shared by all c-state kstats: each kstat's
 * ks_data points here and cpu_idle_kstat_update() refills it on demand.
 */
90 90 cpu_idle_kstat_t cpu_idle_kstat = {
91 91 { "address_space_id", KSTAT_DATA_STRING },
92 92 { "latency", KSTAT_DATA_UINT32 },
93 93 { "power", KSTAT_DATA_UINT32 },
94 94 };
95 95
96 96 /*
97 97 * kstat update function of the c-state info
98 98 */
99 99 static int
100 100 cpu_idle_kstat_update(kstat_t *ksp, int flag)
101 101 {
102 102 cpu_acpi_cstate_t *cstate = ksp->ks_private;
103 103
104 104 if (flag == KSTAT_WRITE) {
105 105 return (EACCES);
106 106 }
107 107
108 108 if (cstate->cs_addrspace_id == ACPI_ADR_SPACE_FIXED_HARDWARE) {
109 109 kstat_named_setstr(&cpu_idle_kstat.addr_space_id,
110 110 "FFixedHW");
111 111 } else if (cstate->cs_addrspace_id == ACPI_ADR_SPACE_SYSTEM_IO) {
112 112 kstat_named_setstr(&cpu_idle_kstat.addr_space_id,
113 113 "SystemIO");
114 114 } else {
115 115 kstat_named_setstr(&cpu_idle_kstat.addr_space_id,
116 116 "Unsupported");
117 117 }
118 118
119 119 cpu_idle_kstat.cs_latency.value.ui32 = cstate->cs_latency;
120 120 cpu_idle_kstat.cs_power.value.ui32 = cstate->cs_power;
121 121
122 122 return (0);
123 123 }
124 124
125 125 /*
126 126 * Used during configuration callbacks to manage implementation specific
127 127 * details of the hardware timer used during Deep C-state.
128 128 */
129 129 boolean_t
130 130 cstate_timer_callback(int code)
131 131 {
132 132 if (cpu_cstate_arat) {
133 133 return (B_TRUE);
134 134 } else if (cpu_cstate_hpet) {
135 135 return (hpet.callback(code));
136 136 }
137 137 return (B_FALSE);
138 138 }
139 139
140 140 /*
141 141 * Some Local APIC Timers do not work during Deep C-states.
142 142 * The Deep C-state idle function uses this function to ensure it is using a
143 143 * hardware timer that works during Deep C-states. This function also
144 144 * switches the timer back to the LACPI Timer after Deep C-state.
145 145 */
146 146 static boolean_t
147 147 cstate_use_timer(hrtime_t *lapic_expire, int timer)
148 148 {
149 149 if (cpu_cstate_arat)
150 150 return (B_TRUE);
151 151
152 152 /*
153 153 * We have to return B_FALSE if no arat or hpet support
154 154 */
155 155 if (!cpu_cstate_hpet)
156 156 return (B_FALSE);
157 157
158 158 switch (timer) {
159 159 case CSTATE_USING_HPET:
160 160 return (hpet.use_hpet_timer(lapic_expire));
161 161 case CSTATE_USING_LAT:
162 162 hpet.use_lapic_timer(*lapic_expire);
163 163 return (B_TRUE);
164 164 default:
165 165 return (B_FALSE);
166 166 }
167 167 }
168 168
169 169 /*
170 170 * c-state wakeup function.
171 171 * Similar to cpu_wakeup and cpu_wakeup_mwait except this function deals
172 172 * with CPUs asleep in MWAIT, HLT, or ACPI Deep C-State.
 *
 * cp is the CPU a thread was just enqueued on; bound is non-zero when that
 * thread is bound, in which case no other CPU is woken on its behalf.
 * If cp is in its partition's halted set it is removed and woken;
 * otherwise, unless cp is idle/switching or the thread is bound, one other
 * halted CPU of the partition is picked and woken instead.
 * This function is installed as disp_enq_thread when deep idle is enabled.
173 173 */
174 174 void
175 175 cstate_wakeup(cpu_t *cp, int bound)
176 176 {
177 177 struct machcpu *mcpu = &(cp->cpu_m);
178 178 volatile uint32_t *mcpu_mwait = mcpu->mcpu_mwait;
179 179 cpupart_t *cpu_part;
180 180 uint_t cpu_found;
181 181 processorid_t cpu_sid;
182 182
183 183 cpu_part = cp->cpu_part;
184 184 cpu_sid = cp->cpu_seqid;
185 185 /*
186 186 * Case 1: the target CPU itself is marked halted in its partition.
187 187 * Take it out of the halted set and wake it.
188 188 */
189 189 if (bitset_in_set(&cpu_part->cp_haltset, cpu_sid)) {
190 190 /*
191 191 * Clear the halted bit for that CPU since it will be
192 192 * poked in a moment.
193 193 */
194 194 bitset_atomic_del(&cpu_part->cp_haltset, cpu_sid);
195 195
196 196 /*
197 197 * We may find the current CPU present in the halted cpuset
198 198 * if we're in the context of an interrupt that occurred
199 199 * before we had a chance to clear our bit in cpu_idle().
200 200 * Waking ourself is obviously unnecessary, since if
201 201 * we're here, we're not halted.
202 202 */
203 203 if (cp != CPU) {
204 204 /*
205 205 * Use correct wakeup mechanism: a CPU whose mcpu_mwait
 * word reads MWAIT_HALTED is woken through
 * MWAIT_WAKEUP(), any other CPU with poke_cpu().
206 206 */
207 207 if ((mcpu_mwait != NULL) &&
208 208 (*mcpu_mwait == MWAIT_HALTED))
209 209 MWAIT_WAKEUP(cp);
210 210 else
211 211 poke_cpu(cp->cpu_id);
212 212 }
213 213 return;
214 214 } else {
215 215 /*
216 216 * This cpu isn't halted, but it's idle or undergoing a
217 217 * context switch. No need to awaken anyone else.
218 218 */
219 219 if (cp->cpu_thread == cp->cpu_idle_thread ||
220 220 cp->cpu_disp_flags & CPU_DISP_DONTSTEAL)
221 221 return;
222 222 }
223 223
224 224 /*
225 225 * No need to wake up other CPUs if the thread we just enqueued
226 226 * is bound.
227 227 */
228 228 if (bound)
229 229 return;
230 230
231 231
232 232 /*
233 233 * See if there's any other halted CPUs. If there are, then
234 234 * select one, and awaken it.
235 235 * It's possible that after we find a CPU, somebody else
236 236 * will awaken it before we get the chance.
237 237 * In that case, look again.
 * bitset_find() yielding (uint_t)-1 means the halted set is empty.
 * NOTE(review): the retry relies on bitset_atomic_test_and_del()
 * returning a negative value when the bit was already cleared by
 * someone else; confirm against its definition.
238 238 */
239 239 do {
240 240 cpu_found = bitset_find(&cpu_part->cp_haltset);
241 241 if (cpu_found == (uint_t)-1)
242 242 return;
243 243
244 244 } while (bitset_atomic_test_and_del(&cpu_part->cp_haltset,
245 245 cpu_found) < 0);
246 246
247 247 /*
248 248 * Must use correct wakeup mechanism to avoid lost wakeup of
249 249 * alternate cpu. cpu_found is a sequence id, so the CPU is looked
 * up through cpu_seq[]; nothing is done if it is the current CPU.
250 250 */
251 251 if (cpu_found != CPU->cpu_seqid) {
252 252 mcpu_mwait = cpu_seq[cpu_found]->cpu_m.mcpu_mwait;
253 253 if ((mcpu_mwait != NULL) && (*mcpu_mwait == MWAIT_HALTED))
254 254 MWAIT_WAKEUP(cpu_seq[cpu_found]);
255 255 else
256 256 poke_cpu(cpu_seq[cpu_found]->cpu_id);
257 257 }
258 258 }
259 259
260 260 /*
261 261 * Function called by CPU idle notification framework to check whether CPU
262 262 * has been awakened. It will be called with interrupt disabled.
263 263 * If CPU has been awakened, call cpu_idle_exit() to notify CPU idle
264 264 * notification framework.
265 265 */
266 266 static void
267 267 acpi_cpu_mwait_check_wakeup(void *arg)
268 268 {
269 269 volatile uint32_t *mcpu_mwait = (volatile uint32_t *)arg;
270 270
271 271 ASSERT(arg != NULL);
272 272 if (*mcpu_mwait != MWAIT_HALTED) {
273 273 /*
274 274 * CPU has been awakened, notify CPU idle notification system.
275 275 */
276 276 cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
277 277 } else {
278 278 /*
279 279 * Toggle interrupt flag to detect pending interrupts.
280 280 * If interrupt happened, do_interrupt() will notify CPU idle
281 281 * notification framework so no need to call cpu_idle_exit()
282 282 * here.
283 283 */
284 284 sti();
285 285 SMT_PAUSE();
286 286 cli();
287 287 }
288 288 }
289 289
290 290 static void
291 291 acpi_cpu_mwait_ipi_check_wakeup(void *arg)
292 292 {
293 293 volatile uint32_t *mcpu_mwait = (volatile uint32_t *)arg;
294 294
295 295 ASSERT(arg != NULL);
296 296 if (*mcpu_mwait != MWAIT_WAKEUP_IPI) {
297 297 /*
298 298 * CPU has been awakened, notify CPU idle notification system.
299 299 */
300 300 cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
301 301 } else {
302 302 /*
303 303 * Toggle interrupt flag to detect pending interrupts.
304 304 * If interrupt happened, do_interrupt() will notify CPU idle
305 305 * notification framework so no need to call cpu_idle_exit()
306 306 * here.
307 307 */
308 308 sti();
309 309 SMT_PAUSE();
310 310 cli();
311 311 }
312 312 }
313 313
/*
 * Default wakeup check used by acpi_cpu_cstate() when the CPU has no
 * mcpu_mwait word to inspect. arg is unused; the function only opens a
 * short interrupt window and closes it again.
 */
314 314 /*ARGSUSED*/
315 315 static void
316 316 acpi_cpu_check_wakeup(void *arg)
317 317 {
318 318 /*
319 319 * Toggle interrupt flag to detect pending interrupts.
320 320 * If interrupt happened, do_interrupt() will notify CPU idle
321 321 * notification framework so no need to call cpu_idle_exit() here.
322 322 */
323 323 sti();
324 324 SMT_PAUSE();
325 325 cli();
326 326 }
327 327
328 328 /*
329 329 * enter deep c-state handler
 *
 * Puts the current CPU into the ACPI C-state described by cstate and
 * returns once it has been woken (or immediately if work showed up).
 * The sequence is:
 *   1. publish the idle state in mcpu_mwait and, when applicable, in the
 *      partition's halted set;
 *   2. re-check for runnable work;
 *   3. disable interrupts and switch to a timer that survives deep
 *      C-states;
 *   4. enter the C-state with MWAIT (FixedHW) or a port read (SystemIO),
 *      or fall back to C1 when no such timer could be set up;
 *   5. switch back to the LAPIC timer, enable interrupts and clear the
 *      halted state.
330 330 */
331 331 static void
332 332 acpi_cpu_cstate(cpu_acpi_cstate_t *cstate)
333 333 {
334 334 volatile uint32_t *mcpu_mwait = CPU->cpu_m.mcpu_mwait;
335 335 cpu_t *cpup = CPU;
336 336 processorid_t cpu_sid = cpup->cpu_seqid;
337 337 cpupart_t *cp = cpup->cpu_part;
338 338 hrtime_t lapic_expire;
339 339 uint8_t type = cstate->cs_addrspace_id;
340 340 uint32_t cs_type = cstate->cs_type;
341 341 int hset_update = 1;
342 342 boolean_t using_timer;
343 343 cpu_idle_check_wakeup_t check_func = &acpi_cpu_check_wakeup;
344 344
345 345 /*
346 346 * Set our mcpu_mwait here, so we can tell if anyone tries to
347 347 * wake us between now and when we call mwait. No other cpu will
348 348 * attempt to set our mcpu_mwait until we add ourself to the haltset.
 * The value written and the wakeup check function are chosen to
 * match the address space of the C-state.
349 349 */
350 350 if (mcpu_mwait) {
351 351 if (type == ACPI_ADR_SPACE_SYSTEM_IO) {
352 352 *mcpu_mwait = MWAIT_WAKEUP_IPI;
353 353 check_func = &acpi_cpu_mwait_ipi_check_wakeup;
354 354 } else {
355 355 *mcpu_mwait = MWAIT_HALTED;
356 356 check_func = &acpi_cpu_mwait_check_wakeup;
357 357 }
358 358 }
359 359
360 360 /*
361 361 * If this CPU is online, and there are multiple CPUs
362 362 * in the system, then we should note our halting
363 363 * by adding ourselves to the partition's halted CPU
364 364 * bitmap. This allows other CPUs to find/awaken us when
365 365 * work becomes available.
366 366 */
367 367 if (cpup->cpu_flags & CPU_OFFLINE || ncpus == 1)
368 368 hset_update = 0;
369 369
370 370 /*
371 371 * Add ourselves to the partition's halted CPUs bitmask
372 372 * and set our HALTED flag, if necessary.
373 373 *
374 374 * When a thread becomes runnable, it is placed on the queue
375 375 * and then the halted cpuset is checked to determine who
376 376 * (if anyone) should be awakened. We therefore need to first
377 377 * add ourselves to the halted cpuset, and then check if there
378 378 * is any work available.
379 379 *
380 380 * Note that memory barriers after updating the HALTED flag
381 381 * are not necessary since an atomic operation (updating the bitmap)
382 382 * immediately follows. On x86 the atomic operation acts as a
383 383 * memory barrier for the update of cpu_disp_flags.
384 384 */
385 385 if (hset_update) {
386 386 cpup->cpu_disp_flags |= CPU_DISP_HALTED;
387 387 bitset_atomic_add(&cp->cp_haltset, cpu_sid);
388 388 }
389 389
390 390 /*
391 391 * Check to make sure there's really nothing to do.
392 392 * Work destined for this CPU may become available after
393 393 * this check. We'll be notified through the clearing of our
394 394 * bit in the halted CPU bitmask, and a write to our mcpu_mwait.
395 395 *
396 396 * disp_anywork() checks disp_nrunnable, so we do not have to later.
397 397 */
398 398 if (disp_anywork()) {
399 399 if (hset_update) {
400 400 cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
401 401 bitset_atomic_del(&cp->cp_haltset, cpu_sid);
402 402 }
403 403 return;
404 404 }
405 405
406 406 /*
407 407 * We're on our way to being halted.
408 408 *
409 409 * The local APIC timer can stop in ACPI C2 and deeper c-states.
410 410 * Try to program the HPET hardware to substitute for this CPU's
411 411 * LAPIC timer.
412 412 * cstate_use_timer() could disable the LAPIC Timer. Make sure
413 413 * to start the LAPIC Timer again before leaving this function.
414 414 *
415 415 * Disable interrupts here so we will awaken immediately after halting
416 416 * if someone tries to poke us between now and the time we actually
417 417 * halt.
418 418 */
419 419 cli();
420 420 using_timer = cstate_use_timer(&lapic_expire, CSTATE_USING_HPET);
421 421
422 422 /*
423 423 * We check for the presence of our bit after disabling interrupts.
424 424 * If it's cleared, we'll return. If the bit is cleared after
425 425 * we check then the cstate_wakeup() will pop us out of the halted
426 426 * state.
427 427 *
428 428 * This means that the ordering of the cstate_wakeup() and the clearing
429 429 * of the bit by cpu_wakeup is important.
430 430 * cpu_wakeup() must clear our cp_haltset bit, and then call
431 431 * cstate_wakeup().
432 432 * acpi_cpu_cstate() must disable interrupts, then check for the bit.
 *
 * The bit is already gone on this path, so only the HALTED flag
 * needs to be cleared before returning.
433 433 */
434 434 if (hset_update && bitset_in_set(&cp->cp_haltset, cpu_sid) == 0) {
435 435 (void) cstate_use_timer(&lapic_expire,
436 436 CSTATE_USING_LAT);
437 437 sti();
438 438 cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
439 439 return;
440 440 }
441 441
442 442 /*
443 443 * The check for anything locally runnable is here for performance
444 444 * and isn't needed for correctness. disp_nrunnable ought to be
445 445 * in our cache still, so it's inexpensive to check, and if there
446 446 * is anything runnable we won't have to wait for the poke.
447 447 */
448 448 if (cpup->cpu_disp->disp_nrunnable != 0) {
449 449 (void) cstate_use_timer(&lapic_expire,
450 450 CSTATE_USING_LAT);
451 451 sti();
452 452 if (hset_update) {
453 453 cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
454 454 bitset_atomic_del(&cp->cp_haltset, cpu_sid);
455 455 }
456 456 return;
457 457 }
458 458
459 459 if (using_timer == B_FALSE) {
460 460
461 461 (void) cstate_use_timer(&lapic_expire,
462 462 CSTATE_USING_LAT);
463 463 sti();
464 464
465 465 /*
466 466 * We are currently unable to program the HPET to act as this
467 467 * CPU's proxy LAPIC timer. This CPU cannot enter C2 or deeper
468 468 * because no timer is set to wake it up while its LAPIC timer
469 469 * stalls in deep C-States.
470 470 * Enter C1 instead.
471 471 *
472 472 * cstate_wakeup() will wake this CPU with an IPI which
473 473 * works with MWAIT.
 *
 * Masking off MWAIT_WAKEUP_IPI lets both values written to
 * mcpu_mwait at the top of this function count as "still halted".
 * NOTE(review): mcpu_mwait is dereferenced from here on without
 * the NULL check used above; presumably deep C-states are only
 * enabled on CPUs that have an mcpu_mwait word - confirm.
474 474 */
475 475 i86_monitor(mcpu_mwait, 0, 0);
476 476 if ((*mcpu_mwait & ~MWAIT_WAKEUP_IPI) == MWAIT_HALTED) {
477 477 if (cpu_idle_enter(IDLE_STATE_C1, 0,
478 478 check_func, (void *)mcpu_mwait) == 0) {
479 479 if ((*mcpu_mwait & ~MWAIT_WAKEUP_IPI) ==
480 480 MWAIT_HALTED) {
481 481 i86_mwait(0, 0);
482 482 }
483 483 cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
484 484 }
485 485 }
486 486
487 487 /*
488 488 * We're no longer halted
489 489 */
490 490 if (hset_update) {
491 491 cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
492 492 bitset_atomic_del(&cp->cp_haltset, cpu_sid);
493 493 }
494 494 return;
495 495 }
496 496
497 497 if (type == ACPI_ADR_SPACE_FIXED_HARDWARE) {
498 498 /*
499 499 * We're on our way to being halted.
500 500 * To avoid a lost wakeup, arm the monitor before checking
501 501 * if another cpu wrote to mcpu_mwait to wake us up.
 * mcpu_mwait is re-read after cpu_idle_enter() succeeds so that
 * a wakeup arriving in between skips the MWAIT.
502 502 */
503 503 i86_monitor(mcpu_mwait, 0, 0);
504 504 if (*mcpu_mwait == MWAIT_HALTED) {
505 505 if (cpu_idle_enter((uint_t)cs_type, 0,
506 506 check_func, (void *)mcpu_mwait) == 0) {
507 507 if (*mcpu_mwait == MWAIT_HALTED) {
508 508 i86_mwait(cstate->cs_address, 1);
509 509 }
510 510 cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
511 511 }
512 512 }
513 513 } else if (type == ACPI_ADR_SPACE_SYSTEM_IO) {
514 514 uint32_t value;
515 515 ACPI_TABLE_FADT *gbl_FADT;
516 516
/*
 * SystemIO C-state: the state is entered by reading the C-state's port
 * (cs_address, 8 bits wide), followed by a 32-bit read of the FADT PM
 * timer block. Both read values are discarded.
 * NOTE(review): the PM timer read is presumably the customary dummy read
 * that follows the C-state port read - confirm against the ACPI spec.
 */
517 517 if (*mcpu_mwait == MWAIT_WAKEUP_IPI) {
518 518 if (cpu_idle_enter((uint_t)cs_type, 0,
519 519 check_func, (void *)mcpu_mwait) == 0) {
520 520 if (*mcpu_mwait == MWAIT_WAKEUP_IPI) {
521 521 (void) cpu_acpi_read_port(
522 522 cstate->cs_address, &value, 8);
523 523 acpica_get_global_FADT(&gbl_FADT);
524 524 (void) cpu_acpi_read_port(
525 525 gbl_FADT->XPmTimerBlock.Address,
526 526 &value, 32);
527 527 }
528 528 cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
529 529 }
530 530 }
531 531 }
532 532
533 533 /*
534 534 * The LAPIC timer may have stopped in deep c-state.
535 535 * Reprogram this CPU's LAPIC here before enabling interrupts.
536 536 */
537 537 (void) cstate_use_timer(&lapic_expire, CSTATE_USING_LAT);
538 538 sti();
539 539
540 540 /*
541 541 * We're no longer halted
542 542 */
543 543 if (hset_update) {
544 544 cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
545 545 bitset_atomic_del(&cp->cp_haltset, cpu_sid);
546 546 }
547 547 }
548 548
549 549 /*
550 550 * Idle the present CPU, deep c-state is supported
551 551 */
552 552 void
553 553 cpu_acpi_idle(void)
554 554 {
555 555 cpu_t *cp = CPU;
556 556 cpu_acpi_handle_t handle;
557 557 cma_c_state_t *cs_data;
558 558 cpu_acpi_cstate_t *cstates;
559 559 hrtime_t start, end;
560 560 int cpu_max_cstates;
561 561 uint32_t cs_indx;
562 562 uint16_t cs_type;
563 563
564 564 cpupm_mach_state_t *mach_state =
565 565 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
566 566 handle = mach_state->ms_acpi_handle;
567 567 ASSERT(CPU_ACPI_CSTATES(handle) != NULL);
568 568
569 569 cs_data = mach_state->ms_cstate.cma_state.cstate;
570 570 cstates = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
571 571 ASSERT(cstates != NULL);
572 572 cpu_max_cstates = cpu_acpi_get_max_cstates(handle);
573 573 if (cpu_max_cstates > CPU_MAX_CSTATES)
574 574 cpu_max_cstates = CPU_MAX_CSTATES;
575 575 if (cpu_max_cstates == 1) { /* no ACPI c-state data */
576 576 (*non_deep_idle_cpu)();
577 577 return;
578 578 }
579 579
580 580 start = gethrtime_unscaled();
581 581
582 582 cs_indx = cpupm_next_cstate(cs_data, cstates, cpu_max_cstates, start);
583 583
584 584 cs_type = cstates[cs_indx].cs_type;
585 585
586 586 switch (cs_type) {
587 587 default:
588 588 /* FALLTHROUGH */
589 589 case CPU_ACPI_C1:
590 590 (*non_deep_idle_cpu)();
591 591 break;
592 592
593 593 case CPU_ACPI_C2:
594 594 acpi_cpu_cstate(&cstates[cs_indx]);
595 595 break;
596 596
597 597 case CPU_ACPI_C3:
598 598 /*
599 599 * All supported Intel processors maintain cache coherency
600 600 * during C3. Currently when entering C3 processors flush
601 601 * core caches to higher level shared cache. The shared cache
602 602 * maintains state and supports probes during C3.
603 603 * Consequently there is no need to handle cache coherency
604 604 * and Bus Master activity here with the cache flush, BM_RLD
605 605 * bit, BM_STS bit, nor PM2_CNT.ARB_DIS mechanisms described
606 606 * in section 8.1.4 of the ACPI Specification 4.0.
607 607 */
608 608 acpi_cpu_cstate(&cstates[cs_indx]);
609 609 break;
610 610 }
611 611
612 612 end = gethrtime_unscaled();
613 613
614 614 /*
615 615 * Update statistics
616 616 */
617 617 cpupm_wakeup_cstate_data(cs_data, end);
618 618 }
619 619
620 620 boolean_t
621 621 cpu_deep_cstates_supported(void)
622 622 {
623 623 extern int idle_cpu_no_deep_c;
624 624
625 625 if (idle_cpu_no_deep_c)
626 626 return (B_FALSE);
627 627
628 628 if (!cpuid_deep_cstates_supported())
629 629 return (B_FALSE);
630 630
631 631 if (cpuid_arat_supported()) {
632 632 cpu_cstate_arat = B_TRUE;
633 633 return (B_TRUE);
634 634 }
635 635
636 636 if ((hpet.supported == HPET_FULL_SUPPORT) &&
637 637 hpet.install_proxy()) {
638 638 cpu_cstate_hpet = B_TRUE;
639 639 return (B_TRUE);
640 640 }
641 641
642 642 return (B_FALSE);
643 643 }
644 644
645 645 /*
646 646 * Validate that this processor supports deep cstate and if so,
647 647 * get the c-state data from ACPI and cache it.
648 648 */
649 649 static int
650 650 cpu_idle_init(cpu_t *cp)
651 651 {
652 652 cpupm_mach_state_t *mach_state =
653 653 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
654 654 cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
655 655 cpu_acpi_cstate_t *cstate;
656 656 char name[KSTAT_STRLEN];
657 657 int cpu_max_cstates, i;
658 658 int ret;
659 659
660 660 /*
661 661 * Cache the C-state specific ACPI data.
662 662 */
663 663 if ((ret = cpu_acpi_cache_cstate_data(handle)) != 0) {
664 664 if (ret < 0)
665 665 cmn_err(CE_NOTE,
666 666 "!Support for CPU deep idle states is being "
667 667 "disabled due to errors parsing ACPI C-state "
668 668 "objects exported by BIOS.");
669 669 cpu_idle_fini(cp);
670 670 return (-1);
671 671 }
672 672
673 673 cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
674 674
675 675 cpu_max_cstates = cpu_acpi_get_max_cstates(handle);
676 676
677 677 for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) {
678 678 (void) snprintf(name, KSTAT_STRLEN - 1, "c%d", cstate->cs_type);
679 679 /*
680 680 * Allocate, initialize and install cstate kstat
681 681 */
682 682 cstate->cs_ksp = kstat_create("cstate", cp->cpu_id,
683 683 name, "misc",
684 684 KSTAT_TYPE_NAMED,
685 685 sizeof (cpu_idle_kstat) / sizeof (kstat_named_t),
686 686 KSTAT_FLAG_VIRTUAL);
687 687
688 688 if (cstate->cs_ksp == NULL) {
689 689 cmn_err(CE_NOTE, "kstat_create(c_state) fail");
690 690 } else {
691 691 cstate->cs_ksp->ks_data = &cpu_idle_kstat;
692 692 cstate->cs_ksp->ks_lock = &cpu_idle_mutex;
693 693 cstate->cs_ksp->ks_update = cpu_idle_kstat_update;
694 694 cstate->cs_ksp->ks_data_size += MAXNAMELEN;
695 695 cstate->cs_ksp->ks_private = cstate;
696 696 kstat_install(cstate->cs_ksp);
697 697 }
698 698 cstate++;
699 699 }
700 700
701 701 cpupm_alloc_domains(cp, CPUPM_C_STATES);
702 702 cpupm_alloc_ms_cstate(cp);
703 703
704 704 if (cpu_deep_cstates_supported()) {
705 705 uint32_t value;
706 706
707 707 mutex_enter(&cpu_idle_callb_mutex);
708 708 if (cpu_deep_idle_callb_id == (callb_id_t)0)
709 709 cpu_deep_idle_callb_id = callb_add(&cpu_deep_idle_callb,
710 710 (void *)NULL, CB_CL_CPU_DEEP_IDLE, "cpu_deep_idle");
711 711 if (cpu_idle_cpr_callb_id == (callb_id_t)0)
712 712 cpu_idle_cpr_callb_id = callb_add(&cpu_idle_cpr_callb,
713 713 (void *)NULL, CB_CL_CPR_PM, "cpu_idle_cpr");
714 714 mutex_exit(&cpu_idle_callb_mutex);
715 715
716 716
717 717 /*
718 718 * All supported CPUs (Nehalem and later) will remain in C3
719 719 * during Bus Master activity.
720 720 * All CPUs set ACPI_BITREG_BUS_MASTER_RLD to 0 here if it
721 721 * is not already 0 before enabling Deeper C-states.
722 722 */
723 723 cpu_acpi_get_register(ACPI_BITREG_BUS_MASTER_RLD, &value);
724 724 if (value & 1)
725 725 cpu_acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
726 726 }
727 727
728 728 return (0);
729 729 }
730 730
731 731 /*
732 732 * Free resources allocated by cpu_idle_init().
733 733 */
734 734 static void
735 735 cpu_idle_fini(cpu_t *cp)
736 736 {
737 737 cpupm_mach_state_t *mach_state =
738 738 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
739 739 cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
740 740 cpu_acpi_cstate_t *cstate;
741 741 uint_t cpu_max_cstates, i;
742 742
743 743 /*
744 744 * idle cpu points back to the generic one
745 745 */
746 746 idle_cpu = cp->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
747 747 disp_enq_thread = non_deep_idle_disp_enq_thread;
748 748
749 749 cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
750 750 if (cstate) {
751 751 cpu_max_cstates = cpu_acpi_get_max_cstates(handle);
752 752
753 753 for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) {
754 754 if (cstate->cs_ksp != NULL)
755 755 kstat_delete(cstate->cs_ksp);
756 756 cstate++;
757 757 }
758 758 }
759 759
760 760 cpupm_free_ms_cstate(cp);
761 761 cpupm_free_domains(&cpupm_cstate_domains);
762 762 cpu_acpi_free_cstate_data(handle);
763 763
764 764 mutex_enter(&cpu_idle_callb_mutex);
765 765 if (cpu_deep_idle_callb_id != (callb_id_t)0) {
766 766 (void) callb_delete(cpu_deep_idle_callb_id);
767 767 cpu_deep_idle_callb_id = (callb_id_t)0;
768 768 }
769 769 if (cpu_idle_cpr_callb_id != (callb_id_t)0) {
770 770 (void) callb_delete(cpu_idle_cpr_callb_id);
771 771 cpu_idle_cpr_callb_id = (callb_id_t)0;
772 772 }
773 773 mutex_exit(&cpu_idle_callb_mutex);
774 774 }
775 775
776 776 /*
777 777 * Temporary idle routine that cpu_idle_stop() (the master) installs on
778 778 * the CPU being stopped (the slave) to close a race over the c-state
779 779 * data structure. When the slave runs it, the slave switches its own
780 780 * idle routine to the non deep idle function; the master polls for that
 * change and only then goes on to reclaim the resource.
781 781 */
782 782 static void
783 783 cpu_idle_stop_sync(void)
784 784 {
785 785 /* switch to the non deep idle function */
786 786 CPU->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
787 787 }
788 788
/*
 * Stop C-state management for cp and release its C-state resources.
 *
 * If the adaptive idle routine is in use, cp is first made to leave it:
 * cpu_idle_stop_sync is installed as its idle routine, cp is poked, and
 * this function polls until cp has switched itself to non_deep_idle_cpu.
 * After that the per-C-state kstats are deleted, the CPU is removed from
 * the C-state domain and its cached ACPI C-state data is freed.
 */
789 789 static void
790 790 cpu_idle_stop(cpu_t *cp)
791 791 {
792 792 cpupm_mach_state_t *mach_state =
793 793 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
794 794 cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
795 795 cpu_acpi_cstate_t *cstate;
796 796 uint_t cpu_max_cstates, i = 0;
797 797
798 798 mutex_enter(&cpu_idle_callb_mutex);
799 799 if (idle_cpu == cpu_idle_adaptive) {
800 800 /*
801 801 * invoke the slave to call synchronous idle function.
802 802 */
803 803 cp->cpu_m.mcpu_idle_cpu = cpu_idle_stop_sync;
804 804 poke_cpu(cp->cpu_id);
805 805
806 806 /*
807 807 * wait until the slave switches to non deep idle function,
808 808 * so that the master is safe to go on to reclaim the resource.
 * The wait polls every 10us and never gives up; a notice is
 * logged each time another CPU_IDLE_STOP_TIMEOUT polls have
 * passed. cpu_idle_callb_mutex stays held for the whole wait.
809 809 */
810 810 while (cp->cpu_m.mcpu_idle_cpu != non_deep_idle_cpu) {
811 811 drv_usecwait(10);
812 812 if ((++i % CPU_IDLE_STOP_TIMEOUT) == 0)
813 813 cmn_err(CE_NOTE, "!cpu_idle_stop: the slave"
814 814 " idle stop timeout");
815 815 }
816 816 }
817 817 mutex_exit(&cpu_idle_callb_mutex);
818 818
/* i is reused below as the C-state loop index. */
819 819 cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
820 820 if (cstate) {
821 821 cpu_max_cstates = cpu_acpi_get_max_cstates(handle);
822 822
823 823 for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) {
824 824 if (cstate->cs_ksp != NULL)
825 825 kstat_delete(cstate->cs_ksp);
826 826 cstate++;
827 827 }
828 828 }
829 829 cpupm_free_ms_cstate(cp);
830 830 cpupm_remove_domains(cp, CPUPM_C_STATES, &cpupm_cstate_domains);
831 831 cpu_acpi_free_cstate_data(handle);
832 832 }
833 833
834 834 /*ARGSUSED*/
835 835 static boolean_t
836 836 cpu_deep_idle_callb(void *arg, int code)
837 837 {
838 838 boolean_t rslt = B_TRUE;
839 839
840 840 mutex_enter(&cpu_idle_callb_mutex);
841 841 switch (code) {
842 842 case PM_DEFAULT_CPU_DEEP_IDLE:
843 843 /*
844 844 * Default policy is same as enable
845 845 */
846 846 /*FALLTHROUGH*/
847 847 case PM_ENABLE_CPU_DEEP_IDLE:
848 848 if ((cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG) == 0)
849 849 break;
850 850
851 851 if (cstate_timer_callback(PM_ENABLE_CPU_DEEP_IDLE)) {
852 852 disp_enq_thread = cstate_wakeup;
853 853 idle_cpu = cpu_idle_adaptive;
854 854 cpu_idle_cfg_state &= ~CPU_IDLE_DEEP_CFG;
855 855 } else {
856 856 rslt = B_FALSE;
857 857 }
858 858 break;
859 859
860 860 case PM_DISABLE_CPU_DEEP_IDLE:
861 861 if (cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG)
862 862 break;
863 863
864 864 idle_cpu = non_deep_idle_cpu;
865 865 if (cstate_timer_callback(PM_DISABLE_CPU_DEEP_IDLE)) {
866 866 disp_enq_thread = non_deep_idle_disp_enq_thread;
867 867 cpu_idle_cfg_state |= CPU_IDLE_DEEP_CFG;
868 868 }
869 869 break;
870 870
871 871 default:
872 872 cmn_err(CE_NOTE, "!cpu deep_idle_callb: invalid code %d\n",
873 873 code);
874 874 break;
875 875 }
876 876 mutex_exit(&cpu_idle_callb_mutex);
877 877 return (rslt);
878 878 }
879 879
880 880 /*ARGSUSED*/
881 881 static boolean_t
882 882 cpu_idle_cpr_callb(void *arg, int code)
883 883 {
884 884 boolean_t rslt = B_TRUE;
885 885
886 886 mutex_enter(&cpu_idle_callb_mutex);
887 887 switch (code) {
888 888 case CB_CODE_CPR_RESUME:
889 889 if (cstate_timer_callback(CB_CODE_CPR_RESUME)) {
890 890 /*
891 891 * Do not enable dispatcher hooks if disabled by user.
892 892 */
893 893 if (cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG)
894 894 break;
895 895
896 896 disp_enq_thread = cstate_wakeup;
897 897 idle_cpu = cpu_idle_adaptive;
898 898 } else {
899 899 rslt = B_FALSE;
900 900 }
901 901 break;
902 902
903 903 case CB_CODE_CPR_CHKPT:
904 904 idle_cpu = non_deep_idle_cpu;
905 905 disp_enq_thread = non_deep_idle_disp_enq_thread;
906 906 (void) cstate_timer_callback(CB_CODE_CPR_CHKPT);
907 907 break;
908 908
909 909 default:
910 910 cmn_err(CE_NOTE, "!cpudvr cpr_callb: invalid code %d\n", code);
911 911 break;
912 912 }
913 913 mutex_exit(&cpu_idle_callb_mutex);
914 914 return (rslt);
915 915 }
916 916
917 917 /*
918 918 * handle _CST notification
 *
 * Re-evaluates the ACPI C-state data of CPUs in cp's C-state domain and
 * switches their idle routine according to how many C-states are now
 * available. Does nothing when cp has no cpupm state, and compiles to an
 * empty function when __xpv is defined.
919 919 */
920 920 void
921 921 cpuidle_cstate_instance(cpu_t *cp)
922 922 {
923 923 #ifndef __xpv
924 924 cpupm_mach_state_t *mach_state =
925 925 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
926 926 cpu_acpi_handle_t handle;
927 927 struct machcpu *mcpu;
928 928 cpuset_t dom_cpu_set;
929 929 kmutex_t *pm_lock;
930 930 int result = 0;
931 931 processorid_t cpu_id;
932 932
933 933 if (mach_state == NULL) {
934 934 return;
935 935 }
936 936
/*
 * dom_cpu_set is assigned from the domain's pm_cpus; CPUs are removed from
 * this local variable as they are processed.
 */
937 937 ASSERT(mach_state->ms_cstate.cma_domain != NULL);
938 938 dom_cpu_set = mach_state->ms_cstate.cma_domain->pm_cpus;
939 939 pm_lock = &mach_state->ms_cstate.cma_domain->pm_lock;
940 940
941 941 /*
942 942 * Do for all the CPU's in the domain
 * NOTE(review): the loop below repeats only while result < 0. If
 * CPUSET_ATOMIC_XDEL reports success as 0, just one CPU is handled
 * per call - confirm against the macro definition.
943 943 */
944 944 mutex_enter(pm_lock);
945 945 do {
946 946 CPUSET_FIND(dom_cpu_set, cpu_id);
947 947 if (cpu_id == CPUSET_NOTINSET)
948 948 break;
949 949
950 950 ASSERT(cpu_id >= 0 && cpu_id < NCPU);
951 951 cp = cpu[cpu_id];
952 952 mach_state = (cpupm_mach_state_t *)
953 953 cp->cpu_m.mcpu_pm_mach_state;
/* A CPU without C-state capability ends the whole walk. */
954 954 if (!(mach_state->ms_caps & CPUPM_C_STATES)) {
955 955 mutex_exit(pm_lock);
956 956 return;
957 957 }
958 958 handle = mach_state->ms_acpi_handle;
959 959 ASSERT(handle != NULL);
960 960
961 961 /*
962 962 * re-evaluate cstate object
 * A failure is only logged; processing continues with
 * whatever cpu_acpi_get_max_cstates() reports afterwards.
963 963 */
964 964 if (cpu_acpi_cache_cstate_data(handle) != 0) {
965 965 cmn_err(CE_WARN, "Cannot re-evaluate the cpu c-state"
966 966 " object Instance: %d", cpu_id);
967 967 }
/*
 * More than C1 available: use the ACPI idle routine and the c-state aware
 * wakeup hook. Exactly C1: go back to the generic routines.
 */
968 968 mcpu = &(cp->cpu_m);
969 969 mcpu->max_cstates = cpu_acpi_get_max_cstates(handle);
970 970 if (mcpu->max_cstates > CPU_ACPI_C1) {
971 971 (void) cstate_timer_callback(
972 972 CST_EVENT_MULTIPLE_CSTATES);
973 973 disp_enq_thread = cstate_wakeup;
974 974 cp->cpu_m.mcpu_idle_cpu = cpu_acpi_idle;
975 975 } else if (mcpu->max_cstates == CPU_ACPI_C1) {
976 976 disp_enq_thread = non_deep_idle_disp_enq_thread;
977 977 cp->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
978 978 (void) cstate_timer_callback(CST_EVENT_ONE_CSTATE);
979 979 }
980 980
981 981 CPUSET_ATOMIC_XDEL(dom_cpu_set, cpu_id, result);
982 982 } while (result < 0);
983 983 mutex_exit(pm_lock);
984 984 #endif
985 985 }
986 986
987 987 /*
988 988 * handle the number or the type of available processor power states change
989 989 */
990 990 void
991 991 cpuidle_manage_cstates(void *ctx)
992 992 {
993 993 cpu_t *cp = ctx;
994 994 cpupm_mach_state_t *mach_state =
995 995 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
996 996 boolean_t is_ready;
997 997
998 998 if (mach_state == NULL) {
999 999 return;
1000 1000 }
1001 1001
1002 1002 /*
1003 1003 * We currently refuse to power manage if the CPU is not ready to
1004 1004 * take cross calls (cross calls fail silently if CPU is not ready
1005 1005 * for it).
1006 1006 *
1007 1007 * Additionally, for x86 platforms we cannot power manage an instance,
1008 1008 * until it has been initialized.
1009 1009 */
1010 1010 is_ready = (cp->cpu_flags & CPU_READY) && cpupm_cstate_ready(cp);
1011 1011 if (!is_ready)
1012 1012 return;
1013 1013
1014 1014 cpuidle_cstate_instance(cp);
1015 1015 }
↓ open down ↓ |
971 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX