Print this page
8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/i86pc/os/mp_pc.c
+++ new/usr/src/uts/i86pc/os/mp_pc.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
↓ open down ↓ |
18 lines elided |
↑ open up ↑ |
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 */
24 24 /*
25 25 * Copyright (c) 2010, Intel Corporation.
26 26 * All rights reserved.
27 27 */
28 28 /*
29 - * Copyright 2011 Joyent, Inc. All rights reserved.
29 + * Copyright 2018 Joyent, Inc
30 30 */
31 31
32 32 /*
33 33 * Welcome to the world of the "real mode platter".
34 34 * See also startup.c, mpcore.s and apic.c for related routines.
35 35 */
36 36
37 37 #include <sys/types.h>
38 38 #include <sys/systm.h>
39 39 #include <sys/cpuvar.h>
40 40 #include <sys/cpu_module.h>
41 41 #include <sys/kmem.h>
42 42 #include <sys/archsystm.h>
43 43 #include <sys/machsystm.h>
44 44 #include <sys/controlregs.h>
45 45 #include <sys/x86_archext.h>
46 46 #include <sys/smp_impldefs.h>
47 47 #include <sys/sysmacros.h>
48 48 #include <sys/mach_mmu.h>
49 49 #include <sys/promif.h>
50 50 #include <sys/cpu.h>
51 51 #include <sys/cpu_event.h>
52 52 #include <sys/sunndi.h>
53 53 #include <sys/fs/dv_node.h>
54 54 #include <vm/hat_i86.h>
55 55 #include <vm/as.h>
56 56
57 57 extern cpuset_t cpu_ready_set;
58 58
59 59 extern int mp_start_cpu_common(cpu_t *cp, boolean_t boot);
60 60 extern void real_mode_start_cpu(void);
61 61 extern void real_mode_start_cpu_end(void);
62 62 extern void real_mode_stop_cpu_stage1(void);
63 63 extern void real_mode_stop_cpu_stage1_end(void);
64 64 extern void real_mode_stop_cpu_stage2(void);
65 65 extern void real_mode_stop_cpu_stage2_end(void);
66 66
67 67 void rmp_gdt_init(rm_platter_t *);
68 68
69 69 /*
70 70 * Fill up the real mode platter to make it easy for real mode code to
71 71 * kick it off. This area should really be one passed by boot to kernel
72 72 * and guaranteed to be below 1MB and aligned to 16 bytes. Should also
73 73 * have identical physical and virtual address in paged mode.
74 74 */
75 75 static ushort_t *warm_reset_vector = NULL;
76 76
77 77 int
78 78 mach_cpucontext_init(void)
79 79 {
80 80 ushort_t *vec;
81 81 ulong_t addr;
82 82 struct rm_platter *rm = (struct rm_platter *)rm_platter_va;
83 83
84 84 if (!(vec = (ushort_t *)psm_map_phys(WARM_RESET_VECTOR,
85 85 sizeof (vec), PROT_READ | PROT_WRITE)))
86 86 return (-1);
87 87
88 88 /*
89 89 * setup secondary cpu bios boot up vector
90 90 * Write page offset to 0x467 and page frame number to 0x469.
91 91 */
92 92 addr = (ulong_t)((caddr_t)rm->rm_code - (caddr_t)rm) + rm_platter_pa;
93 93 vec[0] = (ushort_t)(addr & PAGEOFFSET);
94 94 vec[1] = (ushort_t)((addr & (0xfffff & PAGEMASK)) >> 4);
95 95 warm_reset_vector = vec;
96 96
97 97 /* Map real mode platter into kas so kernel can access it. */
98 98 hat_devload(kas.a_hat,
99 99 (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
100 100 btop(rm_platter_pa), PROT_READ | PROT_WRITE | PROT_EXEC,
101 101 HAT_LOAD_NOCONSIST);
102 102
103 103 /* Copy CPU startup code to rm_platter if it's still during boot. */
104 104 if (!plat_dr_enabled()) {
105 105 ASSERT((size_t)real_mode_start_cpu_end -
106 106 (size_t)real_mode_start_cpu <= RM_PLATTER_CODE_SIZE);
107 107 bcopy((caddr_t)real_mode_start_cpu, (caddr_t)rm->rm_code,
108 108 (size_t)real_mode_start_cpu_end -
109 109 (size_t)real_mode_start_cpu);
110 110 }
111 111
112 112 return (0);
113 113 }
114 114
115 115 void
116 116 mach_cpucontext_fini(void)
117 117 {
118 118 if (warm_reset_vector)
119 119 psm_unmap_phys((caddr_t)warm_reset_vector,
120 120 sizeof (warm_reset_vector));
121 121 hat_unload(kas.a_hat, (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
122 122 HAT_UNLOAD);
123 123 }
124 124
125 125 #if defined(__amd64)
↓ open down ↓ |
86 lines elided |
↑ open up ↑ |
126 126 extern void *long_mode_64(void);
127 127 #endif /* __amd64 */
128 128
129 129 /*ARGSUSED*/
130 130 void
131 131 rmp_gdt_init(rm_platter_t *rm)
132 132 {
133 133
134 134 #if defined(__amd64)
135 135 /* Use the kas address space for the CPU startup thread. */
136 - if (MAKECR3(kas.a_hat->hat_htable->ht_pfn) > 0xffffffffUL)
136 + if (mmu_ptob(kas.a_hat->hat_htable->ht_pfn) > 0xffffffffUL) {
137 137 panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
138 138 "located above 4G in physical memory (@ 0x%lx)",
139 - MAKECR3(kas.a_hat->hat_htable->ht_pfn));
139 + mmu_ptob(kas.a_hat->hat_htable->ht_pfn));
140 + }
140 141
141 142 /*
142 143 * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
143 144 * by code in real_mode_start_cpu():
144 145 *
145 146 * GDT[0]: NULL selector
146 147 * GDT[1]: 64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1
147 148 *
148 149 * Clear the IDT as interrupts will be off and a limit of 0 will cause
149 150 * the CPU to triple fault and reset on an NMI, seemingly as reasonable
150 151 * a course of action as any other, though it may cause the entire
151 152 * platform to reset in some cases...
152 153 */
153 154 rm->rm_temp_gdt[0] = 0;
154 155 rm->rm_temp_gdt[TEMPGDT_KCODE64] = 0x20980000000000ULL;
155 156
156 157 rm->rm_temp_gdt_lim = (ushort_t)(sizeof (rm->rm_temp_gdt) - 1);
157 158 rm->rm_temp_gdt_base = rm_platter_pa +
158 159 (uint32_t)offsetof(rm_platter_t, rm_temp_gdt);
159 160 rm->rm_temp_idt_lim = 0;
160 161 rm->rm_temp_idt_base = 0;
161 162
162 163 /*
163 164 * Since the CPU needs to jump to protected mode using an identity
164 165 * mapped address, we need to calculate it here.
165 166 */
166 167 rm->rm_longmode64_addr = rm_platter_pa +
↓ open down ↓ |
17 lines elided |
↑ open up ↑ |
167 168 (uint32_t)((uintptr_t)long_mode_64 -
168 169 (uintptr_t)real_mode_start_cpu);
169 170 #endif /* __amd64 */
170 171 }
171 172
172 173 static void *
173 174 mach_cpucontext_alloc_tables(struct cpu *cp)
174 175 {
175 176 tss_t *ntss;
176 177 struct cpu_tables *ct;
178 + size_t ctsize;
177 179
178 180 /*
179 181 * Allocate space for stack, tss, gdt and idt. We round the size
180 182 * allotted for cpu_tables up, so that the TSS is on a unique page.
181 183 * This is more efficient when running in virtual machines.
182 184 */
183 - ct = kmem_zalloc(P2ROUNDUP(sizeof (*ct), PAGESIZE), KM_SLEEP);
185 + ctsize = P2ROUNDUP(sizeof (*ct), PAGESIZE);
186 + ct = kmem_zalloc(ctsize, KM_SLEEP);
184 187 if ((uintptr_t)ct & PAGEOFFSET)
185 188 panic("mach_cpucontext_alloc_tables: cpu%d misaligned tables",
186 189 cp->cpu_id);
187 190
188 191 ntss = cp->cpu_tss = &ct->ct_tss;
189 192
190 193 #if defined(__amd64)
194 + uintptr_t va;
195 + size_t len;
191 196
192 197 /*
193 198 * #DF (double fault).
194 199 */
195 - ntss->tss_ist1 = (uint64_t)&ct->ct_stack[sizeof (ct->ct_stack)];
200 + ntss->tss_ist1 = (uintptr_t)&ct->ct_stack1[sizeof (ct->ct_stack1)];
196 201
202 + /*
203 + * NMI (non-maskable interrupt)
204 + */
205 + ntss->tss_ist2 = (uintptr_t)&ct->ct_stack2[sizeof (ct->ct_stack2)];
206 +
207 + /*
208 + * #MC (machine check exception / hardware error)
209 + */
210 + ntss->tss_ist3 = (uintptr_t)&ct->ct_stack3[sizeof (ct->ct_stack3)];
211 +
212 + /*
213 + * #DB, #BP debug interrupts and KDI/kmdb
214 + */
215 + ntss->tss_ist4 = (uintptr_t)&cp->cpu_m.mcpu_kpti_dbg.kf_tr_rsp;
216 +
217 + if (kpti_enable == 1) {
218 + /*
219 + * #GP, #PF, #SS fault interrupts
220 + */
221 + ntss->tss_ist5 = (uintptr_t)&cp->cpu_m.mcpu_kpti_flt.kf_tr_rsp;
222 +
223 + /*
224 + * Used by all other interrupts
225 + */
226 + ntss->tss_ist6 = (uint64_t)&cp->cpu_m.mcpu_kpti.kf_tr_rsp;
227 +
228 + /*
229 + * On AMD64 we need to make sure that all of the pages of the
230 + * struct cpu_tables are punched through onto the user CPU for
231 + * kpti.
232 + *
233 + * The final page will always be the TSS, so treat that
234 + * separately.
235 + */
236 + for (va = (uintptr_t)ct, len = ctsize - MMU_PAGESIZE;
237 + len >= MMU_PAGESIZE;
238 + len -= MMU_PAGESIZE, va += MMU_PAGESIZE) {
239 + /* The doublefault stack must be RW */
240 + hati_cpu_punchin(cp, va, PROT_READ | PROT_WRITE);
241 + }
242 + ASSERT3U((uintptr_t)ntss, ==, va);
243 + hati_cpu_punchin(cp, (uintptr_t)ntss, PROT_READ);
244 + }
245 +
197 246 #elif defined(__i386)
198 247
199 248 ntss->tss_esp0 = ntss->tss_esp1 = ntss->tss_esp2 = ntss->tss_esp =
200 - (uint32_t)&ct->ct_stack[sizeof (ct->ct_stack)];
249 + (uint32_t)&ct->ct_stack1[sizeof (ct->ct_stack1)];
201 250
202 251 ntss->tss_ss0 = ntss->tss_ss1 = ntss->tss_ss2 = ntss->tss_ss = KDS_SEL;
203 252
204 253 ntss->tss_eip = (uint32_t)cp->cpu_thread->t_pc;
205 254
206 255 ntss->tss_cs = KCS_SEL;
207 256 ntss->tss_ds = ntss->tss_es = KDS_SEL;
208 257 ntss->tss_fs = KFS_SEL;
209 258 ntss->tss_gs = KGS_SEL;
210 259
211 260 #endif /* __i386 */
212 261
213 262 /*
214 263 * Set I/O bit map offset equal to size of TSS segment limit
215 264 * for no I/O permission map. This will cause all user I/O
216 265 * instructions to generate a #GP fault.
217 266 */
218 267 ntss->tss_bitmapbase = sizeof (*ntss);
219 268
220 269 /*
221 270 * Setup kernel tss.
222 271 */
223 272 set_syssegd((system_desc_t *)&cp->cpu_gdt[GDT_KTSS], cp->cpu_tss,
224 273 sizeof (*cp->cpu_tss) - 1, SDT_SYSTSS, SEL_KPL);
225 274
226 275 return (ct);
227 276 }
228 277
229 278 void *
230 279 mach_cpucontext_xalloc(struct cpu *cp, int optype)
231 280 {
232 281 size_t len;
233 282 struct cpu_tables *ct;
234 283 rm_platter_t *rm = (rm_platter_t *)rm_platter_va;
235 284 static int cpu_halt_code_ready;
236 285
237 286 if (optype == MACH_CPUCONTEXT_OP_STOP) {
238 287 ASSERT(plat_dr_enabled());
239 288
240 289 /*
241 290 * The WARM_RESET_VECTOR has a limitation that the physical
242 291 * address written to it must be page-aligned. To work around
243 292 * this limitation, the CPU stop code has been split into
244 293 * two stages.
245 294 * The stage 2 code, which implements the real logic to halt
246 295 * CPUs, is copied to the rm_cpu_halt_code field in the real
247 296 * mode platter. The stage 1 code, which simply jumps to the
248 297 * stage 2 code in the rm_cpu_halt_code field, is copied to
249 298 * rm_code field in the real mode platter and it may be
250 299 * overwritten after the CPU has been stopped.
251 300 */
252 301 if (!cpu_halt_code_ready) {
253 302 /*
254 303 * The rm_cpu_halt_code field in the real mode platter
255 304 * is used by the CPU stop code only. So only copy the
256 305 * CPU stop stage 2 code into the rm_cpu_halt_code
257 306 * field on the first call.
258 307 */
259 308 len = (size_t)real_mode_stop_cpu_stage2_end -
260 309 (size_t)real_mode_stop_cpu_stage2;
261 310 ASSERT(len <= RM_PLATTER_CPU_HALT_CODE_SIZE);
262 311 bcopy((caddr_t)real_mode_stop_cpu_stage2,
263 312 (caddr_t)rm->rm_cpu_halt_code, len);
264 313 cpu_halt_code_ready = 1;
265 314 }
266 315
267 316 /*
268 317 * The rm_code field in the real mode platter is shared by
269 318 * the CPU start, CPU stop, CPR and fast reboot code. So copy
270 319 * the CPU stop stage 1 code into the rm_code field every time.
271 320 */
272 321 len = (size_t)real_mode_stop_cpu_stage1_end -
273 322 (size_t)real_mode_stop_cpu_stage1;
274 323 ASSERT(len <= RM_PLATTER_CODE_SIZE);
275 324 bcopy((caddr_t)real_mode_stop_cpu_stage1,
276 325 (caddr_t)rm->rm_code, len);
277 326 rm->rm_cpu_halted = 0;
278 327
279 328 return (cp->cpu_m.mcpu_mach_ctx_ptr);
280 329 } else if (optype != MACH_CPUCONTEXT_OP_START) {
281 330 return (NULL);
282 331 }
283 332
284 333 /*
285 334 * Only need to allocate tables when starting CPU.
286 335 * Tables allocated when starting CPU will be reused when stopping CPU.
287 336 */
288 337 ct = mach_cpucontext_alloc_tables(cp);
289 338 if (ct == NULL) {
290 339 return (NULL);
291 340 }
292 341
293 342 /* Copy CPU startup code to rm_platter for CPU hot-add operations. */
294 343 if (plat_dr_enabled()) {
295 344 bcopy((caddr_t)real_mode_start_cpu, (caddr_t)rm->rm_code,
296 345 (size_t)real_mode_start_cpu_end -
297 346 (size_t)real_mode_start_cpu);
298 347 }
299 348
300 349 /*
↓ open down ↓ |
90 lines elided |
↑ open up ↑ |
301 350 * Now copy all that we've set up onto the real mode platter
302 351 * for the real mode code to digest as part of starting the cpu.
303 352 */
304 353 rm->rm_idt_base = cp->cpu_idt;
305 354 rm->rm_idt_lim = sizeof (*cp->cpu_idt) * NIDT - 1;
306 355 rm->rm_gdt_base = cp->cpu_gdt;
307 356 rm->rm_gdt_lim = sizeof (*cp->cpu_gdt) * NGDT - 1;
308 357
309 358 /*
310 359 * CPU needs to access kernel address space after powering on.
311 - * When hot-adding CPU at runtime, directly use top level page table
312 - * of kas other than the return value of getcr3(). getcr3() returns
313 - * current process's top level page table, which may be different from
314 - * the one of kas.
315 360 */
316 - rm->rm_pdbr = MAKECR3(kas.a_hat->hat_htable->ht_pfn);
361 + rm->rm_pdbr = MAKECR3(kas.a_hat->hat_htable->ht_pfn, PCID_NONE);
317 362 rm->rm_cpu = cp->cpu_id;
318 363
319 364 /*
320 - * For hot-adding CPU at runtime, Machine Check and Performance Counter
321 - * should be disabled. They will be enabled on demand after CPU powers
322 - * on successfully
365 + * We need to mask off any bits set on our boot CPU that can't apply
366 + * while the subject CPU is initializing. If appropriate, they are
367 + * enabled later on.
323 368 */
324 369 rm->rm_cr4 = getcr4();
325 - rm->rm_cr4 &= ~(CR4_MCE | CR4_PCE);
370 + rm->rm_cr4 &= ~(CR4_MCE | CR4_PCE | CR4_PCIDE);
326 371
327 372 rmp_gdt_init(rm);
328 373
329 374 return (ct);
330 375 }
331 376
332 377 void
333 378 mach_cpucontext_xfree(struct cpu *cp, void *arg, int err, int optype)
334 379 {
335 380 struct cpu_tables *ct = arg;
336 381
337 382 ASSERT(&ct->ct_tss == cp->cpu_tss);
338 383 if (optype == MACH_CPUCONTEXT_OP_START) {
339 384 switch (err) {
340 385 case 0:
341 386 /*
342 387 * Save pointer for reuse when stopping CPU.
343 388 */
344 389 cp->cpu_m.mcpu_mach_ctx_ptr = arg;
345 390 break;
346 391 case ETIMEDOUT:
347 392 /*
348 393 * The processor was poked, but failed to start before
349 394 * we gave up waiting for it. In case it starts later,
350 395 * don't free anything.
351 396 */
352 397 cp->cpu_m.mcpu_mach_ctx_ptr = arg;
353 398 break;
354 399 default:
355 400 /*
356 401 * Some other, passive, error occurred.
357 402 */
358 403 kmem_free(ct, P2ROUNDUP(sizeof (*ct), PAGESIZE));
359 404 cp->cpu_tss = NULL;
360 405 break;
361 406 }
362 407 } else if (optype == MACH_CPUCONTEXT_OP_STOP) {
363 408 switch (err) {
364 409 case 0:
365 410 /*
366 411 * Free resources allocated when starting CPU.
367 412 */
368 413 kmem_free(ct, P2ROUNDUP(sizeof (*ct), PAGESIZE));
369 414 cp->cpu_tss = NULL;
370 415 cp->cpu_m.mcpu_mach_ctx_ptr = NULL;
371 416 break;
372 417 default:
373 418 /*
374 419 * Don't touch table pointer in case of failure.
375 420 */
376 421 break;
377 422 }
378 423 } else {
379 424 ASSERT(0);
380 425 }
381 426 }
382 427
383 428 void *
384 429 mach_cpucontext_alloc(struct cpu *cp)
385 430 {
386 431 return (mach_cpucontext_xalloc(cp, MACH_CPUCONTEXT_OP_START));
387 432 }
388 433
389 434 void
390 435 mach_cpucontext_free(struct cpu *cp, void *arg, int err)
391 436 {
392 437 mach_cpucontext_xfree(cp, arg, err, MACH_CPUCONTEXT_OP_START);
393 438 }
394 439
395 440 /*
396 441 * "Enter monitor." Called via cross-call from stop_other_cpus().
397 442 */
398 443 void
399 444 mach_cpu_halt(char *msg)
400 445 {
401 446 if (msg)
402 447 prom_printf("%s\n", msg);
403 448
404 449 /*CONSTANTCONDITION*/
405 450 while (1)
406 451 ;
407 452 }
408 453
409 454 void
410 455 mach_cpu_idle(void)
411 456 {
412 457 i86_halt();
413 458 }
414 459
415 460 void
416 461 mach_cpu_pause(volatile char *safe)
417 462 {
418 463 /*
419 464 * This cpu is now safe.
420 465 */
421 466 *safe = PAUSE_WAIT;
422 467 membar_enter(); /* make sure stores are flushed */
423 468
424 469 /*
425 470 * Now we wait. When we are allowed to continue, safe
426 471 * will be set to PAUSE_IDLE.
427 472 */
428 473 while (*safe != PAUSE_IDLE)
429 474 SMT_PAUSE();
430 475 }
431 476
432 477 /*
433 478 * Power on the target CPU.
434 479 */
435 480 int
436 481 mp_cpu_poweron(struct cpu *cp)
437 482 {
438 483 int error;
439 484 cpuset_t tempset;
440 485 processorid_t cpuid;
441 486
442 487 ASSERT(cp != NULL);
443 488 cpuid = cp->cpu_id;
444 489 if (use_mp == 0 || plat_dr_support_cpu() == 0) {
445 490 return (ENOTSUP);
446 491 } else if (cpuid < 0 || cpuid >= max_ncpus) {
447 492 return (EINVAL);
448 493 }
449 494
450 495 /*
451 496 * The current x86 implementations of mp_cpu_configure() and
452 497 * mp_cpu_poweron() have a limitation that mp_cpu_poweron() could only
453 498 * be called once after calling mp_cpu_configure() for a specific CPU.
454 499 * It's because mp_cpu_poweron() will destroy data structure created
455 500 * by mp_cpu_configure(). So reject the request if the CPU has already
456 501 * been powered on once after calling mp_cpu_configure().
457 502 * This limitation only affects the p_online syscall and the DR driver
458 503 * won't be affected because the DR driver always invokes public CPU
459 504 * management interfaces in the predefined order:
460 505 * cpu_configure()->cpu_poweron()...->cpu_poweroff()->cpu_unconfigure()
461 506 */
462 507 if (cpuid_checkpass(cp, 4) || cp->cpu_thread == cp->cpu_idle_thread) {
463 508 return (ENOTSUP);
464 509 }
465 510
466 511 /*
467 512 * Check if there's at least a Mbyte of kmem available
468 513 * before attempting to start the cpu.
469 514 */
470 515 if (kmem_avail() < 1024 * 1024) {
471 516 /*
472 517 * Kick off a reap in case that helps us with
473 518 * later attempts ..
474 519 */
475 520 kmem_reap();
476 521 return (ENOMEM);
477 522 }
478 523
479 524 affinity_set(CPU->cpu_id);
480 525
481 526 /*
482 527 * Start the target CPU. No need to call mach_cpucontext_fini()
483 528 * if mach_cpucontext_init() fails.
484 529 */
485 530 if ((error = mach_cpucontext_init()) == 0) {
486 531 error = mp_start_cpu_common(cp, B_FALSE);
487 532 mach_cpucontext_fini();
488 533 }
489 534 if (error != 0) {
490 535 affinity_clear();
491 536 return (error);
492 537 }
493 538
494 539 /* Wait for the target cpu to reach READY state. */
495 540 tempset = cpu_ready_set;
496 541 while (!CPU_IN_SET(tempset, cpuid)) {
497 542 delay(1);
498 543 tempset = *((volatile cpuset_t *)&cpu_ready_set);
499 544 }
500 545
501 546 /* Mark the target CPU as available for mp operation. */
502 547 CPUSET_ATOMIC_ADD(mp_cpus, cpuid);
503 548
504 549 /* Free the space allocated to hold the microcode file */
505 550 ucode_cleanup();
506 551
507 552 affinity_clear();
508 553
509 554 return (0);
510 555 }
511 556
512 557 #define MP_CPU_DETACH_MAX_TRIES 5
513 558 #define MP_CPU_DETACH_DELAY 100
514 559
515 560 static int
516 561 mp_cpu_detach_driver(dev_info_t *dip)
517 562 {
518 563 int i;
519 564 int rv = EBUSY;
520 565 dev_info_t *pdip;
521 566
522 567 pdip = ddi_get_parent(dip);
523 568 ASSERT(pdip != NULL);
524 569 /*
525 570 * Check if caller holds pdip busy - can cause deadlocks in
526 571 * e_ddi_branch_unconfigure(), which calls devfs_clean().
527 572 */
528 573 if (DEVI_BUSY_OWNED(pdip)) {
529 574 return (EDEADLOCK);
530 575 }
531 576
532 577 for (i = 0; i < MP_CPU_DETACH_MAX_TRIES; i++) {
533 578 if (e_ddi_branch_unconfigure(dip, NULL, 0) == 0) {
534 579 rv = 0;
535 580 break;
536 581 }
537 582 DELAY(MP_CPU_DETACH_DELAY);
538 583 }
539 584
540 585 return (rv);
541 586 }
542 587
543 588 /*
544 589 * Power off the target CPU.
545 590 * Note: cpu_lock will be released and then reacquired.
546 591 */
547 592 int
548 593 mp_cpu_poweroff(struct cpu *cp)
549 594 {
550 595 int rv = 0;
551 596 void *ctx;
552 597 dev_info_t *dip = NULL;
553 598 rm_platter_t *rm = (rm_platter_t *)rm_platter_va;
554 599 extern void cpupm_start(cpu_t *);
555 600 extern void cpupm_stop(cpu_t *);
556 601
557 602 ASSERT(cp != NULL);
558 603 ASSERT((cp->cpu_flags & CPU_OFFLINE) != 0);
559 604 ASSERT((cp->cpu_flags & CPU_QUIESCED) != 0);
560 605
561 606 if (use_mp == 0 || plat_dr_support_cpu() == 0) {
562 607 return (ENOTSUP);
563 608 }
564 609 /*
565 610 * There is no support for powering off cpu0 yet.
566 611 * There are many pieces of code which have a hard dependency on cpu0.
567 612 */
568 613 if (cp->cpu_id == 0) {
569 614 return (ENOTSUP);
570 615 };
571 616
572 617 if (mach_cpu_get_device_node(cp, &dip) != PSM_SUCCESS) {
573 618 return (ENXIO);
574 619 }
575 620 ASSERT(dip != NULL);
576 621 if (mp_cpu_detach_driver(dip) != 0) {
577 622 rv = EBUSY;
578 623 goto out_online;
579 624 }
580 625
581 626 /* Allocate CPU context for stopping */
582 627 if (mach_cpucontext_init() != 0) {
583 628 rv = ENXIO;
584 629 goto out_online;
585 630 }
586 631 ctx = mach_cpucontext_xalloc(cp, MACH_CPUCONTEXT_OP_STOP);
587 632 if (ctx == NULL) {
588 633 rv = ENXIO;
589 634 goto out_context_fini;
590 635 }
591 636
592 637 cpupm_stop(cp);
593 638 cpu_event_fini_cpu(cp);
594 639
595 640 if (cp->cpu_m.mcpu_cmi_hdl != NULL) {
596 641 cmi_fini(cp->cpu_m.mcpu_cmi_hdl);
597 642 cp->cpu_m.mcpu_cmi_hdl = NULL;
598 643 }
599 644
600 645 rv = mach_cpu_stop(cp, ctx);
601 646 if (rv != 0) {
602 647 goto out_enable_cmi;
603 648 }
604 649
605 650 /* Wait until the target CPU has been halted. */
606 651 while (*(volatile ushort_t *)&(rm->rm_cpu_halted) != 0xdead) {
607 652 delay(1);
608 653 }
609 654 rm->rm_cpu_halted = 0xffff;
610 655
611 656 /* CPU_READY has been cleared by mach_cpu_stop. */
612 657 ASSERT((cp->cpu_flags & CPU_READY) == 0);
613 658 ASSERT((cp->cpu_flags & CPU_RUNNING) == 0);
614 659 cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;
615 660 CPUSET_ATOMIC_DEL(mp_cpus, cp->cpu_id);
616 661
617 662 mach_cpucontext_xfree(cp, ctx, 0, MACH_CPUCONTEXT_OP_STOP);
618 663 mach_cpucontext_fini();
619 664
620 665 return (0);
621 666
622 667 out_enable_cmi:
623 668 {
624 669 cmi_hdl_t hdl;
625 670
626 671 if ((hdl = cmi_init(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp),
627 672 cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp))) != NULL) {
628 673 if (is_x86_feature(x86_featureset, X86FSET_MCA))
629 674 cmi_mca_init(hdl);
630 675 cp->cpu_m.mcpu_cmi_hdl = hdl;
631 676 }
632 677 }
633 678 cpu_event_init_cpu(cp);
634 679 cpupm_start(cp);
635 680 mach_cpucontext_xfree(cp, ctx, rv, MACH_CPUCONTEXT_OP_STOP);
636 681
637 682 out_context_fini:
638 683 mach_cpucontext_fini();
639 684
640 685 out_online:
641 686 (void) e_ddi_branch_configure(dip, NULL, 0);
642 687
643 688 if (rv != EAGAIN && rv != ETIME) {
644 689 rv = ENXIO;
645 690 }
646 691
647 692 return (rv);
648 693 }
649 694
650 695 /*
651 696 * Return vcpu state, since this could be a virtual environment that we
652 697 * are unaware of, return "unknown".
653 698 */
654 699 /* ARGSUSED */
655 700 int
656 701 vcpu_on_pcpu(processorid_t cpu)
657 702 {
658 703 return (VCPU_STATE_UNKNOWN);
659 704 }
↓ open down ↓ |
324 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX