8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 /*
  25  * Copyright (c) 2010, Intel Corporation.
  26  * All rights reserved.
  27  */
  28 /*
  29  * Copyright 2018 Joyent, Inc
  30  */
  31 
  32 /*
  33  * Welcome to the world of the "real mode platter".
  34  * See also startup.c, mpcore.s and apic.c for related routines.
  35  */
  36 
  37 #include <sys/types.h>
  38 #include <sys/systm.h>
  39 #include <sys/cpuvar.h>
  40 #include <sys/cpu_module.h>
  41 #include <sys/kmem.h>
  42 #include <sys/archsystm.h>
  43 #include <sys/machsystm.h>
  44 #include <sys/controlregs.h>
  45 #include <sys/x86_archext.h>
  46 #include <sys/smp_impldefs.h>
  47 #include <sys/sysmacros.h>
  48 #include <sys/mach_mmu.h>
  49 #include <sys/promif.h>
  50 #include <sys/cpu.h>
  51 #include <sys/cpu_event.h>
  52 #include <sys/sunndi.h>
  53 #include <sys/fs/dv_node.h>
  54 #include <vm/hat_i86.h>
  55 #include <vm/as.h>
  56 
/* Set of CPUs that have become ready; polled by mp_cpu_poweron(). */
extern cpuset_t cpu_ready_set;

/*
 * Routines defined outside this file.  The real_mode_* symbols come in
 * start/end pairs; the code below uses the difference between each pair as
 * the number of bytes to copy into the real mode platter.
 */
extern int  mp_start_cpu_common(cpu_t *cp, boolean_t boot);
extern void real_mode_start_cpu(void);
extern void real_mode_start_cpu_end(void);
extern void real_mode_stop_cpu_stage1(void);
extern void real_mode_stop_cpu_stage1_end(void);
extern void real_mode_stop_cpu_stage2(void);
extern void real_mode_stop_cpu_stage2_end(void);

void rmp_gdt_init(rm_platter_t *);

/*
 * Fill up the real mode platter to make it easy for real mode code to
 * kick it off. This area should really be one passed by boot to kernel
 * and guaranteed to be below 1MB and aligned to 16 bytes. Should also
 * have identical physical and virtual address in paged mode.
 */

/*
 * Kernel mapping of the BIOS warm reset vector.  Set by
 * mach_cpucontext_init() and unmapped by mach_cpucontext_fini().
 */
static ushort_t *warm_reset_vector = NULL;
  76 
  77 int
  78 mach_cpucontext_init(void)
  79 {
  80         ushort_t *vec;
  81         ulong_t addr;
  82         struct rm_platter *rm = (struct rm_platter *)rm_platter_va;
  83 
  84         if (!(vec = (ushort_t *)psm_map_phys(WARM_RESET_VECTOR,
  85             sizeof (vec), PROT_READ | PROT_WRITE)))
  86                 return (-1);
  87 
  88         /*
  89          * setup secondary cpu bios boot up vector
  90          * Write page offset to 0x467 and page frame number to 0x469.
  91          */
  92         addr = (ulong_t)((caddr_t)rm->rm_code - (caddr_t)rm) + rm_platter_pa;
  93         vec[0] = (ushort_t)(addr & PAGEOFFSET);
  94         vec[1] = (ushort_t)((addr & (0xfffff & PAGEMASK)) >> 4);
  95         warm_reset_vector = vec;
  96 
  97         /* Map real mode platter into kas so kernel can access it. */
  98         hat_devload(kas.a_hat,
  99             (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
 100             btop(rm_platter_pa), PROT_READ | PROT_WRITE | PROT_EXEC,
 101             HAT_LOAD_NOCONSIST);
 102 
 103         /* Copy CPU startup code to rm_platter if it's still during boot. */
 104         if (!plat_dr_enabled()) {
 105                 ASSERT((size_t)real_mode_start_cpu_end -
 106                     (size_t)real_mode_start_cpu <= RM_PLATTER_CODE_SIZE);
 107                 bcopy((caddr_t)real_mode_start_cpu, (caddr_t)rm->rm_code,
 108                     (size_t)real_mode_start_cpu_end -
 109                     (size_t)real_mode_start_cpu);
 110         }
 111 
 112         return (0);
 113 }
 114 
 115 void
 116 mach_cpucontext_fini(void)
 117 {
 118         if (warm_reset_vector)
 119                 psm_unmap_phys((caddr_t)warm_reset_vector,
 120                     sizeof (warm_reset_vector));
 121         hat_unload(kas.a_hat, (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
 122             HAT_UNLOAD);
 123 }
 124 
 125 #if defined(__amd64)
 126 extern void *long_mode_64(void);
 127 #endif  /* __amd64 */
 128 
 129 /*ARGSUSED*/
 130 void
 131 rmp_gdt_init(rm_platter_t *rm)
 132 {
 133 
 134 #if defined(__amd64)
 135         /* Use the kas address space for the CPU startup thread. */
 136         if (mmu_ptob(kas.a_hat->hat_htable->ht_pfn) > 0xffffffffUL) {
 137                 panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
 138                     "located above 4G in physical memory (@ 0x%lx)",
 139                     mmu_ptob(kas.a_hat->hat_htable->ht_pfn));
 140         }
 141 
 142         /*
 143          * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
 144          * by code in real_mode_start_cpu():
 145          *
 146          * GDT[0]:  NULL selector
 147          * GDT[1]:  64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1
 148          *
 149          * Clear the IDT as interrupts will be off and a limit of 0 will cause
 150          * the CPU to triple fault and reset on an NMI, seemingly as reasonable
 151          * a course of action as any other, though it may cause the entire
 152          * platform to reset in some cases...
 153          */
 154         rm->rm_temp_gdt[0] = 0;
 155         rm->rm_temp_gdt[TEMPGDT_KCODE64] = 0x20980000000000ULL;
 156 
 157         rm->rm_temp_gdt_lim = (ushort_t)(sizeof (rm->rm_temp_gdt) - 1);
 158         rm->rm_temp_gdt_base = rm_platter_pa +
 159             (uint32_t)offsetof(rm_platter_t, rm_temp_gdt);
 160         rm->rm_temp_idt_lim = 0;
 161         rm->rm_temp_idt_base = 0;
 162 
 163         /*
 164          * Since the CPU needs to jump to protected mode using an identity
 165          * mapped address, we need to calculate it here.
 166          */
 167         rm->rm_longmode64_addr = rm_platter_pa +
 168             (uint32_t)((uintptr_t)long_mode_64 -
 169             (uintptr_t)real_mode_start_cpu);
 170 #endif  /* __amd64 */
 171 }
 172 
 173 static void *
 174 mach_cpucontext_alloc_tables(struct cpu *cp)
 175 {
 176         tss_t *ntss;
 177         struct cpu_tables *ct;
 178         size_t ctsize;
 179 
 180         /*
 181          * Allocate space for stack, tss, gdt and idt. We round the size
 182          * allotted for cpu_tables up, so that the TSS is on a unique page.
 183          * This is more efficient when running in virtual machines.
 184          */
 185         ctsize = P2ROUNDUP(sizeof (*ct), PAGESIZE);
 186         ct = kmem_zalloc(ctsize, KM_SLEEP);
 187         if ((uintptr_t)ct & PAGEOFFSET)
 188                 panic("mach_cpucontext_alloc_tables: cpu%d misaligned tables",
 189                     cp->cpu_id);
 190 
 191         ntss = cp->cpu_tss = &ct->ct_tss;
 192 
 193 #if defined(__amd64)
 194         uintptr_t va;
 195         size_t len;
 196 
 197         /*
 198          * #DF (double fault).
 199          */
 200         ntss->tss_ist1 = (uintptr_t)&ct->ct_stack1[sizeof (ct->ct_stack1)];
 201 
 202         /*
 203          * #NM (non-maskable interrupt)
 204          */
 205         ntss->tss_ist2 = (uintptr_t)&ct->ct_stack2[sizeof (ct->ct_stack2)];
 206 
 207         /*
 208          * #MC (machine check exception / hardware error)
 209          */
 210         ntss->tss_ist3 = (uintptr_t)&ct->ct_stack3[sizeof (ct->ct_stack3)];
 211 
 212         /*
 213          * #DB, #BP debug interrupts and KDI/kmdb
 214          */
 215         ntss->tss_ist4 = (uintptr_t)&cp->cpu_m.mcpu_kpti_dbg.kf_tr_rsp;
 216 
 217         if (kpti_enable == 1) {
 218                 /*
 219                  * #GP, #PF, #SS fault interrupts
 220                  */
 221                 ntss->tss_ist5 = (uintptr_t)&cp->cpu_m.mcpu_kpti_flt.kf_tr_rsp;
 222 
 223                 /*
 224                  * Used by all other interrupts
 225                  */
 226                 ntss->tss_ist6 = (uint64_t)&cp->cpu_m.mcpu_kpti.kf_tr_rsp;
 227 
 228                 /*
 229                  * On AMD64 we need to make sure that all of the pages of the
 230                  * struct cpu_tables are punched through onto the user CPU for
 231                  * kpti.
 232                  *
 233                  * The final page will always be the TSS, so treat that
 234                  * separately.
 235                  */
 236                 for (va = (uintptr_t)ct, len = ctsize - MMU_PAGESIZE;
 237                     len >= MMU_PAGESIZE;
 238                     len -= MMU_PAGESIZE, va += MMU_PAGESIZE) {
 239                         /* The doublefault stack must be RW */
 240                         hati_cpu_punchin(cp, va, PROT_READ | PROT_WRITE);
 241                 }
 242                 ASSERT3U((uintptr_t)ntss, ==, va);
 243                 hati_cpu_punchin(cp, (uintptr_t)ntss, PROT_READ);
 244         }
 245 
 246 #elif defined(__i386)
 247 
 248         ntss->tss_esp0 = ntss->tss_esp1 = ntss->tss_esp2 = ntss->tss_esp =
 249             (uint32_t)&ct->ct_stack1[sizeof (ct->ct_stack1)];
 250 
 251         ntss->tss_ss0 = ntss->tss_ss1 = ntss->tss_ss2 = ntss->tss_ss = KDS_SEL;
 252 
 253         ntss->tss_eip = (uint32_t)cp->cpu_thread->t_pc;
 254 
 255         ntss->tss_cs = KCS_SEL;
 256         ntss->tss_ds = ntss->tss_es = KDS_SEL;
 257         ntss->tss_fs = KFS_SEL;
 258         ntss->tss_gs = KGS_SEL;
 259 
 260 #endif  /* __i386 */
 261 
 262         /*
 263          * Set I/O bit map offset equal to size of TSS segment limit
 264          * for no I/O permission map. This will cause all user I/O
 265          * instructions to generate #gp fault.
 266          */
 267         ntss->tss_bitmapbase = sizeof (*ntss);
 268 
 269         /*
 270          * Setup kernel tss.
 271          */
 272         set_syssegd((system_desc_t *)&cp->cpu_gdt[GDT_KTSS], cp->cpu_tss,
 273             sizeof (*cp->cpu_tss) - 1, SDT_SYSTSS, SEL_KPL);
 274 
 275         return (ct);
 276 }
 277 
 278 void *
 279 mach_cpucontext_xalloc(struct cpu *cp, int optype)
 280 {
 281         size_t len;
 282         struct cpu_tables *ct;
 283         rm_platter_t *rm = (rm_platter_t *)rm_platter_va;
 284         static int cpu_halt_code_ready;
 285 
 286         if (optype == MACH_CPUCONTEXT_OP_STOP) {
 287                 ASSERT(plat_dr_enabled());
 288 
 289                 /*
 290                  * The WARM_RESET_VECTOR has a limitation that the physical
 291                  * address written to it must be page-aligned. To work around
 292                  * this limitation, the CPU stop code has been splitted into
 293                  * two stages.
 294                  * The stage 2 code, which implements the real logic to halt
 295                  * CPUs, is copied to the rm_cpu_halt_code field in the real
 296                  * mode platter. The stage 1 code, which simply jumps to the
 297                  * stage 2 code in the rm_cpu_halt_code field, is copied to
 298                  * rm_code field in the real mode platter and it may be
 299                  * overwritten after the CPU has been stopped.
 300                  */
 301                 if (!cpu_halt_code_ready) {
 302                         /*
 303                          * The rm_cpu_halt_code field in the real mode platter
 304                          * is used by the CPU stop code only. So only copy the
 305                          * CPU stop stage 2 code into the rm_cpu_halt_code
 306                          * field on the first call.
 307                          */
 308                         len = (size_t)real_mode_stop_cpu_stage2_end -
 309                             (size_t)real_mode_stop_cpu_stage2;
 310                         ASSERT(len <= RM_PLATTER_CPU_HALT_CODE_SIZE);
 311                         bcopy((caddr_t)real_mode_stop_cpu_stage2,
 312                             (caddr_t)rm->rm_cpu_halt_code, len);
 313                         cpu_halt_code_ready = 1;
 314                 }
 315 
 316                 /*
 317                  * The rm_code field in the real mode platter is shared by
 318                  * the CPU start, CPU stop, CPR and fast reboot code. So copy
 319                  * the CPU stop stage 1 code into the rm_code field every time.
 320                  */
 321                 len = (size_t)real_mode_stop_cpu_stage1_end -
 322                     (size_t)real_mode_stop_cpu_stage1;
 323                 ASSERT(len <= RM_PLATTER_CODE_SIZE);
 324                 bcopy((caddr_t)real_mode_stop_cpu_stage1,
 325                     (caddr_t)rm->rm_code, len);
 326                 rm->rm_cpu_halted = 0;
 327 
 328                 return (cp->cpu_m.mcpu_mach_ctx_ptr);
 329         } else if (optype != MACH_CPUCONTEXT_OP_START) {
 330                 return (NULL);
 331         }
 332 
 333         /*
 334          * Only need to allocate tables when starting CPU.
 335          * Tables allocated when starting CPU will be reused when stopping CPU.
 336          */
 337         ct = mach_cpucontext_alloc_tables(cp);
 338         if (ct == NULL) {
 339                 return (NULL);
 340         }
 341 
 342         /* Copy CPU startup code to rm_platter for CPU hot-add operations. */
 343         if (plat_dr_enabled()) {
 344                 bcopy((caddr_t)real_mode_start_cpu, (caddr_t)rm->rm_code,
 345                     (size_t)real_mode_start_cpu_end -
 346                     (size_t)real_mode_start_cpu);
 347         }
 348 
 349         /*
 350          * Now copy all that we've set up onto the real mode platter
 351          * for the real mode code to digest as part of starting the cpu.
 352          */
 353         rm->rm_idt_base = cp->cpu_idt;
 354         rm->rm_idt_lim = sizeof (*cp->cpu_idt) * NIDT - 1;
 355         rm->rm_gdt_base = cp->cpu_gdt;
 356         rm->rm_gdt_lim = sizeof (*cp->cpu_gdt) * NGDT - 1;
 357 
 358         /*
 359          * CPU needs to access kernel address space after powering on.




 360          */
 361         rm->rm_pdbr = MAKECR3(kas.a_hat->hat_htable->ht_pfn, PCID_NONE);
 362         rm->rm_cpu = cp->cpu_id;
 363 
 364         /*
 365          * We need to mask off any bits set on our boot CPU that can't apply
 366          * while the subject CPU is initializing.  If appropriate, they are
 367          * enabled later on.
 368          */
 369         rm->rm_cr4 = getcr4();
 370         rm->rm_cr4 &= ~(CR4_MCE | CR4_PCE | CR4_PCIDE);
 371 
 372         rmp_gdt_init(rm);
 373 
 374         return (ct);
 375 }
 376 
 377 void
 378 mach_cpucontext_xfree(struct cpu *cp, void *arg, int err, int optype)
 379 {
 380         struct cpu_tables *ct = arg;
 381 
 382         ASSERT(&ct->ct_tss == cp->cpu_tss);
 383         if (optype == MACH_CPUCONTEXT_OP_START) {
 384                 switch (err) {
 385                 case 0:
 386                         /*
 387                          * Save pointer for reuse when stopping CPU.
 388                          */
 389                         cp->cpu_m.mcpu_mach_ctx_ptr = arg;
 390                         break;
 391                 case ETIMEDOUT:
 392                         /*
 393                          * The processor was poked, but failed to start before
 394                          * we gave up waiting for it.  In case it starts later,
 395                          * don't free anything.
 396                          */
 397                         cp->cpu_m.mcpu_mach_ctx_ptr = arg;
 398                         break;
 399                 default:
 400                         /*
 401                          * Some other, passive, error occurred.
 402                          */
 403                         kmem_free(ct, P2ROUNDUP(sizeof (*ct), PAGESIZE));
 404                         cp->cpu_tss = NULL;
 405                         break;
 406                 }
 407         } else if (optype == MACH_CPUCONTEXT_OP_STOP) {
 408                 switch (err) {
 409                 case 0:
 410                         /*
 411                          * Free resources allocated when starting CPU.
 412                          */
 413                         kmem_free(ct, P2ROUNDUP(sizeof (*ct), PAGESIZE));
 414                         cp->cpu_tss = NULL;
 415                         cp->cpu_m.mcpu_mach_ctx_ptr = NULL;
 416                         break;
 417                 default:
 418                         /*
 419                          * Don't touch table pointer in case of failure.
 420                          */
 421                         break;
 422                 }
 423         } else {
 424                 ASSERT(0);
 425         }
 426 }
 427 
/*
 * Allocate the context needed to start cp.  Shorthand for
 * mach_cpucontext_xalloc() with MACH_CPUCONTEXT_OP_START; returns whatever
 * that call returns.
 */
void *
mach_cpucontext_alloc(struct cpu *cp)
{
        return (mach_cpucontext_xalloc(cp, MACH_CPUCONTEXT_OP_START));
}
 433 
/*
 * Finish a CPU start attempt.  Shorthand for mach_cpucontext_xfree() with
 * MACH_CPUCONTEXT_OP_START; arg is the context returned by
 * mach_cpucontext_alloc() and err is the result of the start attempt.
 */
void
mach_cpucontext_free(struct cpu *cp, void *arg, int err)
{
        mach_cpucontext_xfree(cp, arg, err, MACH_CPUCONTEXT_OP_START);
}
 439 
 440 /*
 441  * "Enter monitor."  Called via cross-call from stop_other_cpus().
 442  */
 443 void
 444 mach_cpu_halt(char *msg)
 445 {
 446         if (msg)
 447                 prom_printf("%s\n", msg);
 448 
 449         /*CONSTANTCONDITION*/
 450         while (1)
 451                 ;
 452 }
 453 
/*
 * Idle this CPU by calling i86_halt().
 */
void
mach_cpu_idle(void)
{
        i86_halt();
}
 459 
 460 void
 461 mach_cpu_pause(volatile char *safe)
 462 {
 463         /*
 464          * This cpu is now safe.
 465          */
 466         *safe = PAUSE_WAIT;
 467         membar_enter(); /* make sure stores are flushed */
 468 
 469         /*
 470          * Now we wait.  When we are allowed to continue, safe
 471          * will be set to PAUSE_IDLE.
 472          */
 473         while (*safe != PAUSE_IDLE)
 474                 SMT_PAUSE();
 475 }
 476 
 477 /*
 478  * Power on the target CPU.
 479  */
 480 int
 481 mp_cpu_poweron(struct cpu *cp)
 482 {
 483         int error;
 484         cpuset_t tempset;
 485         processorid_t cpuid;
 486 
 487         ASSERT(cp != NULL);
 488         cpuid = cp->cpu_id;
 489         if (use_mp == 0 || plat_dr_support_cpu() == 0) {
 490                 return (ENOTSUP);
 491         } else if (cpuid < 0 || cpuid >= max_ncpus) {
 492                 return (EINVAL);
 493         }
 494 
 495         /*
 496          * The currrent x86 implementaiton of mp_cpu_configure() and
 497          * mp_cpu_poweron() have a limitation that mp_cpu_poweron() could only
 498          * be called once after calling mp_cpu_configure() for a specific CPU.
 499          * It's because mp_cpu_poweron() will destroy data structure created
 500          * by mp_cpu_configure(). So reject the request if the CPU has already
 501          * been powered on once after calling mp_cpu_configure().
 502          * This limitaiton only affects the p_online syscall and the DR driver
 503          * won't be affected because the DR driver always invoke public CPU
 504          * management interfaces in the predefined order:
 505          * cpu_configure()->cpu_poweron()...->cpu_poweroff()->cpu_unconfigure()
 506          */
 507         if (cpuid_checkpass(cp, 4) || cp->cpu_thread == cp->cpu_idle_thread) {
 508                 return (ENOTSUP);
 509         }
 510 
 511         /*
 512          * Check if there's at least a Mbyte of kmem available
 513          * before attempting to start the cpu.
 514          */
 515         if (kmem_avail() < 1024 * 1024) {
 516                 /*
 517                  * Kick off a reap in case that helps us with
 518                  * later attempts ..
 519                  */
 520                 kmem_reap();
 521                 return (ENOMEM);
 522         }
 523 
 524         affinity_set(CPU->cpu_id);
 525 
 526         /*
 527          * Start the target CPU. No need to call mach_cpucontext_fini()
 528          * if mach_cpucontext_init() fails.
 529          */
 530         if ((error = mach_cpucontext_init()) == 0) {
 531                 error = mp_start_cpu_common(cp, B_FALSE);
 532                 mach_cpucontext_fini();
 533         }
 534         if (error != 0) {
 535                 affinity_clear();
 536                 return (error);
 537         }
 538 
 539         /* Wait for the target cpu to reach READY state. */
 540         tempset = cpu_ready_set;
 541         while (!CPU_IN_SET(tempset, cpuid)) {
 542                 delay(1);
 543                 tempset = *((volatile cpuset_t *)&cpu_ready_set);
 544         }
 545 
 546         /* Mark the target CPU as available for mp operation. */
 547         CPUSET_ATOMIC_ADD(mp_cpus, cpuid);
 548 
 549         /* Free the space allocated to hold the microcode file */
 550         ucode_cleanup();
 551 
 552         affinity_clear();
 553 
 554         return (0);
 555 }
 556 
 557 #define MP_CPU_DETACH_MAX_TRIES         5
 558 #define MP_CPU_DETACH_DELAY             100
 559 
 560 static int
 561 mp_cpu_detach_driver(dev_info_t *dip)
 562 {
 563         int i;
 564         int rv = EBUSY;
 565         dev_info_t *pdip;
 566 
 567         pdip = ddi_get_parent(dip);
 568         ASSERT(pdip != NULL);
 569         /*
 570          * Check if caller holds pdip busy - can cause deadlocks in
 571          * e_ddi_branch_unconfigure(), which calls devfs_clean().
 572          */
 573         if (DEVI_BUSY_OWNED(pdip)) {
 574                 return (EDEADLOCK);
 575         }
 576 
 577         for (i = 0; i < MP_CPU_DETACH_MAX_TRIES; i++) {
 578                 if (e_ddi_branch_unconfigure(dip, NULL, 0) == 0) {
 579                         rv = 0;
 580                         break;
 581                 }
 582                 DELAY(MP_CPU_DETACH_DELAY);
 583         }
 584 
 585         return (rv);
 586 }
 587 
 588 /*
 589  * Power off the target CPU.
 590  * Note: cpu_lock will be released and then reacquired.
 591  */
 592 int
 593 mp_cpu_poweroff(struct cpu *cp)
 594 {
 595         int rv = 0;
 596         void *ctx;
 597         dev_info_t *dip = NULL;
 598         rm_platter_t *rm = (rm_platter_t *)rm_platter_va;
 599         extern void cpupm_start(cpu_t *);
 600         extern void cpupm_stop(cpu_t *);
 601 
 602         ASSERT(cp != NULL);
 603         ASSERT((cp->cpu_flags & CPU_OFFLINE) != 0);
 604         ASSERT((cp->cpu_flags & CPU_QUIESCED) != 0);
 605 
 606         if (use_mp == 0 || plat_dr_support_cpu() == 0) {
 607                 return (ENOTSUP);
 608         }
 609         /*
 610          * There is no support for powering off cpu0 yet.
 611          * There are many pieces of code which have a hard dependency on cpu0.
 612          */
 613         if (cp->cpu_id == 0) {
 614                 return (ENOTSUP);
 615         };
 616 
 617         if (mach_cpu_get_device_node(cp, &dip) != PSM_SUCCESS) {
 618                 return (ENXIO);
 619         }
 620         ASSERT(dip != NULL);
 621         if (mp_cpu_detach_driver(dip) != 0) {
 622                 rv = EBUSY;
 623                 goto out_online;
 624         }
 625 
 626         /* Allocate CPU context for stopping */
 627         if (mach_cpucontext_init() != 0) {
 628                 rv = ENXIO;
 629                 goto out_online;
 630         }
 631         ctx = mach_cpucontext_xalloc(cp, MACH_CPUCONTEXT_OP_STOP);
 632         if (ctx == NULL) {
 633                 rv = ENXIO;
 634                 goto out_context_fini;
 635         }
 636 
 637         cpupm_stop(cp);
 638         cpu_event_fini_cpu(cp);
 639 
 640         if (cp->cpu_m.mcpu_cmi_hdl != NULL) {
 641                 cmi_fini(cp->cpu_m.mcpu_cmi_hdl);
 642                 cp->cpu_m.mcpu_cmi_hdl = NULL;
 643         }
 644 
 645         rv = mach_cpu_stop(cp, ctx);
 646         if (rv != 0) {
 647                 goto out_enable_cmi;
 648         }
 649 
 650         /* Wait until the target CPU has been halted. */
 651         while (*(volatile ushort_t *)&(rm->rm_cpu_halted) != 0xdead) {
 652                 delay(1);
 653         }
 654         rm->rm_cpu_halted = 0xffff;
 655 
 656         /* CPU_READY has been cleared by mach_cpu_stop. */
 657         ASSERT((cp->cpu_flags & CPU_READY) == 0);
 658         ASSERT((cp->cpu_flags & CPU_RUNNING) == 0);
 659         cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;
 660         CPUSET_ATOMIC_DEL(mp_cpus, cp->cpu_id);
 661 
 662         mach_cpucontext_xfree(cp, ctx, 0, MACH_CPUCONTEXT_OP_STOP);
 663         mach_cpucontext_fini();
 664 
 665         return (0);
 666 
 667 out_enable_cmi:
 668         {
 669                 cmi_hdl_t hdl;
 670 
 671                 if ((hdl = cmi_init(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp),
 672                     cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp))) != NULL) {
 673                         if (is_x86_feature(x86_featureset, X86FSET_MCA))
 674                                 cmi_mca_init(hdl);
 675                         cp->cpu_m.mcpu_cmi_hdl = hdl;
 676                 }
 677         }
 678         cpu_event_init_cpu(cp);
 679         cpupm_start(cp);
 680         mach_cpucontext_xfree(cp, ctx, rv, MACH_CPUCONTEXT_OP_STOP);
 681 
 682 out_context_fini:
 683         mach_cpucontext_fini();
 684 
 685 out_online:
 686         (void) e_ddi_branch_configure(dip, NULL, 0);
 687 
 688         if (rv != EAGAIN && rv != ETIME) {
 689                 rv = ENXIO;
 690         }
 691 
 692         return (rv);
 693 }
 694 
/*
 * Return the vcpu state.  Since this could be a virtual environment that we
 * are unaware of, always return "unknown"; the cpu argument is not used.
 */
/* ARGSUSED */
int
vcpu_on_pcpu(processorid_t cpu)
{
        return (VCPU_STATE_UNKNOWN);
}
--- EOF ---