/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
 */
/*
 * Copyright (c) 2010, Intel Corporation.
 * All rights reserved.
 */
/*
 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
 */

#include <sys/types.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/cpu.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/disp.h>
#include <sys/class.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/note.h>
#include <sys/asm_linkage.h>
#include <sys/x_call.h>
#include <sys/systm.h>
#include <sys/var.h>
#include <sys/vtrace.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kp.h>
#include <sys/segments.h>
#include <sys/kmem.h>
#include <sys/stack.h>
#include <sys/smp_impldefs.h>
#include <sys/x86_archext.h>
#include <sys/machsystm.h>
#include <sys/traptrace.h>
#include <sys/clock.h>
#include <sys/cpc_impl.h>
#include <sys/pg.h>
#include <sys/cmt.h>
#include <sys/dtrace.h>
#include <sys/archsystm.h>
#include <sys/fp.h>
#include <sys/reboot.h>
#include <sys/kdi_machimpl.h>
#include <vm/hat_i86.h>
#include <vm/vm_dep.h>
#include <sys/memnode.h>
#include <sys/pci_cfgspace.h>
#include <sys/mach_mmu.h>
#include <sys/sysmacros.h>
#if defined(__xpv)
#include <sys/hypervisor.h>
#endif
#include <sys/cpu_module.h>

struct cpu	cpus[1];			/* CPU data */
struct cpu	*cpu[NCPU] = {&cpus[0]};	/* pointers to all CPUs */
struct cpu	*cpu_free_list;			/* list for released CPUs */
cpu_core_t	cpu_core[NCPU];			/* cpu_core structures */

#define	cpu_next_free	cpu_prev

/*
 * Useful for disabling MP bring-up on a MP capable system.
 */
int use_mp = 1;

/*
 * to be set by a PSM to indicate what cpus
 * are sitting around on the system.
 */
cpuset_t mp_cpus;

/*
 * This variable is used by the hat layer to decide whether or not
 * critical sections are needed to prevent race conditions.  For sun4m,
 * this variable is set once enough MP initialization has been done in
 * order to allow cross calls.
 */
int flushes_require_xcalls;

cpuset_t cpu_ready_set;		/* initialized in startup() */

static void mp_startup_boot(void);
static void mp_startup_hotplug(void);

static void cpu_sep_enable(void);
static void cpu_sep_disable(void);
static void cpu_asysc_enable(void);
static void cpu_asysc_disable(void);

/*
 * Init CPU info - get CPU type info for processor_info system call.
 */
void
init_cpu_info(struct cpu *cp)
{
	processor_info_t *pi = &cp->cpu_type_info;

	/*
	 * Get clock-frequency property for the CPU.
	 */
	pi->pi_clock = cpu_freq;

	/*
	 * Current frequency in Hz.
	 */
	cp->cpu_curr_clock = cpu_freq_hz;

	/*
	 * Supported frequencies.
	 */
	if (cp->cpu_supp_freqs == NULL) {
		cpu_set_supp_freqs(cp, NULL);
	}

	(void) strcpy(pi->pi_processor_type, "i386");
	if (fpu_exists)
		(void) strcpy(pi->pi_fputypes, "i387 compatible");

	cp->cpu_idstr = kmem_zalloc(CPU_IDSTRLEN, KM_SLEEP);
	cp->cpu_brandstr = kmem_zalloc(CPU_IDSTRLEN, KM_SLEEP);

	/*
	 * If called for the BSP, cp is equal to current CPU.
	 * For non-BSPs, cpuid info of cp is not ready yet, so use cpuid info
	 * of current CPU as default values for cpu_idstr and cpu_brandstr.
	 * They will be corrected in mp_startup_common() after cpuid_pass1()
	 * has been invoked on target CPU.
	 */
	(void) cpuid_getidstr(CPU, cp->cpu_idstr, CPU_IDSTRLEN);
	(void) cpuid_getbrandstr(CPU, cp->cpu_brandstr, CPU_IDSTRLEN);
}

/*
 * Configure syscall support on this CPU.
 */
/*ARGSUSED*/
void
init_cpu_syscall(struct cpu *cp)
{
	kpreempt_disable();

#if defined(__amd64)
	if (is_x86_feature(x86_featureset, X86FSET_MSR) &&
	    is_x86_feature(x86_featureset, X86FSET_ASYSC)) {

#if !defined(__lint)
		/*
		 * The syscall instruction imposes a certain ordering on
		 * segment selectors, so we double-check that ordering
		 * here.
		 */
		ASSERT(KDS_SEL == KCS_SEL + 8);
		ASSERT(UDS_SEL == U32CS_SEL + 8);
		ASSERT(UCS_SEL == U32CS_SEL + 16);
#endif
		/*
		 * Turn syscall/sysret extensions on.
		 */
		cpu_asysc_enable();

		/*
		 * Program the magic registers: STAR holds the kernel and
		 * user segment selector bases used by syscall/sysret, and
		 * LSTAR and CSTAR hold the 64-bit and compatibility-mode
		 * entry points.
		 */
		wrmsr(MSR_AMD_STAR,
		    ((uint64_t)(U32CS_SEL << 16 | KCS_SEL)) << 32);
		wrmsr(MSR_AMD_LSTAR, (uint64_t)(uintptr_t)sys_syscall);
		wrmsr(MSR_AMD_CSTAR, (uint64_t)(uintptr_t)sys_syscall32);

		/*
		 * This list of flags is masked off the incoming
		 * %rfl when we enter the kernel.
		 */
		wrmsr(MSR_AMD_SFMASK, (uint64_t)(uintptr_t)(PS_IE | PS_T));
	}
#endif

	/*
	 * On 32-bit kernels, we use sysenter/sysexit because it's too
	 * hard to use syscall/sysret, and it is more portable anyway.
	 *
	 * On 64-bit kernels on Nocona machines, the 32-bit syscall
	 * variant isn't available to 32-bit applications, but sysenter is.
	 */
	if (is_x86_feature(x86_featureset, X86FSET_MSR) &&
	    is_x86_feature(x86_featureset, X86FSET_SEP)) {

#if !defined(__lint)
		/*
		 * The sysenter instruction imposes a certain ordering on
		 * segment selectors, so we double-check that ordering
		 * here. See "sysenter" in Intel document 245471-012, "IA-32
		 * Intel Architecture Software Developer's Manual Volume 2:
		 * Instruction Set Reference"
		 */
		ASSERT(KDS_SEL == KCS_SEL + 8);

		ASSERT32(UCS_SEL == ((KCS_SEL + 16) | 3));
		ASSERT32(UDS_SEL == UCS_SEL + 8);

		ASSERT64(U32CS_SEL == ((KCS_SEL + 16) | 3));
		ASSERT64(UDS_SEL == U32CS_SEL + 8);
#endif

		cpu_sep_enable();

		/*
		 * resume() sets this value to the base of the thread's stack
		 * via a context handler.
		 */
		wrmsr(MSR_INTC_SEP_ESP, 0);
		wrmsr(MSR_INTC_SEP_EIP, (uint64_t)(uintptr_t)sys_sysenter);
	}

	kpreempt_enable();
}

/*
 * Multiprocessor initialization.
 *
 * Allocate and initialize the cpu structure, TRAPTRACE buffer, and the
 * startup and idle threads for the specified CPU.
 * Parameter boot is true for boot time operations and is false for CPU
 * DR operations.
 */
static struct cpu *
mp_cpu_configure_common(int cpun, boolean_t boot)
{
	struct cpu *cp;
	kthread_id_t tp;
	caddr_t	sp;
	proc_t *procp;
#if !defined(__xpv)
	extern int idle_cpu_prefer_mwait;
	extern void cpu_idle_mwait();
#endif
	extern void idle();
	extern void cpu_idle();

#ifdef TRAPTRACE
	trap_trace_ctl_t *ttc = &trap_trace_ctl[cpun];
#endif

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cpun < NCPU && cpu[cpun] == NULL);

	if (cpu_free_list == NULL) {
		cp = kmem_zalloc(sizeof (*cp), KM_SLEEP);
	} else {
		cp = cpu_free_list;
		cpu_free_list = cp->cpu_next_free;
	}

	cp->cpu_m.mcpu_istamp = cpun << 16;

	/* Create per CPU specific threads in the process p0. */
	procp = &p0;

	/*
	 * Initialize the dispatcher first.
	 */
	disp_cpu_init(cp);

	cpu_vm_data_init(cp);

	/*
	 * Allocate and initialize the startup thread for this CPU.
	 * Interrupt and process switch stacks get allocated later
	 * when the CPU starts running.
	 */
	tp = thread_create(NULL, 0, NULL, NULL, 0, procp,
	    TS_STOPPED, maxclsyspri);

	/*
	 * Set state to TS_ONPROC since this thread will start running
	 * as soon as the CPU comes online.
	 *
	 * All the other fields of the thread structure are setup by
	 * thread_create().
	 */
	THREAD_ONPROC(tp, cp);
	tp->t_preempt = 1;
	tp->t_bound_cpu = cp;
	tp->t_affinitycnt = 1;
	tp->t_cpu = cp;
	tp->t_disp_queue = cp->cpu_disp;

	/*
	 * Setup thread to start in mp_startup_common.
	 */
	sp = tp->t_stk;
	tp->t_sp = (uintptr_t)(sp - MINFRAME);
#if defined(__amd64)
	tp->t_sp -= STACK_ENTRY_ALIGN;		/* fake a call */
#endif
	/*
	 * Setup thread start entry point for boot or hotplug.
	 */
	if (boot) {
		tp->t_pc = (uintptr_t)mp_startup_boot;
	} else {
		tp->t_pc = (uintptr_t)mp_startup_hotplug;
	}

	cp->cpu_id = cpun;
	cp->cpu_self = cp;
	cp->cpu_thread = tp;
	cp->cpu_lwp = NULL;
	cp->cpu_dispthread = tp;
	cp->cpu_dispatch_pri = DISP_PRIO(tp);

	/*
	 * cpu_base_spl must be set explicitly here to prevent any blocking
	 * operations in mp_startup_common from causing the spl of the cpu
	 * to drop to 0 (allowing device interrupts before we're ready) in
	 * resume().
	 * cpu_base_spl MUST remain at LOCK_LEVEL until the cpu is CPU_READY.
	 * As an extra bit of security on DEBUG kernels, this is enforced with
	 * an assertion in mp_startup_common() -- before cpu_base_spl is set
	 * to its proper value.
	 */
	cp->cpu_base_spl = ipltospl(LOCK_LEVEL);

	/*
	 * Now, initialize per-CPU idle thread for this CPU.
	 */
	tp = thread_create(NULL, PAGESIZE, idle, NULL, 0, procp, TS_ONPROC, -1);

	cp->cpu_idle_thread = tp;

	tp->t_preempt = 1;
	tp->t_bound_cpu = cp;
	tp->t_affinitycnt = 1;
	tp->t_cpu = cp;
	tp->t_disp_queue = cp->cpu_disp;

	/*
	 * Bootstrap the CPU's PG data
	 */
	pg_cpu_bootstrap(cp);

	/*
	 * Perform CPC initialization on the new CPU.
	 */
	kcpc_hw_init(cp);

	/*
	 * Allocate virtual addresses for cpu_caddr1 and cpu_caddr2
	 * for each CPU.
	 */
	setup_vaddr_for_ppcopy(cp);

	/*
	 * Allocate page for new GDT and initialize from current GDT.
	 */
#if !defined(__lint)
	ASSERT((sizeof (*cp->cpu_gdt) * NGDT) <= PAGESIZE);
#endif
	cp->cpu_gdt = kmem_zalloc(PAGESIZE, KM_SLEEP);
	bcopy(CPU->cpu_gdt, cp->cpu_gdt, (sizeof (*cp->cpu_gdt) * NGDT));

#if defined(__i386)
	/*
	 * setup kernel %gs.
	 */
	set_usegd(&cp->cpu_gdt[GDT_GS], cp, sizeof (struct cpu) -1, SDT_MEMRWA,
	    SEL_KPL, 0, 1);
#endif

	/*
	 * If we have more than one node, each cpu gets a copy of IDT
	 * local to its node. If this is a Pentium box, we use cpu 0's
	 * IDT. cpu 0's IDT has been made read-only to work around the
	 * cmpxchgl register bug.
	 */
	if (system_hardware.hd_nodes && x86_type != X86_TYPE_P5) {
#if !defined(__lint)
		ASSERT((sizeof (*CPU->cpu_idt) * NIDT) <= PAGESIZE);
#endif
		cp->cpu_idt = kmem_zalloc(PAGESIZE, KM_SLEEP);
		bcopy(CPU->cpu_idt, cp->cpu_idt, PAGESIZE);
	} else {
		cp->cpu_idt = CPU->cpu_idt;
	}

	/*
	 * Get interrupt priority data from cpu 0.
	 */
	cp->cpu_pri_data = CPU->cpu_pri_data;

	/*
	 * alloc space for cpuid info
	 */
	cpuid_alloc_space(cp);
#if !defined(__xpv)
	if (is_x86_feature(x86_featureset, X86FSET_MWAIT) &&
	    idle_cpu_prefer_mwait) {
		cp->cpu_m.mcpu_mwait = cpuid_mwait_alloc(cp);
		cp->cpu_m.mcpu_idle_cpu = cpu_idle_mwait;
	} else
#endif
		cp->cpu_m.mcpu_idle_cpu = cpu_idle;

	init_cpu_info(cp);

	/*
	 * alloc space for ucode_info
	 */
	ucode_alloc_space(cp);
	xc_init_cpu(cp);
	hat_cpu_online(cp);

#ifdef TRAPTRACE
	/*
	 * If this is a TRAPTRACE kernel, allocate TRAPTRACE buffers
	 */
	ttc->ttc_first = (uintptr_t)kmem_zalloc(trap_trace_bufsize, KM_SLEEP);
	ttc->ttc_next = ttc->ttc_first;
	ttc->ttc_limit = ttc->ttc_first + trap_trace_bufsize;
#endif

	/*
	 * Record that we have another CPU.
	 */
	/*
	 * Initialize the interrupt threads for this CPU
	 */
	cpu_intr_alloc(cp, NINTR_THREADS);

	cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;
	cpu_set_state(cp);

	/*
	 * Add CPU to list of available CPUs. It'll be on the active list
	 * after mp_startup_common().
	 */
	cpu_add_unit(cp);

	return (cp);
}

/*
 * Undo what was done in mp_cpu_configure_common
 */
static void
mp_cpu_unconfigure_common(struct cpu *cp, int error)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * Remove the CPU from the list of available CPUs.
	 */
	cpu_del_unit(cp->cpu_id);

	if (error == ETIMEDOUT) {
		/*
		 * The cpu was started, but never *seemed* to run any
		 * code in the kernel; it's probably off spinning in its
		 * own private world, though with potential references to
		 * our kmem-allocated IDTs and GDTs (for example).
		 *
		 * Worse still, it may actually wake up some time later,
		 * so rather than guess what it might or might not do, we
		 * leave the fundamental data structures intact.
		 */
		cp->cpu_flags = 0;
		return;
	}

	/*
	 * At this point, the only threads bound to this CPU should be
	 * special per-CPU threads: its idle thread, its pause threads,
	 * and its interrupt threads.  Clean these up.
	 */
	cpu_destroy_bound_threads(cp);
	cp->cpu_idle_thread = NULL;

	/*
	 * Free the interrupt stack.
	 */
	segkp_release(segkp,
	    cp->cpu_intr_stack - (INTR_STACK_SIZE - SA(MINFRAME)));
	cp->cpu_intr_stack = NULL;

#ifdef TRAPTRACE
	/*
	 * Discard the trap trace buffer
	 */
	{
		trap_trace_ctl_t *ttc = &trap_trace_ctl[cp->cpu_id];

		kmem_free((void *)ttc->ttc_first, trap_trace_bufsize);
		ttc->ttc_first = NULL;
	}
#endif

	hat_cpu_offline(cp);

	ucode_free_space(cp);

	/* Free CPU ID string and brand string. */
	if (cp->cpu_idstr) {
		kmem_free(cp->cpu_idstr, CPU_IDSTRLEN);
		cp->cpu_idstr = NULL;
	}
	if (cp->cpu_brandstr) {
		kmem_free(cp->cpu_brandstr, CPU_IDSTRLEN);
		cp->cpu_brandstr = NULL;
	}

#if !defined(__xpv)
	if (cp->cpu_m.mcpu_mwait != NULL) {
		cpuid_mwait_free(cp);
		cp->cpu_m.mcpu_mwait = NULL;
	}
#endif
	cpuid_free_space(cp);

	if (cp->cpu_idt != CPU->cpu_idt)
		kmem_free(cp->cpu_idt, PAGESIZE);
	cp->cpu_idt = NULL;

	kmem_free(cp->cpu_gdt, PAGESIZE);
	cp->cpu_gdt = NULL;

	if (cp->cpu_supp_freqs != NULL) {
		size_t len = strlen(cp->cpu_supp_freqs) + 1;
		kmem_free(cp->cpu_supp_freqs, len);
		cp->cpu_supp_freqs = NULL;
	}

	teardown_vaddr_for_ppcopy(cp);

	kcpc_hw_fini(cp);

	cp->cpu_dispthread = NULL;
	cp->cpu_thread = NULL;	/* discarded by cpu_destroy_bound_threads() */

	cpu_vm_data_destroy(cp);

	xc_fini_cpu(cp);
	disp_cpu_fini(cp);

	ASSERT(cp != CPU0);
	bzero(cp, sizeof (*cp));
	cp->cpu_next_free = cpu_free_list;
	cpu_free_list = cp;
}

/*
 * Apply workarounds for known errata, and warn about those that are absent.
 *
 * System vendors occasionally create configurations which contain different
 * revisions of the CPUs that are almost but not exactly the same.  At the
 * time of writing, this meant that their clock rates were the same, their
 * feature sets were the same, but the required workarounds were -not-
 * necessarily the same.  So, this routine is invoked on -every- CPU soon
 * after starting to make sure that the resulting system contains the most
 * pessimal set of workarounds needed to cope with *any* of the CPUs in the
 * system.
 *
 * workaround_errata is invoked early in mlsetup() for CPU 0, and in
 * mp_startup_common() for all slave CPUs. Slaves process workaround_errata
 * prior to acknowledging their readiness to the master, so this routine will
 * never be executed by multiple CPUs in parallel, thus making updates to
 * global data safe.
 *
 * These workarounds are based on Rev 3.57 of the Revision Guide for
 * AMD Athlon(tm) 64 and AMD Opteron(tm) Processors, August 2005.
 */

#if defined(OPTERON_ERRATUM_88)
int opteron_erratum_88;		/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_91)
int opteron_erratum_91;		/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_93)
int opteron_erratum_93;		/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_95)
int opteron_erratum_95;		/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_100)
int opteron_erratum_100;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_108)
int opteron_erratum_108;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_109)
int opteron_erratum_109;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_121)
int opteron_erratum_121;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_122)
int opteron_erratum_122;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_123)
int opteron_erratum_123;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_131)
int opteron_erratum_131;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_WORKAROUND_6336786)
int opteron_workaround_6336786;		/* non-zero -> WA relevant and applied */
int opteron_workaround_6336786_UP = 0;	/* Not needed for UP */
#endif

#if defined(OPTERON_WORKAROUND_6323525)
int opteron_workaround_6323525;	/* if non-zero -> at least one cpu has it */
#endif

#if defined(OPTERON_ERRATUM_298)
int opteron_erratum_298;
#endif

#if defined(OPTERON_ERRATUM_721)
int opteron_erratum_721;
#endif

static void
workaround_warning(cpu_t *cp, uint_t erratum)
{
	cmn_err(CE_WARN, "cpu%d: no workaround for erratum %u",
	    cp->cpu_id, erratum);
}

static void
workaround_applied(uint_t erratum)
{
	if (erratum > 1000000)
		cmn_err(CE_CONT, "?workaround applied for cpu issue #%d\n",
		    erratum);
	else
		cmn_err(CE_CONT, "?workaround applied for cpu erratum #%d\n",
		    erratum);
}

static void
msr_warning(cpu_t *cp, const char *rw, uint_t msr, int error)
{
	cmn_err(CE_WARN, "cpu%d: couldn't %smsr 0x%x, error %d",
	    cp->cpu_id, rw, msr, error);
}

/*
 * Determine the number of nodes in a Hammer / Greyhound / Griffin family
 * system.
 */
static uint_t
opteron_get_nnodes(void)
{
	static uint_t nnodes = 0;

	if (nnodes == 0) {
#ifdef	DEBUG
		uint_t family;

		/*
		 * This routine uses a PCI config space based mechanism
		 * for retrieving the number of nodes in the system.
		 * Device 24, function 0, offset 0x60 as used here is not
		 * AMD processor architectural, and may not work on processor
		 * families other than those listed below.
		 *
		 * Callers of this routine must ensure that we're running on
		 * a processor which supports this mechanism.
		 * The assertion below is meant to catch calls on unsupported
		 * processors.
		 */
		family = cpuid_getfamily(CPU);
		ASSERT(family == 0xf || family == 0x10 || family == 0x11);
#endif	/* DEBUG */

		/*
		 * Obtain the number of nodes in the system from
		 * bits [6:4] of the Node ID register on node 0.
		 *
		 * The actual node count is NodeID[6:4] + 1
		 *
		 * The Node ID register is accessed via function 0,
		 * offset 0x60. Node 0 is device 24.
		 */
		nnodes = ((pci_getl_func(0, 24, 0, 0x60) & 0x70) >> 4) + 1;
	}
	return (nnodes);
}

uint_t
do_erratum_298(struct cpu *cpu)
{
	static int	osvwrc = -3;
	extern int	osvw_opteron_erratum(cpu_t *, uint_t);

	/*
	 * L2 Eviction May Occur During Processor Operation To Set
	 * Accessed or Dirty Bit.
	 */
	if (osvwrc == -3) {
		osvwrc = osvw_opteron_erratum(cpu, 298);
	} else {
		/* osvw return codes should be consistent for all cpus */
		ASSERT(osvwrc == osvw_opteron_erratum(cpu, 298));
	}

	switch (osvwrc) {
	case 0:		/* erratum is not present: do nothing */
		break;
	case 1:		/* erratum is present: BIOS workaround applied */
		/*
		 * check if workaround is actually in place and issue warning
		 * if not.
		 */
		if (((rdmsr(MSR_AMD_HWCR) & AMD_HWCR_TLBCACHEDIS) == 0) ||
		    ((rdmsr(MSR_AMD_BU_CFG) & AMD_BU_CFG_E298) == 0)) {
#if defined(OPTERON_ERRATUM_298)
			opteron_erratum_298++;
#else
			workaround_warning(cpu, 298);
			return (1);
#endif
		}
		break;
	case -1:	/* cannot determine via osvw: check cpuid */
		if ((cpuid_opteron_erratum(cpu, 298) > 0) &&
		    (((rdmsr(MSR_AMD_HWCR) & AMD_HWCR_TLBCACHEDIS) == 0) ||
		    ((rdmsr(MSR_AMD_BU_CFG) & AMD_BU_CFG_E298) == 0))) {
#if defined(OPTERON_ERRATUM_298)
			opteron_erratum_298++;
#else
			workaround_warning(cpu, 298);
			return (1);
#endif
		}
		break;
	}
	return (0);
}

uint_t
workaround_errata(struct cpu *cpu)
{
	uint_t missing = 0;

	ASSERT(cpu == CPU);

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 88) > 0) {
		/*
		 * SWAPGS May Fail To Read Correct GS Base
		 */
#if defined(OPTERON_ERRATUM_88)
		/*
		 * The workaround is an mfence in the relevant assembler code
		 */
		opteron_erratum_88++;
#else
		workaround_warning(cpu, 88);
		missing++;
#endif
	}

	if (cpuid_opteron_erratum(cpu, 91) > 0) {
		/*
		 * Software Prefetches May Report A Page Fault
		 */
#if defined(OPTERON_ERRATUM_91)
		/*
		 * fix is in trap.c
		 */
		opteron_erratum_91++;
#else
		workaround_warning(cpu, 91);
		missing++;
#endif
	}

	if (cpuid_opteron_erratum(cpu, 93) > 0) {
		/*
		 * RSM Auto-Halt Restart Returns to Incorrect RIP
		 */
#if defined(OPTERON_ERRATUM_93)
		/*
		 * fix is in trap.c
		 */
		opteron_erratum_93++;
#else
		workaround_warning(cpu, 93);
		missing++;
#endif
	}

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 95) > 0) {
		/*
		 * RET Instruction May Return to Incorrect EIP
		 */
#if defined(OPTERON_ERRATUM_95)
#if defined(_LP64)
		/*
		 * Workaround this by ensuring that 32-bit user code and
		 * 64-bit kernel code never occupy the same address
		 * range mod 4G.
		 */
		if (_userlimit32 > 0xc0000000ul)
			*(uintptr_t *)&_userlimit32 = 0xc0000000ul;

		/*LINTED*/
		ASSERT((uint32_t)COREHEAP_BASE == 0xc0000000u);
		opteron_erratum_95++;
#endif	/* _LP64 */
#else
		workaround_warning(cpu, 95);
		missing++;
#endif
	}

	if (cpuid_opteron_erratum(cpu, 100) > 0) {
		/*
		 * Compatibility Mode Branches Transfer to Illegal Address
		 */
#if defined(OPTERON_ERRATUM_100)
		/*
		 * fix is in trap.c
		 */
		opteron_erratum_100++;
#else
		workaround_warning(cpu, 100);
		missing++;
#endif
	}

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 108) > 0) {
		/*
		 * CPUID Instruction May Return Incorrect Model Number In
		 * Some Processors
		 */
#if defined(OPTERON_ERRATUM_108)
		/*
		 * (Our cpuid-handling code corrects the model number on
		 * those processors)
		 */
#else
		workaround_warning(cpu, 108);
		missing++;
#endif
	}

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 109) > 0) do {
		/*
		 * Certain Reverse REP MOVS May Produce Unpredictable Behavior
		 */
#if defined(OPTERON_ERRATUM_109)
		/*
		 * The "workaround" is to print a warning to upgrade the BIOS
		 */
		uint64_t value;
		const uint_t msr = MSR_AMD_PATCHLEVEL;
		int err;

		if ((err = checked_rdmsr(msr, &value)) != 0) {
			msr_warning(cpu, "rd", msr, err);
			workaround_warning(cpu, 109);
			missing++;
		}
		if (value == 0)
			opteron_erratum_109++;
#else
		workaround_warning(cpu, 109);
		missing++;
#endif
	/*CONSTANTCONDITION*/
	} while (0);

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 121) > 0) {
		/*
		 * Sequential Execution Across Non_Canonical Boundary Caused
		 * Processor Hang
		 */
#if defined(OPTERON_ERRATUM_121)
#if defined(_LP64)
		/*
		 * Erratum 121 is only present in long (64 bit) mode.
		 * Workaround is to include the page immediately before the
		 * va hole to eliminate the possibility of system hangs due to
		 * sequential execution across the va hole boundary.
		 */
		if (opteron_erratum_121)
			opteron_erratum_121++;
		else {
			if (hole_start) {
				hole_start -= PAGESIZE;
			} else {
				/*
				 * hole_start not yet initialized by
				 * mmu_init. Initialize hole_start
				 * with value to be subtracted.
				 */
				hole_start = PAGESIZE;
			}
			opteron_erratum_121++;
		}
#endif	/* _LP64 */
#else
		workaround_warning(cpu, 121);
		missing++;
#endif
	}

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 122) > 0) do {
		/*
		 * TLB Flush Filter May Cause Coherency Problem in
		 * Multiprocessor Systems
		 */
#if defined(OPTERON_ERRATUM_122)
		uint64_t value;
		const uint_t msr = MSR_AMD_HWCR;
		int error;

		/*
		 * Erratum 122 is only present in MP configurations (multi-core
		 * or multi-processor).
		 */
#if defined(__xpv)
		if (!DOMAIN_IS_INITDOMAIN(xen_info))
			break;
		if (!opteron_erratum_122 && xpv_nr_phys_cpus() == 1)
			break;
#else
		if (!opteron_erratum_122 && opteron_get_nnodes() == 1 &&
		    cpuid_get_ncpu_per_chip(cpu) == 1)
			break;
#endif
		/* disable TLB Flush Filter */

		if ((error = checked_rdmsr(msr, &value)) != 0) {
			msr_warning(cpu, "rd", msr, error);
			workaround_warning(cpu, 122);
			missing++;
		} else {
			value |= (uint64_t)AMD_HWCR_FFDIS;
			if ((error = checked_wrmsr(msr, value)) != 0) {
				msr_warning(cpu, "wr", msr, error);
				workaround_warning(cpu, 122);
				missing++;
			}
		}
		opteron_erratum_122++;
#else
		workaround_warning(cpu, 122);
		missing++;
#endif
	/*CONSTANTCONDITION*/
	} while (0);

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 123) > 0) do {
		/*
		 * Bypassed Reads May Cause Data Corruption or System Hang in
		 * Dual Core Processors
		 */
#if defined(OPTERON_ERRATUM_123)
		uint64_t value;
		const uint_t msr = MSR_AMD_PATCHLEVEL;
		int err;

		/*
		 * Erratum 123 applies only to multi-core cpus.
		 */
		if (cpuid_get_ncpu_per_chip(cpu) < 2)
			break;
#if defined(__xpv)
		if (!DOMAIN_IS_INITDOMAIN(xen_info))
			break;
#endif
		/*
		 * The "workaround" is to print a warning to upgrade the BIOS
		 */
		if ((err = checked_rdmsr(msr, &value)) != 0) {
			msr_warning(cpu, "rd", msr, err);
			workaround_warning(cpu, 123);
			missing++;
		}
		if (value == 0)
			opteron_erratum_123++;
#else
		workaround_warning(cpu, 123);
		missing++;

#endif
	/*CONSTANTCONDITION*/
	} while (0);

	/*LINTED*/
	if (cpuid_opteron_erratum(cpu, 131) > 0) do {
		/*
		 * Multiprocessor Systems with Four or More Cores May Deadlock
		 * Waiting for a Probe Response
		 */
#if defined(OPTERON_ERRATUM_131)
		uint64_t nbcfg;
		const uint_t msr = MSR_AMD_NB_CFG;
		const uint64_t wabits =
		    AMD_NB_CFG_SRQ_HEARTBEAT | AMD_NB_CFG_SRQ_SPR;
		int error;

		/*
		 * Erratum 131 applies to any system with four or more cores.
		 */
		if (opteron_erratum_131)
			break;
#if defined(__xpv)
		if (!DOMAIN_IS_INITDOMAIN(xen_info))
			break;
		if (xpv_nr_phys_cpus() < 4)
			break;
#else
		if (opteron_get_nnodes() * cpuid_get_ncpu_per_chip(cpu) < 4)
			break;
#endif
		/*
		 * Print a warning if neither of the workarounds for
		 * erratum 131 is present.
		 */
		if ((error = checked_rdmsr(msr, &nbcfg)) != 0) {
			msr_warning(cpu, "rd", msr, error);
			workaround_warning(cpu, 131);
			missing++;
		} else if ((nbcfg & wabits) == 0) {
			opteron_erratum_131++;
		} else {
			/* cannot have both workarounds set */
			ASSERT((nbcfg & wabits) != wabits);
		}
#else
		workaround_warning(cpu, 131);
		missing++;
#endif
	/*CONSTANTCONDITION*/
	} while (0);

	/*
	 * This isn't really an erratum, but for convenience the
	 * detection/workaround code lives here and in cpuid_opteron_erratum.
	 */
	if (cpuid_opteron_erratum(cpu, 6336786) > 0) {
#if defined(OPTERON_WORKAROUND_6336786)
		/*
		 * Disable C1-Clock ramping on multi-core/multi-processor
		 * K8 platforms to guard against TSC drift.
		 */
		if (opteron_workaround_6336786) {
			opteron_workaround_6336786++;
#if defined(__xpv)
		} else if ((DOMAIN_IS_INITDOMAIN(xen_info) &&
		    xpv_nr_phys_cpus() > 1) ||
		    opteron_workaround_6336786_UP) {
			/*
			 * XXPV	Hmm.  We can't walk the Northbridges on
			 *	the hypervisor; so just complain and drive
			 *	on.  This probably needs to be fixed in
			 *	the hypervisor itself.
			 */
			opteron_workaround_6336786++;
			workaround_warning(cpu, 6336786);
#else	/* __xpv */
		} else if ((opteron_get_nnodes() *
		    cpuid_get_ncpu_per_chip(cpu) > 1) ||
		    opteron_workaround_6336786_UP) {

			uint_t	node, nnodes;
			uint8_t data;

			nnodes = opteron_get_nnodes();
			for (node = 0; node < nnodes; node++) {
				/*
				 * Clear PMM7[1:0] (function 3, offset 0x87)
				 * Northbridge device is the node id + 24.
				 */
				data = pci_getb_func(0, node + 24, 3, 0x87);
				data &= 0xFC;
				pci_putb_func(0, node + 24, 3, 0x87, data);
			}
			opteron_workaround_6336786++;
#endif	/* __xpv */
		}
#else
		workaround_warning(cpu, 6336786);
		missing++;
#endif
	}

	/*LINTED*/
	/*
	 * Mutex primitives don't work as expected.
	 */
	if (cpuid_opteron_erratum(cpu, 6323525) > 0) {
#if defined(OPTERON_WORKAROUND_6323525)
		/*
		 * This problem only occurs with 2 or more cores.  If the bit
		 * in MSR_AMD_BU_CFG is set, then it is not applicable.  The
		 * workaround is to patch the semaphore routines with the
		 * lfence instruction to provide the necessary load memory
		 * barrier with possible subsequent read-modify-write ops.
		 *
		 * It is too early in boot to call the patch routine so
		 * set erratum variable to be done in startup_end().
		 */
		if (opteron_workaround_6323525) {
			opteron_workaround_6323525++;
#if defined(__xpv)
		} else if (is_x86_feature(x86_featureset, X86FSET_SSE2)) {
			if (DOMAIN_IS_INITDOMAIN(xen_info)) {
				/*
				 * XXPV	Use dom0_msr here when extended
				 *	operations are supported?
				 */
				if (xpv_nr_phys_cpus() > 1)
					opteron_workaround_6323525++;
			} else {
				/*
				 * We have no way to tell how many physical
				 * cpus there are, or even if this processor
				 * has the problem, so enable the workaround
				 * unconditionally (at some performance cost).
				 */
				opteron_workaround_6323525++;
			}
#else	/* __xpv */
		} else if (is_x86_feature(x86_featureset, X86FSET_SSE2) &&
		    ((opteron_get_nnodes() *
		    cpuid_get_ncpu_per_chip(cpu)) > 1)) {
			if ((xrdmsr(MSR_AMD_BU_CFG) & (UINT64_C(1) << 33)) == 0)
				opteron_workaround_6323525++;
#endif	/* __xpv */
		}
#else
		workaround_warning(cpu, 6323525);
		missing++;
#endif
	}

	missing += do_erratum_298(cpu);

	if (cpuid_opteron_erratum(cpu, 721) > 0) {
#if defined(OPTERON_ERRATUM_721)
		wrmsr(MSR_AMD_DE_CFG, rdmsr(MSR_AMD_DE_CFG) | AMD_DE_CFG_E721);
		opteron_erratum_721++;
#else
		workaround_warning(cpu, 721);
		missing++;
#endif
	}

#ifdef __xpv
	return (0);
#else
	return (missing);
#endif
}

void
workaround_errata_end()
{
#if defined(OPTERON_ERRATUM_88)
	if (opteron_erratum_88)
		workaround_applied(88);
#endif
#if defined(OPTERON_ERRATUM_91)
	if (opteron_erratum_91)
		workaround_applied(91);
#endif
#if defined(OPTERON_ERRATUM_93)
	if (opteron_erratum_93)
		workaround_applied(93);
#endif
#if defined(OPTERON_ERRATUM_95)
	if (opteron_erratum_95)
		workaround_applied(95);
#endif
#if defined(OPTERON_ERRATUM_100)
	if (opteron_erratum_100)
		workaround_applied(100);
#endif
#if defined(OPTERON_ERRATUM_108)
	if (opteron_erratum_108)
		workaround_applied(108);
#endif
#if defined(OPTERON_ERRATUM_109)
	if (opteron_erratum_109) {
		cmn_err(CE_WARN,
		    "BIOS microcode patch for AMD Athlon(tm) 64/Opteron(tm)"
		    " processor\nerratum 109 was not detected; updating your"
		    " system's BIOS to a version\ncontaining this"
		    " microcode patch is HIGHLY recommended or erroneous"
		    " system\noperation may occur.\n");
	}
#endif
#if defined(OPTERON_ERRATUM_121)
	if (opteron_erratum_121)
		workaround_applied(121);
#endif
#if defined(OPTERON_ERRATUM_122)
	if (opteron_erratum_122)
		workaround_applied(122);
#endif
#if defined(OPTERON_ERRATUM_123)
	if (opteron_erratum_123) {
		cmn_err(CE_WARN,
		    "BIOS microcode patch for AMD Athlon(tm) 64/Opteron(tm)"
		    " processor\nerratum 123 was not detected; updating your"
		    " system's BIOS to a version\ncontaining this"
		    " microcode patch is HIGHLY recommended or erroneous"
		    " system\noperation may occur.\n");
	}
#endif
#if defined(OPTERON_ERRATUM_131)
	if (opteron_erratum_131) {
		cmn_err(CE_WARN,
		    "BIOS microcode patch for AMD Athlon(tm) 64/Opteron(tm)"
		    " processor\nerratum 131 was not detected; updating your"
		    " system's BIOS to a version\ncontaining this"
		    " microcode patch is HIGHLY recommended or erroneous"
		    " system\noperation may occur.\n");
	}
#endif
#if defined(OPTERON_WORKAROUND_6336786)
	if (opteron_workaround_6336786)
		workaround_applied(6336786);
#endif
#if defined(OPTERON_WORKAROUND_6323525)
	if (opteron_workaround_6323525)
		workaround_applied(6323525);
#endif
#if defined(OPTERON_ERRATUM_298)
	if (opteron_erratum_298) {
		cmn_err(CE_WARN,
		    "BIOS microcode patch for AMD 64/Opteron(tm)"
		    " processor\nerratum 298 was not detected; updating your"
		    " system's BIOS to a version\ncontaining this"
		    " microcode patch is HIGHLY recommended or erroneous"
		    " system\noperation may occur.\n");
	}
#endif
#if defined(OPTERON_ERRATUM_721)
	if (opteron_erratum_721)
		workaround_applied(721);
#endif
}

/*
 * The procset_slave and procset_master are used to synchronize
 * between the control CPU and the target CPU when starting CPUs.
 */
static cpuset_t procset_slave, procset_master;

static void
mp_startup_wait(cpuset_t *sp, processorid_t cpuid)
{
	cpuset_t tempset;

	for (tempset = *sp; !CPU_IN_SET(tempset, cpuid);
	    tempset = *(volatile cpuset_t *)sp) {
		SMT_PAUSE();
	}
	CPUSET_ATOMIC_DEL(*(cpuset_t *)sp, cpuid);
}

static void
mp_startup_signal(cpuset_t *sp, processorid_t cpuid)
{
	cpuset_t tempset;

	CPUSET_ATOMIC_ADD(*(cpuset_t *)sp, cpuid);
	for (tempset = *sp; CPU_IN_SET(tempset, cpuid);
	    tempset = *(volatile cpuset_t *)sp) {
		SMT_PAUSE();
	}
}
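
/*
 * Overview of the startup handshake implemented with procset_slave and
 * procset_master (see mp_start_cpu_common() below and mp_startup_common()):
 *
 *   1. The target CPU signals procset_slave as soon as it is running in
 *	the kernel; the control CPU polls for this, then the two sides
 *	perform the TSC sync as master and slave.
 *   2. After the target has finished cpuid probing, it signals
 *	procset_slave again; the control CPU then performs the CPUPM and
 *	processor group initialization that requires cpu_lock on the
 *	target's behalf.
 *   3. The control CPU signals procset_master, releasing the target to
 *	complete its startup.
 */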

int
mp_start_cpu_common(cpu_t *cp, boolean_t boot)
{
	_NOTE(ARGUNUSED(boot));

	void *ctx;
	int delays;
	int error = 0;
	cpuset_t tempset;
	processorid_t cpuid;
#ifndef __xpv
	extern void cpupm_init(cpu_t *);
#endif

	ASSERT(cp != NULL);
	cpuid = cp->cpu_id;
	ctx = mach_cpucontext_alloc(cp);
	if (ctx == NULL) {
		cmn_err(CE_WARN,
		    "cpu%d: failed to allocate context", cp->cpu_id);
		return (EAGAIN);
	}
	error = mach_cpu_start(cp, ctx);
	if (error != 0) {
		cmn_err(CE_WARN,
		    "cpu%d: failed to start, error %d", cp->cpu_id, error);
		mach_cpucontext_free(cp, ctx, error);
		return (error);
	}

	for (delays = 0, tempset = procset_slave; !CPU_IN_SET(tempset, cpuid);
	    delays++) {
		if (delays == 500) {
			/*
			 * After five seconds, things are probably looking
			 * a bit bleak - explain the hang.
			 */
			cmn_err(CE_NOTE, "cpu%d: started, "
			    "but not running in the kernel yet", cpuid);
		} else if (delays > 2000) {
			/*
			 * We waited at least 20 seconds, bail ..
			 */
			error = ETIMEDOUT;
			cmn_err(CE_WARN, "cpu%d: timed out", cpuid);
			mach_cpucontext_free(cp, ctx, error);
			return (error);
		}

		/*
		 * wait at least 10ms, then check again..
		 */
		delay(USEC_TO_TICK_ROUNDUP(10000));
		tempset = *((volatile cpuset_t *)&procset_slave);
	}
	CPUSET_ATOMIC_DEL(procset_slave, cpuid);

	mach_cpucontext_free(cp, ctx, 0);

#ifndef __xpv
	if (tsc_gethrtime_enable)
		tsc_sync_master(cpuid);
#endif

	if (dtrace_cpu_init != NULL) {
		(*dtrace_cpu_init)(cpuid);
	}

	/*
	 * During CPU DR operations, the cpu_lock is held by current
	 * (the control) thread. We can't release the cpu_lock here
	 * because that will break the CPU DR logic.
	 * On the other hand, CPUPM and processor group initialization
	 * routines need to access the cpu_lock. So we invoke those
	 * routines here on behalf of mp_startup_common().
	 *
	 * CPUPM and processor group initialization routines depend
	 * on the cpuid probing results. Wait for mp_startup_common()
	 * to signal that cpuid probing is done.
	 */
	mp_startup_wait(&procset_slave, cpuid);
#ifndef __xpv
	cpupm_init(cp);
#endif
	(void) pg_cpu_init(cp, B_FALSE);
	cpu_set_state(cp);
	mp_startup_signal(&procset_master, cpuid);

	return (0);
}

/*
 * Start a single cpu, assuming that the kernel context is available
 * to successfully start another cpu.
 *
 * (For example, real mode code is mapped into the right place
 * in memory and is ready to be run.)
 */
int
start_cpu(processorid_t who)
{
	cpu_t *cp;
	int error = 0;
	cpuset_t tempset;

	ASSERT(who != 0);

	/*
	 * Check if there's at least a Mbyte of kmem available
	 * before attempting to start the cpu.
	 */
	if (kmem_avail() < 1024 * 1024) {
		/*
		 * Kick off a reap in case that helps us with
		 * later attempts ..
		 */
		kmem_reap();
		return (ENOMEM);
	}

	/*
	 * First configure cpu.
	 */
	cp = mp_cpu_configure_common(who, B_TRUE);
	ASSERT(cp != NULL);

	/*
	 * Then start cpu.
	 */
	error = mp_start_cpu_common(cp, B_TRUE);
	if (error != 0) {
		mp_cpu_unconfigure_common(cp, error);
		return (error);
	}

	mutex_exit(&cpu_lock);
	tempset = cpu_ready_set;
	while (!CPU_IN_SET(tempset, who)) {
		drv_usecwait(1);
		tempset = *((volatile cpuset_t *)&cpu_ready_set);
	}
	mutex_enter(&cpu_lock);

	return (0);
}

void
start_other_cpus(int cprboot)
{
	_NOTE(ARGUNUSED(cprboot));

	uint_t who;
	uint_t bootcpuid = 0;

	/*
	 * Initialize our own cpu_info.
	 */
	init_cpu_info(CPU);

	cmn_err(CE_CONT, "?cpu%d: %s\n", CPU->cpu_id, CPU->cpu_idstr);
	cmn_err(CE_CONT, "?cpu%d: %s\n", CPU->cpu_id, CPU->cpu_brandstr);

	/*
	 * Initialize our syscall handlers
	 */
	init_cpu_syscall(CPU);

	/*
	 * Take the boot cpu out of the mp_cpus set because we know
	 * it's already running.  Add it to the cpu_ready_set for
	 * precisely the same reason.
	 */
	CPUSET_DEL(mp_cpus, bootcpuid);
	CPUSET_ADD(cpu_ready_set, bootcpuid);

	/*
	 * skip the rest of this if
	 * . only 1 cpu detected and system isn't hotplug-capable
	 * . not using MP
	 */
	if ((CPUSET_ISNULL(mp_cpus) && plat_dr_support_cpu() == 0) ||
	    use_mp == 0) {
		if (use_mp == 0)
			cmn_err(CE_CONT, "?***** Not in MP mode\n");
		goto done;
	}

	/*
	 * perform such initialization as is needed
	 * to be able to take CPUs on- and off-line.
	 */
	cpu_pause_init();

	xc_init_cpu(CPU);		/* initialize processor crosscalls */

	if (mach_cpucontext_init() != 0)
		goto done;

	flushes_require_xcalls = 1;

	/*
	 * We lock our affinity to the master CPU to ensure that all slave CPUs
	 * do their TSC syncs with the same CPU.
	 */
	affinity_set(CPU_CURRENT);

	for (who = 0; who < NCPU; who++) {
		if (!CPU_IN_SET(mp_cpus, who))
			continue;
		ASSERT(who != bootcpuid);

		mutex_enter(&cpu_lock);
		if (start_cpu(who) != 0)
			CPUSET_DEL(mp_cpus, who);
		cpu_state_change_notify(who, CPU_SETUP);
		mutex_exit(&cpu_lock);
	}

	/* Free the space allocated to hold the microcode file */
	ucode_cleanup();

	affinity_clear();

	mach_cpucontext_fini();

done:
	if (get_hwenv() == HW_NATIVE)
		workaround_errata_end();
	cmi_post_mpstartup();

	if (use_mp && ncpus != boot_max_ncpus) {
		cmn_err(CE_NOTE,
		    "System detected %d cpus, but "
		    "only %d cpu(s) were enabled during boot.",
		    boot_max_ncpus, ncpus);
		cmn_err(CE_NOTE,
		    "Use \"boot-ncpus\" parameter to enable more CPU(s). "
" 1560 "See eeprom(1M)."); 1561 } 1562 } 1563 1564 int 1565 mp_cpu_configure(int cpuid) 1566 { 1567 cpu_t *cp; 1568 1569 if (use_mp == 0 || plat_dr_support_cpu() == 0) { 1570 return (ENOTSUP); 1571 } 1572 1573 cp = cpu_get(cpuid); 1574 if (cp != NULL) { 1575 return (EALREADY); 1576 } 1577 1578 /* 1579 * Check if there's at least a Mbyte of kmem available 1580 * before attempting to start the cpu. 1581 */ 1582 if (kmem_avail() < 1024 * 1024) { 1583 /* 1584 * Kick off a reap in case that helps us with 1585 * later attempts .. 1586 */ 1587 kmem_reap(); 1588 return (ENOMEM); 1589 } 1590 1591 cp = mp_cpu_configure_common(cpuid, B_FALSE); 1592 ASSERT(cp != NULL && cpu_get(cpuid) == cp); 1593 1594 return (cp != NULL ? 0 : EAGAIN); 1595 } 1596 1597 int 1598 mp_cpu_unconfigure(int cpuid) 1599 { 1600 cpu_t *cp; 1601 1602 if (use_mp == 0 || plat_dr_support_cpu() == 0) { 1603 return (ENOTSUP); 1604 } else if (cpuid < 0 || cpuid >= max_ncpus) { 1605 return (EINVAL); 1606 } 1607 1608 cp = cpu_get(cpuid); 1609 if (cp == NULL) { 1610 return (ENODEV); 1611 } 1612 mp_cpu_unconfigure_common(cp, 0); 1613 1614 return (0); 1615 } 1616 1617 /* 1618 * Startup function for 'other' CPUs (besides boot cpu). 1619 * Called from real_mode_start. 1620 * 1621 * WARNING: until CPU_READY is set, mp_startup_common and routines called by 1622 * mp_startup_common should not call routines (e.g. kmem_free) that could call 1623 * hat_unload which requires CPU_READY to be set. 1624 */ 1625 static void 1626 mp_startup_common(boolean_t boot) 1627 { 1628 cpu_t *cp = CPU; 1629 uchar_t new_x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)]; 1630 extern void cpu_event_init_cpu(cpu_t *); 1631 1632 /* 1633 * We need to get TSC on this proc synced (i.e., any delta 1634 * from cpu0 accounted for) as soon as we can, because many 1635 * many things use gethrtime/pc_gethrestime, including 1636 * interrupts, cmn_err, etc. 1637 */ 1638 1639 /* Let the control CPU continue into tsc_sync_master() */ 1640 mp_startup_signal(&procset_slave, cp->cpu_id); 1641 1642 #ifndef __xpv 1643 if (tsc_gethrtime_enable) 1644 tsc_sync_slave(); 1645 #endif 1646 1647 /* 1648 * Once this was done from assembly, but it's safer here; if 1649 * it blocks, we need to be able to swtch() to and from, and 1650 * since we get here by calling t_pc, we need to do that call 1651 * before swtch() overwrites it. 1652 */ 1653 (void) (*ap_mlsetup)(); 1654 1655 bzero(new_x86_featureset, BT_SIZEOFMAP(NUM_X86_FEATURES)); 1656 cpuid_pass1(cp, new_x86_featureset); 1657 1658 #ifndef __xpv 1659 /* 1660 * Program this cpu's PAT 1661 */ 1662 if (is_x86_feature(x86_featureset, X86FSET_PAT)) 1663 pat_sync(); 1664 #endif 1665 1666 /* 1667 * Set up TSC_AUX to contain the cpuid for this processor 1668 * for the rdtscp instruction. 1669 */ 1670 if (is_x86_feature(x86_featureset, X86FSET_TSCP)) 1671 (void) wrmsr(MSR_AMD_TSCAUX, cp->cpu_id); 1672 1673 /* 1674 * Initialize this CPU's syscall handlers 1675 */ 1676 init_cpu_syscall(cp); 1677 1678 /* 1679 * Enable interrupts with spl set to LOCK_LEVEL. LOCK_LEVEL is the 1680 * highest level at which a routine is permitted to block on 1681 * an adaptive mutex (allows for cpu poke interrupt in case 1682 * the cpu is blocked on a mutex and halts). Setting LOCK_LEVEL blocks 1683 * device interrupts that may end up in the hat layer issuing cross 1684 * calls before CPU_READY is set. 
	 */
	splx(ipltospl(LOCK_LEVEL));
	sti();

	/*
	 * Do a sanity check to make sure this new CPU is a sane thing
	 * to add to the collection of processors running this system.
	 *
	 * XXX	Clearly this needs to get more sophisticated, if x86
	 * systems start to get built out of heterogeneous CPUs; as is
	 * likely to happen once the number of processors in a configuration
	 * gets large enough.
	 */
	if (compare_x86_featureset(x86_featureset, new_x86_featureset) ==
	    B_FALSE) {
		cmn_err(CE_CONT, "cpu%d: featureset\n", cp->cpu_id);
		print_x86_featureset(new_x86_featureset);
		cmn_err(CE_WARN, "cpu%d feature mismatch", cp->cpu_id);
	}

	/*
	 * We do not support cpus with mixed monitor/mwait support if the
	 * boot cpu supports monitor/mwait.
	 */
	if (is_x86_feature(x86_featureset, X86FSET_MWAIT) !=
	    is_x86_feature(new_x86_featureset, X86FSET_MWAIT))
		panic("unsupported mixed cpu monitor/mwait support detected");

	/*
	 * We could be more sophisticated here, and just mark the CPU
	 * as "faulted" but at this point we'll opt for the easier
	 * answer of dying horribly.  Provided the boot cpu is ok,
	 * the system can be recovered by booting with use_mp set to zero.
	 */
	if (workaround_errata(cp) != 0)
		panic("critical workaround(s) missing for cpu%d", cp->cpu_id);

	/*
	 * We can touch cpu_flags here without acquiring the cpu_lock
	 * because the cpu_lock is held by the control CPU which is running
	 * mp_start_cpu_common().
	 * Need to clear CPU_QUIESCED flag before calling any function which
	 * may cause thread context switching, such as kmem_alloc() etc.
	 * The idle thread checks for CPU_QUIESCED flag and loops forever if
	 * it's set. So the startup thread may have no chance to switch back
	 * again if it's switched away with CPU_QUIESCED set.
	 */
	cp->cpu_flags &= ~(CPU_POWEROFF | CPU_QUIESCED);

	/*
	 * Setup this processor for XSAVE.
	 */
	if (fp_save_mech == FP_XSAVE) {
		xsave_setup_msr(cp);
	}

	cpuid_pass2(cp);
	cpuid_pass3(cp);
	cpuid_pass4(cp, NULL);

	/*
	 * Correct cpu_idstr and cpu_brandstr on target CPU after
	 * cpuid_pass1() is done.
	 */
	(void) cpuid_getidstr(cp, cp->cpu_idstr, CPU_IDSTRLEN);
	(void) cpuid_getbrandstr(cp, cp->cpu_brandstr, CPU_IDSTRLEN);

	cp->cpu_flags |= CPU_RUNNING | CPU_READY | CPU_EXISTS;

	post_startup_cpu_fixups();

	cpu_event_init_cpu(cp);

	/*
	 * Enable preemption here so that contention for any locks acquired
	 * later in mp_startup_common may be preempted if the thread owning
	 * those locks is continuously executing on other CPUs (for example,
	 * this CPU must be preemptible to allow other CPUs to pause it during
	 * their startup phases).  It's safe to enable preemption here because
	 * the CPU state is pretty-much fully constructed.
	 */
	curthread->t_preempt = 0;

	/* The base spl should still be at LOCK LEVEL here */
	ASSERT(cp->cpu_base_spl == ipltospl(LOCK_LEVEL));
	set_base_spl();		/* Restore the spl to its proper value */

	pghw_physid_create(cp);
	/*
	 * Delegate initialization tasks, which need to access the cpu_lock,
	 * to mp_start_cpu_common() because we can't acquire the cpu_lock here
	 * during CPU DR operations.
	 */
	mp_startup_signal(&procset_slave, cp->cpu_id);
	mp_startup_wait(&procset_master, cp->cpu_id);
	pg_cmt_cpu_startup(cp);

	if (boot) {
		mutex_enter(&cpu_lock);
		cp->cpu_flags &= ~CPU_OFFLINE;
		cpu_enable_intr(cp);
		cpu_add_active(cp);
		mutex_exit(&cpu_lock);
	}

	/* Enable interrupts */
	(void) spl0();

	/*
	 * Fill out cpu_ucode_info.  Update microcode if necessary.
	 */
	ucode_check(cp);

#ifndef __xpv
	{
		/*
		 * Set up the CPU module for this CPU.  This can't be done
		 * before this CPU is made CPU_READY, because we may (in
		 * heterogeneous systems) need to go load another CPU module.
		 * The act of attempting to load a module may trigger a
		 * cross-call, which will ASSERT unless this cpu is CPU_READY.
		 */
		cmi_hdl_t hdl;

		if ((hdl = cmi_init(CMI_HDL_NATIVE, cmi_ntv_hwchipid(CPU),
		    cmi_ntv_hwcoreid(CPU), cmi_ntv_hwstrandid(CPU))) != NULL) {
			if (is_x86_feature(x86_featureset, X86FSET_MCA))
				cmi_mca_init(hdl);
			cp->cpu_m.mcpu_cmi_hdl = hdl;
		}
	}
#endif /* __xpv */

	if (boothowto & RB_DEBUG)
		kdi_cpu_init();

	/*
	 * Setting the bit in cpu_ready_set must be the last operation in
	 * processor initialization; the boot CPU will continue to boot once
	 * it sees this bit set for all active CPUs.
	 */
	CPUSET_ATOMIC_ADD(cpu_ready_set, cp->cpu_id);

	(void) mach_cpu_create_device_node(cp, NULL);

	cmn_err(CE_CONT, "?cpu%d: %s\n", cp->cpu_id, cp->cpu_idstr);
	cmn_err(CE_CONT, "?cpu%d: %s\n", cp->cpu_id, cp->cpu_brandstr);
	cmn_err(CE_CONT, "?cpu%d initialization complete - online\n",
	    cp->cpu_id);

	/*
	 * Now we are done with the startup thread, so free it up.
	 */
	thread_exit();
	panic("mp_startup: cannot return");
	/*NOTREACHED*/
}

/*
 * Startup function for 'other' CPUs at boot time (besides boot cpu).
 */
static void
mp_startup_boot(void)
{
	mp_startup_common(B_TRUE);
}

/*
 * Startup function for hotplug CPUs at runtime.
 */
void
mp_startup_hotplug(void)
{
	mp_startup_common(B_FALSE);
}

/*
 * Start CPU on user request.
 */
/* ARGSUSED */
int
mp_cpu_start(struct cpu *cp)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	return (0);
}

/*
 * Stop CPU on user request.
 */
int
mp_cpu_stop(struct cpu *cp)
{
	extern int cbe_psm_timer_mode;
	ASSERT(MUTEX_HELD(&cpu_lock));

#ifdef __xpv
	/*
	 * We can't offline vcpu0.
	 */
	if (cp->cpu_id == 0)
		return (EBUSY);
#endif

	/*
	 * If TIMER_PERIODIC mode is used, CPU0 is the one running it;
	 * can't stop it.  (This is true only for machines with no TSC.)
	 */

	if ((cbe_psm_timer_mode == TIMER_PERIODIC) && (cp->cpu_id == 0))
		return (EBUSY);

	return (0);
}

/*
 * Take the specified CPU out of participation in interrupts.
 */
int
cpu_disable_intr(struct cpu *cp)
{
	if (psm_disable_intr(cp->cpu_id) != DDI_SUCCESS)
		return (EBUSY);

	cp->cpu_flags &= ~CPU_ENABLE;
	return (0);
}

/*
 * Allow the specified CPU to participate in interrupts.
 */
void
cpu_enable_intr(struct cpu *cp)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	cp->cpu_flags |= CPU_ENABLE;
	psm_enable_intr(cp->cpu_id);
}

void
mp_cpu_faulted_enter(struct cpu *cp)
{
#ifdef __xpv
	_NOTE(ARGUNUSED(cp));
#else
	cmi_hdl_t hdl = cp->cpu_m.mcpu_cmi_hdl;

	if (hdl != NULL) {
		cmi_hdl_hold(hdl);
	} else {
		hdl = cmi_hdl_lookup(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp),
		    cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp));
	}
	if (hdl != NULL) {
		cmi_faulted_enter(hdl);
		cmi_hdl_rele(hdl);
	}
#endif
}

void
mp_cpu_faulted_exit(struct cpu *cp)
{
#ifdef __xpv
	_NOTE(ARGUNUSED(cp));
#else
	cmi_hdl_t hdl = cp->cpu_m.mcpu_cmi_hdl;

	if (hdl != NULL) {
		cmi_hdl_hold(hdl);
	} else {
		hdl = cmi_hdl_lookup(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp),
		    cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp));
	}
	if (hdl != NULL) {
		cmi_faulted_exit(hdl);
		cmi_hdl_rele(hdl);
	}
#endif
}

/*
 * The following two routines are used as context operators on threads
 * belonging to processes with a private LDT (see sysi86).  Due to the
 * rarity of such processes, these routines are currently written for best
 * code readability and organization rather than speed.  We could avoid
 * checking x86_featureset at every context switch by installing different
 * context ops, depending on x86_featureset, at LDT creation time -- one for
 * each combination of fast syscall features.
 */

/*ARGSUSED*/
void
cpu_fast_syscall_disable(void *arg)
{
	if (is_x86_feature(x86_featureset, X86FSET_MSR) &&
	    is_x86_feature(x86_featureset, X86FSET_SEP))
		cpu_sep_disable();
	if (is_x86_feature(x86_featureset, X86FSET_MSR) &&
	    is_x86_feature(x86_featureset, X86FSET_ASYSC))
		cpu_asysc_disable();
}

/*ARGSUSED*/
void
cpu_fast_syscall_enable(void *arg)
{
	if (is_x86_feature(x86_featureset, X86FSET_MSR) &&
	    is_x86_feature(x86_featureset, X86FSET_SEP))
		cpu_sep_enable();
	if (is_x86_feature(x86_featureset, X86FSET_MSR) &&
	    is_x86_feature(x86_featureset, X86FSET_ASYSC))
		cpu_asysc_enable();
}

static void
cpu_sep_enable(void)
{
	ASSERT(is_x86_feature(x86_featureset, X86FSET_SEP));
	ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);

	wrmsr(MSR_INTC_SEP_CS, (uint64_t)(uintptr_t)KCS_SEL);
}

static void
cpu_sep_disable(void)
{
	ASSERT(is_x86_feature(x86_featureset, X86FSET_SEP));
	ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);

	/*
	 * Setting the SYSENTER_CS_MSR register to 0 causes software executing
	 * the sysenter or sysexit instruction to trigger a #gp fault.
	 */
	wrmsr(MSR_INTC_SEP_CS, 0);
}

static void
cpu_asysc_enable(void)
{
	ASSERT(is_x86_feature(x86_featureset, X86FSET_ASYSC));
	ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);

	wrmsr(MSR_AMD_EFER, rdmsr(MSR_AMD_EFER) |
	    (uint64_t)(uintptr_t)AMD_EFER_SCE);
}

static void
cpu_asysc_disable(void)
{
	ASSERT(is_x86_feature(x86_featureset, X86FSET_ASYSC));
	ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);

	/*
	 * Turn off the SCE (syscall enable) bit in the EFER register.
	 * Software executing syscall or sysret with this bit off will
	 * incur a #ud trap.
	 */
	wrmsr(MSR_AMD_EFER, rdmsr(MSR_AMD_EFER) &
	    ~((uint64_t)(uintptr_t)AMD_EFER_SCE));
}