/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
 * Copyright 2014 Josef "Jeff" Sipek <jeffpc@josefsipek.net>
 */
/*
 * Copyright (c) 2010, Intel Corporation.
 * All rights reserved.
 */
/*
 * Portions Copyright 2009 Advanced Micro Devices, Inc.
 */
/*
 * Copyright 2019, Joyent, Inc.
 */

/*
 * CPU Identification logic
 *
 * The purpose of this file and its companion, cpuid_subr.c, is to help deal
 * with the identification of CPUs, their features, and their topologies. More
 * specifically, this file helps drive the following:
 *
 * 1. Enumeration of features of the processor which are used by the kernel to
 *    determine what features to enable or disable. These may be instruction
 *    set enhancements or features that we use.
 *
 * 2. Enumeration of instruction set architecture (ISA) additions that
 *    userland will be told about through the auxiliary vector.
 *
 * 3. Understanding the physical topology of the CPU such as the number of
 *    caches, how many cores it has, whether or not it supports simultaneous
 *    multi-threading (SMT), etc.
 *
 * ------------------------
 * CPUID History and Basics
 * ------------------------
 *
 * The cpuid instruction was added by Intel roughly around the time that the
 * original Pentium was introduced. The purpose of cpuid was to tell in a
 * programmatic fashion information about the CPU that previously was guessed
 * at. For example, an important part of cpuid is that we can know what
 * extensions to the ISA exist. If you use an invalid opcode you would get a
 * #UD, so this method allows a program (whether a user program or the kernel)
 * to determine what exists without crashing or getting a SIGILL. Of course,
 * this was also during the era of the clones and the AMD Am5x86. The vendor
 * name shows up first in cpuid for a reason.
 *
 * cpuid information is broken down into ranges called a 'leaf'. Each leaf
 * puts unique values into the registers %eax, %ebx, %ecx, and %edx and each
 * leaf has its own meaning. The different leaves are broken down into
 * different regions:
 *
 *    [ 0, 7fffffff ]           This region is called the 'basic' region.
 *                              This region is generally defined by Intel,
 *                              though some of the original portions have
 *                              different meanings based on the
 *                              manufacturer. These days, Intel adds most
 *                              new features to this region. AMD adds
 *                              non-Intel compatible information in the
 *                              third, extended region.
 *                              Intel uses this for everything including
 *                              ISA extensions, CPU features, cache
 *                              information, topology, and more.
 *
 *                              There is a hole carved out of this region
 *                              which is reserved for hypervisors.
 *
 *    [ 40000000, 4fffffff ]    This region, which is found in the middle
 *                              of the previous region, is explicitly
 *                              promised to never be used by CPUs. Instead,
 *                              it is used by hypervisors to communicate
 *                              information about themselves to the
 *                              operating system. The values and details
 *                              are unique for each hypervisor.
 *
 *    [ 80000000, ffffffff ]    This region is called the 'extended'
 *                              region. Some of the low leaves mirror parts
 *                              of the basic leaves. This region has
 *                              generally been used by AMD for various
 *                              extensions. For example, AMD-specific
 *                              information about caches, features, and
 *                              topology are found in this region.
 *
 * To specify a range, you place the desired leaf into %eax, zero %ebx, %ecx,
 * and %edx, and then issue the cpuid instruction. At the first leaf in each
 * of the ranges, one of the primary things returned is the maximum valid leaf
 * in that range. This allows for discovery of what range of CPUID is valid.
 *
 * The CPUs have potentially surprising behavior when using an invalid leaf or
 * unimplemented leaf. If the requested leaf is within the valid basic or
 * extended range, but is unimplemented, then %eax, %ebx, %ecx, and %edx will
 * be set to zero. However, if you specify a leaf that is outside of a valid
 * range, then instead it will be filled with the last valid _basic_ leaf. For
 * example, if the maximum basic value is on leaf 0x3, then issuing a cpuid
 * for leaf 4 or an invalid extended leaf will return the information for
 * leaf 3.
 *
 * Some leaves are broken down into sub-leaves. This means that the value
 * depends on both the leaf asked for in %eax and a secondary register. For
 * example, Intel uses the value in %ecx on leaf 7 to indicate a sub-leaf to
 * get additional information. Or when getting topology information in leaf
 * 0xb, the initial value in %ecx changes which level of the topology that you
 * are getting information about.
 *
 * cpuid values are always kept to 32 bits regardless of whether or not the
 * program is in 64-bit mode. When executing in 64-bit mode, the upper 32 bits
 * of the register are always set to zero so that way the values are the same
 * regardless of execution mode.
 *
 * ----------------------
 * Identifying Processors
 * ----------------------
 *
 * We can identify a processor in two steps. The first step looks at cpuid
 * leaf 0. Leaf 0 contains the processor's vendor information. This is done by
 * putting a 12 character string in %ebx, %ecx, and %edx. On AMD, it is
 * 'AuthenticAMD' and on Intel it is 'GenuineIntel'.
 *
 * From there, a processor is identified by a combination of three different
 * values:
 *
 * 1. Family
 * 2. Model
 * 3. Stepping
 *
 * Each vendor uses the family and model to uniquely identify a processor. The
 * way that family and model are changed depends on the vendor. For example,
 * Intel has been using family 0x6 for almost all of their processors since
 * the Pentium Pro/Pentium II era, often called the P6. The model is used to
 * identify the exact processor. Different models are often used for the
 * client (consumer) and server parts.
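 *
 * As a rough, self-contained illustration of the mechanics above, a userland
 * program can read leaf 0 (vendor string and maximum basic leaf) and leaf 1
 * (raw family, model, and stepping fields) along the following lines. This is
 * only a sketch: it relies on the GCC/Clang <cpuid.h> __get_cpuid() helper
 * rather than the kernel's own cpuid plumbing, and it prints the raw 4-bit
 * fields without the extended family/model adjustment described below.
 *
 *      #include <stdio.h>
 *      #include <string.h>
 *      #include <cpuid.h>
 *
 *      int
 *      main(void)
 *      {
 *              unsigned int eax, ebx, ecx, edx;
 *              char vendor[13];
 *
 *              // Leaf 0: %eax holds the max basic leaf; the vendor string
 *              // is spread across %ebx, %edx, and %ecx (in that byte order).
 *              if (__get_cpuid(0, &eax, &ebx, &ecx, &edx) == 0)
 *                      return (1);
 *              (void) memcpy(vendor, &ebx, 4);
 *              (void) memcpy(vendor + 4, &edx, 4);
 *              (void) memcpy(vendor + 8, &ecx, 4);
 *              vendor[12] = '\0';
 *              (void) printf("%s, max basic leaf 0x%x\n", vendor, eax);
 *
 *              // Leaf 1: raw family/model/stepping live in %eax.
 *              if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) == 0)
 *                      return (1);
 *              (void) printf("family 0x%x model 0x%x stepping 0x%x\n",
 *                  (eax >> 8) & 0xf, (eax >> 4) & 0xf, eax & 0xf);
 *              return (0);
 *      }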
 * Even though each of these processors often has major architectural
 * differences, Intel still considers them to be part of the same family.
 *
 * On the other hand, each major AMD architecture generally has its own
 * family. For example, the K8 is family 0xf, Bulldozer 0x15, and Zen 0x17.
 * Within each family, the model number is used to help identify specific
 * processors.
 *
 * The stepping is used to refer to a revision of a specific microprocessor.
 * The term comes from equipment used to produce masks that are used to create
 * integrated circuits.
 *
 * The information is present in leaf 1, %eax. In technical documentation you
 * will see the terms extended model and extended family. The original family,
 * model, and stepping fields were each 4 bits wide. If the values in either
 * are 0xf, then one is to consult the extended model and extended family,
 * which take previously reserved bits and allow for a larger number of models
 * and add 0xf to them.
 *
 * When we process this information, we store the full family, model, and
 * stepping in the struct cpuid_info members cpi_family, cpi_model, and
 * cpi_step, respectively. Whenever you are performing comparisons with the
 * family, model, and stepping, you should use these members and not the raw
 * values from cpuid. If you must use the raw values from cpuid directly, you
 * must make sure that you add the extended model and family to the base model
 * and family.
 *
 * In general, we do not use information about the family, model, and stepping
 * to determine whether or not a feature is present; that is generally driven
 * by specific leaves. However, when something we care about on the processor
 * is not considered 'architectural', meaning that it is specific to a set of
 * processors and not promised in the architecture model to be consistent from
 * generation to generation, then we will fall back on this information. The
 * most common cases where this comes up are when we have to work around
 * errata in the processor, are dealing with processor-specific features such
 * as CPU performance counters, or we want to provide additional information
 * for things such as fault management.
 *
 * While processors also have a brand string, which is the name that people
 * are familiar with when buying the processor, it is not meant for
 * programmatic consumption. That is what the family, model, and stepping are
 * for.
 *
 * ------------
 * CPUID Passes
 * ------------
 *
 * As part of performing feature detection, we break this into several
 * different passes. The passes are as follows:
 *
 * Pass 0       This is a primordial pass done in locore.s to deal with
 *              Cyrix CPUs that don't support cpuid. The reality is that
 *              we likely don't run on them any more, but there is still
 *              logic for handling them.
 *
 * Pass 1       This is the primary pass and is responsible for doing a
 *              large number of different things:
 *
 *              1. Determining which vendor manufactured the CPU and the
 *              family, model, and stepping information.
 *
 *              2. Gathering a large number of feature flags to
 *              determine which features the CPU supports and which
 *              indicate things that we need to do other work in the OS
 *              to enable. Features detected this way are added to the
 *              x86_featureset which can be queried to determine what we
 *              should do.
 *              This includes processing all of the basic and extended
 *              CPU features that we care about (see the illustrative
 *              sketch further below).
 *
 *              3. Determining the CPU's topology. This includes
 *              information about how many cores and threads are present
 *              in the package. It also is responsible for figuring out
 *              which logical CPUs are potentially part of the same core
 *              and what other resources they might share. For more
 *              information see the 'Topology' section.
 *
 *              4. Determining the set of CPU security-specific features
 *              that we need to worry about and determining the
 *              appropriate set of workarounds.
 *
 *              Pass 1 on the boot CPU occurs before KMDB is started.
 *
 * Pass 2       The second pass is done after startup(). Here, we check
 *              other miscellaneous features. Most of this is gathering
 *              additional basic and extended features that we'll use in
 *              later passes or for debugging support.
 *
 * Pass 3       The third pass occurs after the kernel memory allocator
 *              has been fully initialized. This gathers information
 *              where we might need dynamic memory available for our
 *              uses. This includes several varying width leaves that
 *              have cache information and the processor's brand string.
 *
 * Pass 4       The fourth and final normal pass is performed after the
 *              kernel has brought most everything online. This is
 *              invoked from post_startup(). In this pass, we go through
 *              the set of features that we have enabled and turn that
 *              into the hardware auxiliary vector features that
 *              userland receives. This is used by userland, primarily
 *              by the run-time link-editor (RTLD), though userland
 *              software could also refer to it directly.
 *
 * Microcode    After a microcode update, we do a selective rescan of
 *              the cpuid leaves to determine what features have
 *              changed. Microcode updates can provide more details
 *              about security related features to deal with issues like
 *              Spectre and L1TF. On occasion, vendors have violated
 *              their contract and removed bits. However, we don't try
 *              to detect that because that puts us in a situation that
 *              we really can't deal with. As such, the only things we
 *              rescan today are security related features. See
 *              cpuid_pass_ucode().
 *
 * All of the passes (except pass 0) are run on all CPUs. However, for the
 * most part we only care about what the boot CPU says about this information
 * and use the other CPUs as a rough guide to sanity check that we have the
 * same feature set.
 *
 * We do not support running multiple logical CPUs with disjoint, let alone
 * different, feature sets.
 *
 * ------------------
 * Processor Topology
 * ------------------
 *
 * One of the important things that we need to do is to understand the
 * topology of the underlying processor. When we say topology in this case,
 * we're trying to understand the relationship between the logical CPUs that
 * the operating system sees and the underlying physical layout. Different
 * logical CPUs may share different resources which can have important
 * consequences for the performance of the system. For example, they may
 * share caches, execution units, and more.
 *
 * The topology of the processor changes from generation to generation and
 * vendor to vendor. Along with that, different vendors use different
 * terminology, and the operating system itself uses occasionally overlapping
 * terminology.
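 *
 * As a minimal illustration of the kind of feature-flag gathering that pass 1
 * performs (item 2 above), the following sketch checks a few well-known bits
 * from leaf 1 and leaf 7 and records them in a bitmap, much as pass 1 records
 * bits in x86_featureset via add_x86_feature(). The helpers and bit numbers
 * local to this sketch (the GCC/Clang <cpuid.h> functions and the MY_*
 * constants) are illustrative assumptions, not this file's interfaces.
 *
 *      #include <stdint.h>
 *      #include <cpuid.h>
 *
 *      #define MY_SSE2         (1ULL << 0)     // local bit numbers only
 *      #define MY_AVX          (1ULL << 1)
 *      #define MY_RDSEED       (1ULL << 2)
 *
 *      static uint64_t
 *      gather_features(void)
 *      {
 *              unsigned int eax, ebx, ecx, edx;
 *              uint64_t set = 0;
 *
 *              // Leaf 1: classic feature bits live in %edx and %ecx.
 *              if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) != 0) {
 *                      if (edx & (1U << 26))           // SSE2
 *                              set |= MY_SSE2;
 *                      if (ecx & (1U << 28))           // AVX
 *                              set |= MY_AVX;
 *              }
 *
 *              // Leaf 7, sub-leaf 0: structured extended features in %ebx.
 *              if (__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx) != 0) {
 *                      if (ebx & (1U << 18))           // RDSEED
 *                              set |= MY_RDSEED;
 *              }
 *              return (set);
 *      }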
It's important to understand what this topology looks like so 293 * one can understand the different things that we try to calculate and 294 * determine. 295 * 296 * To get started, let's talk about a little bit of terminology that we've used 297 * so far, is used throughout this file, and is fairly generic across multiple 298 * vendors: 299 * 300 * CPU 301 * A central processing unit (CPU) refers to a logical and/or virtual 302 * entity that the operating system can execute instructions on. The 303 * underlying resources for this CPU may be shared between multiple 304 * entities; however, to the operating system it is a discrete unit. 305 * 306 * PROCESSOR and PACKAGE 307 * 308 * Generally, when we use the term 'processor' on its own, we are referring 309 * to the physical entity that one buys and plugs into a board. However, 310 * because processor has been overloaded and one might see it used to mean 311 * multiple different levels, we will instead use the term 'package' for 312 * the rest of this file. The term package comes from the electrical 313 * engineering side and refers to the physical entity that encloses the 314 * electronics inside. Strictly speaking the package can contain more than 315 * just the CPU, for example, on many processors it may also have what's 316 * called an 'integrated graphical processing unit (GPU)'. Because the 317 * package can encapsulate multiple units, it is the largest physical unit 318 * that we refer to. 319 * 320 * SOCKET 321 * 322 * A socket refers to unit on a system board (generally the motherboard) 323 * that can receive a package. A single package, or processor, is plugged 324 * into a single socket. A system may have multiple sockets. Often times, 325 * the term socket is used interchangeably with package and refers to the 326 * electrical component that has plugged in, and not the receptacle itself. 327 * 328 * CORE 329 * 330 * A core refers to the physical instantiation of a CPU, generally, with a 331 * full set of hardware resources available to it. A package may contain 332 * multiple cores inside of it or it may just have a single one. A 333 * processor with more than one core is often referred to as 'multi-core'. 334 * In illumos, we will use the feature X86FSET_CMP to refer to a system 335 * that has 'multi-core' processors. 336 * 337 * A core may expose a single logical CPU to the operating system, or it 338 * may expose multiple CPUs, which we call threads, defined below. 339 * 340 * Some resources may still be shared by cores in the same package. For 341 * example, many processors will share the level 3 cache between cores. 342 * Some AMD generations share hardware resources between cores. For more 343 * information on that see the section 'AMD Topology'. 344 * 345 * THREAD and STRAND 346 * 347 * In this file, generally a thread refers to a hardware resources and not 348 * the operating system's logical abstraction. A thread is always exposed 349 * as an independent logical CPU to the operating system. A thread belongs 350 * to a specific core. A core may have more than one thread. When that is 351 * the case, the threads that are part of the same core are often referred 352 * to as 'siblings'. 353 * 354 * When multiple threads exist, this is generally referred to as 355 * simultaneous multi-threading (SMT). When Intel introduced this in their 356 * processors they called it hyper-threading (HT). When multiple threads 357 * are active in a core, they split the resources of the core. 
For example, 358 * two threads may share the same set of hardware execution units. 359 * 360 * The operating system often uses the term 'strand' to refer to a thread. 361 * This helps disambiguate it from the software concept. 362 * 363 * CHIP 364 * 365 * Unfortunately, the term 'chip' is dramatically overloaded. At its most 366 * base meaning, it is used to refer to a single integrated circuit, which 367 * may or may not be the only thing in the package. In illumos, when you 368 * see the term 'chip' it is almost always referring to the same thing as 369 * the 'package'. However, many vendors may use chip to refer to one of 370 * many integrated circuits that have been placed in the package. As an 371 * example, see the subsequent definition. 372 * 373 * To try and keep things consistent, we will only use chip when referring 374 * to the entire integrated circuit package, with the exception of the 375 * definition of multi-chip module (because it is in the name) and use the 376 * term 'die' when we want the more general, potential sub-component 377 * definition. 378 * 379 * DIE 380 * 381 * A die refers to an integrated circuit. Inside of the package there may 382 * be a single die or multiple dies. This is sometimes called a 'chip' in 383 * vendor's parlance, but in this file, we use the term die to refer to a 384 * subcomponent. 385 * 386 * MULTI-CHIP MODULE 387 * 388 * A multi-chip module (MCM) refers to putting multiple distinct chips that 389 * are connected together in the same package. When a multi-chip design is 390 * used, generally each chip is manufactured independently and then joined 391 * together in the package. For example, on AMD's Zen microarchitecture 392 * (family 0x17), the package contains several dies (the second meaning of 393 * chip from above) that are connected together. 394 * 395 * CACHE 396 * 397 * A cache is a part of the processor that maintains copies of recently 398 * accessed memory. Caches are split into levels and then into types. 399 * Commonly there are one to three levels, called level one, two, and 400 * three. The lower the level, the smaller it is, the closer it is to the 401 * execution units of the CPU, and the faster it is to access. The layout 402 * and design of the cache come in many different flavors, consult other 403 * resources for a discussion of those. 404 * 405 * Caches are generally split into two types, the instruction and data 406 * cache. The caches contain what their names suggest, the instruction 407 * cache has executable program text, while the data cache has all other 408 * memory that the processor accesses. As of this writing, data is kept 409 * coherent between all of the caches on x86, so if one modifies program 410 * text before it is executed, that will be in the data cache, and the 411 * instruction cache will be synchronized with that change when the 412 * processor actually executes those instructions. This coherency also 413 * covers the fact that data could show up in multiple caches. 414 * 415 * Generally, the lowest level caches are specific to a core. However, the 416 * last layer cache is shared between some number of cores. The number of 417 * CPUs sharing this last level cache is important. This has implications 418 * for the choices that the scheduler makes, as accessing memory that might 419 * be in a remote cache after thread migration can be quite expensive. 420 * 421 * Sometimes, the word cache is abbreviated with a '$', because in US 422 * English the word cache is pronounced the same as cash. 
So L1D$ refers to 423 * the L1 data cache, and L2$ would be the L2 cache. This will not be used 424 * in the rest of this theory statement for clarity. 425 * 426 * MEMORY CONTROLLER 427 * 428 * The memory controller is a component that provides access to DRAM. Each 429 * memory controller can access a set number of DRAM channels. Each channel 430 * can have a number of DIMMs (sticks of memory) associated with it. A 431 * given package may have more than one memory controller. The association 432 * of the memory controller to a group of cores is important as it is 433 * cheaper to access memory on the controller that you are associated with. 434 * 435 * NUMA 436 * 437 * NUMA or non-uniform memory access, describes a way that systems are 438 * built. On x86, any processor core can address all of the memory in the 439 * system. However, When using multiple sockets or possibly within a 440 * multi-chip module, some of that memory is physically closer and some of 441 * it is further. Memory that is further away is more expensive to access. 442 * Consider the following image of multiple sockets with memory: 443 * 444 * +--------+ +--------+ 445 * | DIMM A | +----------+ +----------+ | DIMM D | 446 * +--------+-+ | | | | +-+------+-+ 447 * | DIMM B |=======| Socket 0 |======| Socket 1 |=======| DIMM E | 448 * +--------+-+ | | | | +-+------+-+ 449 * | DIMM C | +----------+ +----------+ | DIMM F | 450 * +--------+ +--------+ 451 * 452 * In this example, Socket 0 is closer to DIMMs A-C while Socket 1 is 453 * closer to DIMMs D-F. This means that it is cheaper for socket 0 to 454 * access DIMMs A-C and more expensive to access D-F as it has to go 455 * through Socket 1 to get there. The inverse is true for Socket 1. DIMMs 456 * D-F are cheaper than A-C. While the socket form is the most common, when 457 * using multi-chip modules, this can also sometimes occur. For another 458 * example of this that's more involved, see the AMD topology section. 459 * 460 * 461 * Intel Topology 462 * -------------- 463 * 464 * Most Intel processors since Nehalem, (as of this writing the current gen 465 * is Skylake / Cannon Lake) follow a fairly similar pattern. The CPU portion of 466 * the package is a single monolithic die. MCMs currently aren't used. Most 467 * parts have three levels of caches, with the L3 cache being shared between 468 * all of the cores on the package. The L1/L2 cache is generally specific to 469 * an individual core. The following image shows at a simplified level what 470 * this looks like. The memory controller is commonly part of something called 471 * the 'Uncore', that used to be separate physical chips that were not a part of 472 * the package, but are now part of the same chip. 
473 * 474 * +-----------------------------------------------------------------------+ 475 * | Package | 476 * | +-------------------+ +-------------------+ +-------------------+ | 477 * | | Core | | Core | | Core | | 478 * | | +--------+ +---+ | | +--------+ +---+ | | +--------+ +---+ | | 479 * | | | Thread | | L | | | | Thread | | L | | | | Thread | | L | | | 480 * | | +--------+ | 1 | | | +--------+ | 1 | | | +--------+ | 1 | | | 481 * | | +--------+ | | | | +--------+ | | | | +--------+ | | | | 482 * | | | Thread | | | | | | Thread | | | | | | Thread | | | | | 483 * | | +--------+ +---+ | | +--------+ +---+ | | +--------+ +---+ | | 484 * | | +--------------+ | | +--------------+ | | +--------------+ | | 485 * | | | L2 Cache | | | | L2 Cache | | | | L2 Cache | | | 486 * | | +--------------+ | | +--------------+ | | +--------------+ | | 487 * | +-------------------+ +-------------------+ +-------------------+ | 488 * | +-------------------------------------------------------------------+ | 489 * | | Shared L3 Cache | | 490 * | +-------------------------------------------------------------------+ | 491 * | +-------------------------------------------------------------------+ | 492 * | | Memory Controller | | 493 * | +-------------------------------------------------------------------+ | 494 * +-----------------------------------------------------------------------+ 495 * 496 * A side effect of this current architecture is that what we care about from a 497 * scheduling and topology perspective, is simplified. In general we care about 498 * understanding which logical CPUs are part of the same core and socket. 499 * 500 * To determine the relationship between threads and cores, Intel initially used 501 * the identifier in the advanced programmable interrupt controller (APIC). They 502 * also added cpuid leaf 4 to give additional information about the number of 503 * threads and CPUs in the processor. With the addition of x2apic (which 504 * increased the number of addressable logical CPUs from 8-bits to 32-bits), an 505 * additional cpuid topology leaf 0xB was added. 506 * 507 * AMD Topology 508 * ------------ 509 * 510 * When discussing AMD topology, we want to break this into three distinct 511 * generations of topology. There's the basic topology that has been used in 512 * family 0xf+ (Opteron, Athlon64), there's the topology that was introduced 513 * with family 0x15 (Bulldozer), and there's the topology that was introduced 514 * with family 0x17 (Zen). AMD also has some additional terminology that's worth 515 * talking about. 516 * 517 * Until the introduction of family 0x17 (Zen), AMD did not implement something 518 * that they considered SMT. Whether or not the AMD processors have SMT 519 * influences many things including scheduling and reliability, availability, 520 * and serviceability (RAS) features. 521 * 522 * NODE 523 * 524 * AMD uses the term node to refer to a die that contains a number of cores 525 * and I/O resources. Depending on the processor family and model, more 526 * than one node can be present in the package. When there is more than one 527 * node this indicates a multi-chip module. Usually each node has its own 528 * access to memory and I/O devices. This is important and generally 529 * different from the corresponding Intel Nehalem-Skylake+ processors. As a 530 * result, we track this relationship in the operating system. 
531 * 532 * In processors with an L3 cache, the L3 cache is generally shared across 533 * the entire node, though the way this is carved up varies from generation 534 * to generation. 535 * 536 * BULLDOZER 537 * 538 * Starting with the Bulldozer family (0x15) and continuing until the 539 * introduction of the Zen microarchitecture, AMD introduced the idea of a 540 * compute unit. In a compute unit, two traditional cores share a number of 541 * hardware resources. Critically, they share the FPU, L1 instruction 542 * cache, and the L2 cache. Several compute units were then combined inside 543 * of a single node. Because the integer execution units, L1 data cache, 544 * and some other resources were not shared between the cores, AMD never 545 * considered this to be SMT. 546 * 547 * ZEN 548 * 549 * The Zen family (0x17) uses a multi-chip module (MCM) design, the module 550 * is called Zeppelin. These modules are similar to the idea of nodes used 551 * previously. Each of these nodes has two DRAM channels which all of the 552 * cores in the node can access uniformly. These nodes are linked together 553 * in the package, creating a NUMA environment. 554 * 555 * The Zeppelin die itself contains two different 'core complexes'. Each 556 * core complex consists of four cores which each have two threads, for a 557 * total of 8 logical CPUs per complex. Unlike other generations, 558 * where all the logical CPUs in a given node share the L3 cache, here each 559 * core complex has its own shared L3 cache. 560 * 561 * A further thing that we need to consider is that in some configurations, 562 * particularly with the Threadripper line of processors, not every die 563 * actually has its memory controllers wired up to actual memory channels. 564 * This means that some cores have memory attached to them and others 565 * don't. 566 * 567 * To put Zen in perspective, consider the following images: 568 * 569 * +--------------------------------------------------------+ 570 * | Core Complex | 571 * | +-------------------+ +-------------------+ +---+ | 572 * | | Core +----+ | | Core +----+ | | | | 573 * | | +--------+ | L2 | | | +--------+ | L2 | | | | | 574 * | | | Thread | +----+ | | | Thread | +----+ | | | | 575 * | | +--------+-+ +--+ | | +--------+-+ +--+ | | L | | 576 * | | | Thread | |L1| | | | Thread | |L1| | | 3 | | 577 * | | +--------+ +--+ | | +--------+ +--+ | | | | 578 * | +-------------------+ +-------------------+ | C | | 579 * | +-------------------+ +-------------------+ | a | | 580 * | | Core +----+ | | Core +----+ | | c | | 581 * | | +--------+ | L2 | | | +--------+ | L2 | | | h | | 582 * | | | Thread | +----+ | | | Thread | +----+ | | e | | 583 * | | +--------+-+ +--+ | | +--------+-+ +--+ | | | | 584 * | | | Thread | |L1| | | | Thread | |L1| | | | | 585 * | | +--------+ +--+ | | +--------+ +--+ | | | | 586 * | +-------------------+ +-------------------+ +---+ | 587 * | | 588 * +--------------------------------------------------------+ 589 * 590 * This first image represents a single Zen core complex that consists of four 591 * cores. 592 * 593 * 594 * +--------------------------------------------------------+ 595 * | Zeppelin Die | 596 * | +--------------------------------------------------+ | 597 * | | I/O Units (PCIe, SATA, USB, etc.) 
| | 598 * | +--------------------------------------------------+ | 599 * | HH | 600 * | +-----------+ HH +-----------+ | 601 * | | | HH | | | 602 * | | Core |==========| Core | | 603 * | | Complex |==========| Complex | | 604 * | | | HH | | | 605 * | +-----------+ HH +-----------+ | 606 * | HH | 607 * | +--------------------------------------------------+ | 608 * | | Memory Controller | | 609 * | +--------------------------------------------------+ | 610 * | | 611 * +--------------------------------------------------------+ 612 * 613 * This image represents a single Zeppelin Die. Note how both cores are 614 * connected to the same memory controller and I/O units. While each core 615 * complex has its own L3 cache as seen in the first image, they both have 616 * uniform access to memory. 617 * 618 * 619 * PP PP 620 * PP PP 621 * +----------PP---------------------PP---------+ 622 * | PP PP | 623 * | +-----------+ +-----------+ | 624 * | | | | | | 625 * MMMMMMMMM| Zeppelin |==========| Zeppelin |MMMMMMMMM 626 * MMMMMMMMM| Die |==========| Die |MMMMMMMMM 627 * | | | | | | 628 * | +-----------+ooo ...+-----------+ | 629 * | HH ooo ... HH | 630 * | HH oo.. HH | 631 * | HH ..oo HH | 632 * | HH ... ooo HH | 633 * | +-----------+... ooo+-----------+ | 634 * | | | | | | 635 * MMMMMMMMM| Zeppelin |==========| Zeppelin |MMMMMMMMM 636 * MMMMMMMMM| Die |==========| Die |MMMMMMMMM 637 * | | | | | | 638 * | +-----------+ +-----------+ | 639 * | PP PP | 640 * +----------PP---------------------PP---------+ 641 * PP PP 642 * PP PP 643 * 644 * This image represents a single Zen package. In this example, it has four 645 * Zeppelin dies, though some configurations only have a single one. In this 646 * example, each die is directly connected to the next. Also, each die is 647 * represented as being connected to memory by the 'M' character and connected 648 * to PCIe devices and other I/O, by the 'P' character. Because each Zeppelin 649 * die is made up of two core complexes, we have multiple different NUMA 650 * domains that we care about for these systems. 651 * 652 * CPUID LEAVES 653 * 654 * There are a few different CPUID leaves that we can use to try and understand 655 * the actual state of the world. As part of the introduction of family 0xf, AMD 656 * added CPUID leaf 0x80000008. This leaf tells us the number of logical 657 * processors that are in the system. Because families before Zen didn't have 658 * SMT, this was always the number of cores that were in the system. However, it 659 * should always be thought of as the number of logical threads to be consistent 660 * between generations. In addition we also get the size of the APIC ID that is 661 * used to represent the number of logical processors. This is important for 662 * deriving topology information. 663 * 664 * In the Bulldozer family, AMD added leaf 0x8000001E. The information varies a 665 * bit between Bulldozer and later families, but it is quite useful in 666 * determining the topology information. Because this information has changed 667 * across family generations, it's worth calling out what these mean 668 * explicitly. The registers have the following meanings: 669 * 670 * %eax The APIC ID. The entire register is defined to have a 32-bit 671 * APIC ID, even though on systems without x2apic support, it will 672 * be limited to 8 bits. 673 * 674 * %ebx On Bulldozer-era systems this contains information about the 675 * number of cores that are in a compute unit (cores that share 676 * resources). 
It also contains a per-package compute unit ID that 677 * identifies which compute unit the logical CPU is a part of. 678 * 679 * On Zen-era systems this instead contains the number of threads 680 * per core and the ID of the core that the logical CPU is a part 681 * of. Note, this ID is unique only to the package, it is not 682 * globally unique across the entire system. 683 * 684 * %ecx This contains the number of nodes that exist in the package. It 685 * also contains an ID that identifies which node the logical CPU 686 * is a part of. 687 * 688 * Finally, we also use cpuid leaf 0x8000001D to determine information about the 689 * cache layout to determine which logical CPUs are sharing which caches. 690 * 691 * illumos Topology 692 * ---------------- 693 * 694 * Based on the above we synthesize the information into several different 695 * variables that we store in the 'struct cpuid_info'. We'll go into the details 696 * of what each member is supposed to represent and their uniqueness. In 697 * general, there are two levels of uniqueness that we care about. We care about 698 * an ID that is globally unique. That means that it will be unique across all 699 * entities in the system. For example, the default logical CPU ID is globally 700 * unique. On the other hand, there is some information that we only care about 701 * being unique within the context of a single package / socket. Here are the 702 * variables that we keep track of and their meaning. 703 * 704 * Several of the values that are asking for an identifier, with the exception 705 * of cpi_apicid, are allowed to be synthetic. 706 * 707 * 708 * cpi_apicid 709 * 710 * This is the value of the CPU's APIC id. This should be the full 32-bit 711 * ID if the CPU is using the x2apic. Otherwise, it should be the 8-bit 712 * APIC ID. This value is globally unique between all logical CPUs across 713 * all packages. This is usually required by the APIC. 714 * 715 * cpi_chipid 716 * 717 * This value indicates the ID of the package that the logical CPU is a 718 * part of. This value is allowed to be synthetic. It is usually derived by 719 * taking the CPU's APIC ID and determining how many bits are used to 720 * represent CPU cores in the package. All logical CPUs that are part of 721 * the same package must have the same value. 722 * 723 * cpi_coreid 724 * 725 * This represents the ID of a CPU core. Two logical CPUs should only have 726 * the same cpi_coreid value if they are part of the same core. These 727 * values may be synthetic. On systems that support SMT, this value is 728 * usually derived from the APIC ID, otherwise it is often synthetic and 729 * just set to the value of the cpu_id in the cpu_t. 730 * 731 * cpi_pkgcoreid 732 * 733 * This is similar to the cpi_coreid in that logical CPUs that are part of 734 * the same core should have the same ID. The main difference is that these 735 * values are only required to be unique to a given socket. 736 * 737 * cpi_clogid 738 * 739 * This represents the logical ID of a logical CPU. This value should be 740 * unique within a given socket for each logical CPU. This is allowed to be 741 * synthetic, though it is usually based off of the CPU's apic ID. The 742 * broader system expects that logical CPUs that have are part of the same 743 * core have contiguous numbers. For example, if there were two threads per 744 * core, then the core IDs divided by two should be the same and the first 745 * modulus two should be zero and the second one. 
For example, IDs 4 and 5 746 * indicate two logical CPUs that are part of the same core. But IDs 5 and 747 * 6 represent two logical CPUs that are part of different cores. 748 * 749 * While it is common for the cpi_coreid and the cpi_clogid to be derived 750 * from the same source, strictly speaking, they don't have to be and the 751 * two values should be considered logically independent. One should not 752 * try to compare a logical CPU's cpi_coreid and cpi_clogid to determine 753 * some kind of relationship. While this is tempting, we've seen cases on 754 * AMD family 0xf where the system's cpu id is not related to its APIC ID. 755 * 756 * cpi_ncpu_per_chip 757 * 758 * This value indicates the total number of logical CPUs that exist in the 759 * physical package. Critically, this is not the number of logical CPUs 760 * that exist for just the single core. 761 * 762 * This value should be the same for all logical CPUs in the same package. 763 * 764 * cpi_ncore_per_chip 765 * 766 * This value indicates the total number of physical CPU cores that exist 767 * in the package. The system compares this value with cpi_ncpu_per_chip to 768 * determine if simultaneous multi-threading (SMT) is enabled. When 769 * cpi_ncpu_per_chip equals cpi_ncore_per_chip, then there is no SMT and 770 * the X86FSET_HTT feature is not set. If this value is greater than one, 771 * than we consider the processor to have the feature X86FSET_CMP, to 772 * indicate that there is support for more than one core. 773 * 774 * This value should be the same for all logical CPUs in the same package. 775 * 776 * cpi_procnodes_per_pkg 777 * 778 * This value indicates the number of 'nodes' that exist in the package. 779 * When processors are actually a multi-chip module, this represents the 780 * number of such modules that exist in the package. Currently, on Intel 781 * based systems this member is always set to 1. 782 * 783 * This value should be the same for all logical CPUs in the same package. 784 * 785 * cpi_procnodeid 786 * 787 * This value indicates the ID of the node that the logical CPU is a part 788 * of. All logical CPUs that are in the same node must have the same value 789 * here. This value must be unique across all of the packages in the 790 * system. On Intel based systems, this is currently set to the value in 791 * cpi_chipid because there is only one node. 792 * 793 * cpi_cores_per_compunit 794 * 795 * This value indicates the number of cores that are part of a compute 796 * unit. See the AMD topology section for this. This member only has real 797 * meaning currently for AMD Bulldozer family processors. For all other 798 * processors, this should currently be set to 1. 799 * 800 * cpi_compunitid 801 * 802 * This indicates the compute unit that the logical CPU belongs to. For 803 * processors without AMD Bulldozer-style compute units this should be set 804 * to the value of cpi_coreid. 805 * 806 * cpi_ncpu_shr_last_cache 807 * 808 * This indicates the number of logical CPUs that are sharing the same last 809 * level cache. This value should be the same for all CPUs that are sharing 810 * that cache. The last cache refers to the cache that is closest to memory 811 * and furthest away from the CPU. 812 * 813 * cpi_last_lvl_cacheid 814 * 815 * This indicates the ID of the last cache that the logical CPU uses. This 816 * cache is often shared between multiple logical CPUs and is the cache 817 * that is closest to memory and furthest away from the CPU. 
This value 818 * should be the same for a group of logical CPUs only if they actually 819 * share the same last level cache. IDs should not overlap between 820 * packages. 821 * 822 * cpi_ncore_bits 823 * 824 * This indicates the number of bits that are required to represent all of 825 * the cores in the system. As cores are derived based on their APIC IDs, 826 * we aren't guaranteed a run of APIC IDs starting from zero. It's OK for 827 * this value to be larger than the actual number of IDs that are present 828 * in the system. This is used to size tables by the CMI framework. It is 829 * only filled in for Intel and AMD CPUs. 830 * 831 * cpi_nthread_bits 832 * 833 * This indicates the number of bits required to represent all of the IDs 834 * that cover the logical CPUs that exist on a given core. It's OK for this 835 * value to be larger than the actual number of IDs that are present in the 836 * system. This is used to size tables by the CMI framework. It is 837 * only filled in for Intel and AMD CPUs. 838 * 839 * ----------- 840 * Hypervisors 841 * ----------- 842 * 843 * If trying to manage the differences between vendors wasn't bad enough, it can 844 * get worse thanks to our friend hardware virtualization. Hypervisors are given 845 * the ability to interpose on all cpuid instructions and change them to suit 846 * their purposes. In general, this is necessary as the hypervisor wants to be 847 * able to present a more uniform set of features or not necessarily give the 848 * guest operating system kernel knowledge of all features so it can be 849 * more easily migrated between systems. 850 * 851 * When it comes to trying to determine topology information, this can be a 852 * double edged sword. When a hypervisor doesn't actually implement a cpuid 853 * leaf, it'll often return all zeros. Because of that, you'll often see various 854 * checks scattered about fields being non-zero before we assume we can use 855 * them. 856 * 857 * When it comes to topology information, the hypervisor is often incentivized 858 * to lie to you about topology. This is because it doesn't always actually 859 * guarantee that topology at all. The topology path we take in the system 860 * depends on how the CPU advertises itself. If it advertises itself as an Intel 861 * or AMD CPU, then we basically do our normal path. However, when they don't 862 * use an actual vendor, then that usually turns into multiple one-core CPUs 863 * that we enumerate that are often on different sockets. The actual behavior 864 * depends greatly on what the hypervisor actually exposes to us. 865 * 866 * -------------------- 867 * Exposing Information 868 * -------------------- 869 * 870 * We expose CPUID information in three different forms in the system. 871 * 872 * The first is through the x86_featureset variable. This is used in conjunction 873 * with the is_x86_feature() function. This is queried by x86-specific functions 874 * to determine which features are or aren't present in the system and to make 875 * decisions based upon them. For example, users of this include everything from 876 * parts of the system dedicated to reliability, availability, and 877 * serviceability (RAS), to making decisions about how to handle security 878 * mitigations, to various x86-specific drivers. General purpose or 879 * architecture independent drivers should never be calling this function. 880 * 881 * The second means is through the auxiliary vector. 
The auxiliary vector is a 882 * series of tagged data that the kernel passes down to a user program when it 883 * begins executing. This information is used to indicate to programs what 884 * instruction set extensions are present. For example, information about the 885 * CPU supporting the machine check architecture (MCA) wouldn't be passed down 886 * since user programs cannot make use of it. However, things like the AVX 887 * instruction sets are. Programs use this information to make run-time 888 * decisions about what features they should use. As an example, the run-time 889 * link-editor (rtld) can relocate different functions depending on the hardware 890 * support available. 891 * 892 * The final form is through a series of accessor functions that all have the 893 * form cpuid_get*. This is used by a number of different subsystems in the 894 * kernel to determine more detailed information about what we're running on, 895 * topology information, etc. Some of these subsystems include processor groups 896 * (uts/common/os/pg.c.), CPU Module Interface (uts/i86pc/os/cmi.c), ACPI, 897 * microcode, and performance monitoring. These functions all ASSERT that the 898 * CPU they're being called on has reached a certain cpuid pass. If the passes 899 * are rearranged, then this needs to be adjusted. 900 */ 901 902 #include <sys/types.h> 903 #include <sys/archsystm.h> 904 #include <sys/x86_archext.h> 905 #include <sys/kmem.h> 906 #include <sys/systm.h> 907 #include <sys/cmn_err.h> 908 #include <sys/sunddi.h> 909 #include <sys/sunndi.h> 910 #include <sys/cpuvar.h> 911 #include <sys/processor.h> 912 #include <sys/sysmacros.h> 913 #include <sys/pg.h> 914 #include <sys/fp.h> 915 #include <sys/controlregs.h> 916 #include <sys/bitmap.h> 917 #include <sys/auxv_386.h> 918 #include <sys/memnode.h> 919 #include <sys/pci_cfgspace.h> 920 #include <sys/comm_page.h> 921 #include <sys/mach_mmu.h> 922 #include <sys/ucode.h> 923 #include <sys/tsc.h> 924 925 #ifdef __xpv 926 #include <sys/hypervisor.h> 927 #else 928 #include <sys/ontrap.h> 929 #endif 930 931 uint_t x86_vendor = X86_VENDOR_IntelClone; 932 uint_t x86_type = X86_TYPE_OTHER; 933 uint_t x86_clflush_size = 0; 934 935 #if defined(__xpv) 936 int x86_use_pcid = 0; 937 int x86_use_invpcid = 0; 938 #else 939 int x86_use_pcid = -1; 940 int x86_use_invpcid = -1; 941 #endif 942 943 uint_t pentiumpro_bug4046376; 944 945 uchar_t x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)]; 946 947 static char *x86_feature_names[NUM_X86_FEATURES] = { 948 "lgpg", 949 "tsc", 950 "msr", 951 "mtrr", 952 "pge", 953 "de", 954 "cmov", 955 "mmx", 956 "mca", 957 "pae", 958 "cv8", 959 "pat", 960 "sep", 961 "sse", 962 "sse2", 963 "htt", 964 "asysc", 965 "nx", 966 "sse3", 967 "cx16", 968 "cmp", 969 "tscp", 970 "mwait", 971 "sse4a", 972 "cpuid", 973 "ssse3", 974 "sse4_1", 975 "sse4_2", 976 "1gpg", 977 "clfsh", 978 "64", 979 "aes", 980 "pclmulqdq", 981 "xsave", 982 "avx", 983 "vmx", 984 "svm", 985 "topoext", 986 "f16c", 987 "rdrand", 988 "x2apic", 989 "avx2", 990 "bmi1", 991 "bmi2", 992 "fma", 993 "smep", 994 "smap", 995 "adx", 996 "rdseed", 997 "mpx", 998 "avx512f", 999 "avx512dq", 1000 "avx512pf", 1001 "avx512er", 1002 "avx512cd", 1003 "avx512bw", 1004 "avx512vl", 1005 "avx512fma", 1006 "avx512vbmi", 1007 "avx512_vpopcntdq", 1008 "avx512_4vnniw", 1009 "avx512_4fmaps", 1010 "xsaveopt", 1011 "xsavec", 1012 "xsaves", 1013 "sha", 1014 "umip", 1015 "pku", 1016 "ospke", 1017 "pcid", 1018 "invpcid", 1019 "ibrs", 1020 "ibpb", 1021 "stibp", 1022 "ssbd", 1023 "ssbd_virt", 1024 "rdcl_no", 1025 
"ibrs_all", 1026 "rsba", 1027 "ssb_no", 1028 "stibp_all", 1029 "flush_cmd", 1030 "l1d_vmentry_no", 1031 "fsgsbase", 1032 "clflushopt", 1033 "clwb", 1034 "monitorx", 1035 "clzero", 1036 "xop", 1037 "fma4", 1038 "tbm", 1039 "avx512_vnni", 1040 "amd_pcec" 1041 }; 1042 1043 boolean_t 1044 is_x86_feature(void *featureset, uint_t feature) 1045 { 1046 ASSERT(feature < NUM_X86_FEATURES); 1047 return (BT_TEST((ulong_t *)featureset, feature)); 1048 } 1049 1050 void 1051 add_x86_feature(void *featureset, uint_t feature) 1052 { 1053 ASSERT(feature < NUM_X86_FEATURES); 1054 BT_SET((ulong_t *)featureset, feature); 1055 } 1056 1057 void 1058 remove_x86_feature(void *featureset, uint_t feature) 1059 { 1060 ASSERT(feature < NUM_X86_FEATURES); 1061 BT_CLEAR((ulong_t *)featureset, feature); 1062 } 1063 1064 boolean_t 1065 compare_x86_featureset(void *setA, void *setB) 1066 { 1067 /* 1068 * We assume that the unused bits of the bitmap are always zero. 1069 */ 1070 if (memcmp(setA, setB, BT_SIZEOFMAP(NUM_X86_FEATURES)) == 0) { 1071 return (B_TRUE); 1072 } else { 1073 return (B_FALSE); 1074 } 1075 } 1076 1077 void 1078 print_x86_featureset(void *featureset) 1079 { 1080 uint_t i; 1081 1082 for (i = 0; i < NUM_X86_FEATURES; i++) { 1083 if (is_x86_feature(featureset, i)) { 1084 cmn_err(CE_CONT, "?x86_feature: %s\n", 1085 x86_feature_names[i]); 1086 } 1087 } 1088 } 1089 1090 /* Note: This is the maximum size for the CPU, not the size of the structure. */ 1091 static size_t xsave_state_size = 0; 1092 uint64_t xsave_bv_all = (XFEATURE_LEGACY_FP | XFEATURE_SSE); 1093 boolean_t xsave_force_disable = B_FALSE; 1094 extern int disable_smap; 1095 1096 /* 1097 * This is set to platform type we are running on. 1098 */ 1099 static int platform_type = -1; 1100 1101 #if !defined(__xpv) 1102 /* 1103 * Variable to patch if hypervisor platform detection needs to be 1104 * disabled (e.g. platform_type will always be HW_NATIVE if this is 0). 1105 */ 1106 int enable_platform_detection = 1; 1107 #endif 1108 1109 /* 1110 * monitor/mwait info. 1111 * 1112 * size_actual and buf_actual are the real address and size allocated to get 1113 * proper mwait_buf alignement. buf_actual and size_actual should be passed 1114 * to kmem_free(). Currently kmem_alloc() and mwait happen to both use 1115 * processor cache-line alignment, but this is not guarantied in the furture. 1116 */ 1117 struct mwait_info { 1118 size_t mon_min; /* min size to avoid missed wakeups */ 1119 size_t mon_max; /* size to avoid false wakeups */ 1120 size_t size_actual; /* size actually allocated */ 1121 void *buf_actual; /* memory actually allocated */ 1122 uint32_t support; /* processor support of monitor/mwait */ 1123 }; 1124 1125 /* 1126 * xsave/xrestor info. 1127 * 1128 * This structure contains HW feature bits and the size of the xsave save area. 1129 * Note: the kernel declares a fixed size (AVX_XSAVE_SIZE) structure 1130 * (xsave_state) to describe the xsave layout. However, at runtime the 1131 * per-lwp xsave area is dynamically allocated based on xsav_max_size. The 1132 * xsave_state structure simply represents the legacy layout of the beginning 1133 * of the xsave area. 
1134 */ 1135 struct xsave_info { 1136 uint32_t xsav_hw_features_low; /* Supported HW features */ 1137 uint32_t xsav_hw_features_high; /* Supported HW features */ 1138 size_t xsav_max_size; /* max size save area for HW features */ 1139 size_t ymm_size; /* AVX: size of ymm save area */ 1140 size_t ymm_offset; /* AVX: offset for ymm save area */ 1141 size_t bndregs_size; /* MPX: size of bndregs save area */ 1142 size_t bndregs_offset; /* MPX: offset for bndregs save area */ 1143 size_t bndcsr_size; /* MPX: size of bndcsr save area */ 1144 size_t bndcsr_offset; /* MPX: offset for bndcsr save area */ 1145 size_t opmask_size; /* AVX512: size of opmask save */ 1146 size_t opmask_offset; /* AVX512: offset for opmask save */ 1147 size_t zmmlo_size; /* AVX512: size of zmm 256 save */ 1148 size_t zmmlo_offset; /* AVX512: offset for zmm 256 save */ 1149 size_t zmmhi_size; /* AVX512: size of zmm hi reg save */ 1150 size_t zmmhi_offset; /* AVX512: offset for zmm hi reg save */ 1151 }; 1152 1153 1154 /* 1155 * These constants determine how many of the elements of the 1156 * cpuid we cache in the cpuid_info data structure; the 1157 * remaining elements are accessible via the cpuid instruction. 1158 */ 1159 1160 #define NMAX_CPI_STD 8 /* eax = 0 .. 7 */ 1161 #define NMAX_CPI_EXTD 0x1f /* eax = 0x80000000 .. 0x8000001e */ 1162 1163 /* 1164 * See the big theory statement for a more detailed explanation of what some of 1165 * these members mean. 1166 */ 1167 struct cpuid_info { 1168 uint_t cpi_pass; /* last pass completed */ 1169 /* 1170 * standard function information 1171 */ 1172 uint_t cpi_maxeax; /* fn 0: %eax */ 1173 char cpi_vendorstr[13]; /* fn 0: %ebx:%ecx:%edx */ 1174 uint_t cpi_vendor; /* enum of cpi_vendorstr */ 1175 1176 uint_t cpi_family; /* fn 1: extended family */ 1177 uint_t cpi_model; /* fn 1: extended model */ 1178 uint_t cpi_step; /* fn 1: stepping */ 1179 chipid_t cpi_chipid; /* fn 1: %ebx: Intel: chip # */ 1180 /* AMD: package/socket # */ 1181 uint_t cpi_brandid; /* fn 1: %ebx: brand ID */ 1182 int cpi_clogid; /* fn 1: %ebx: thread # */ 1183 uint_t cpi_ncpu_per_chip; /* fn 1: %ebx: logical cpu count */ 1184 uint8_t cpi_cacheinfo[16]; /* fn 2: intel-style cache desc */ 1185 uint_t cpi_ncache; /* fn 2: number of elements */ 1186 uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */ 1187 id_t cpi_last_lvl_cacheid; /* fn 4: %eax: derived cache id */ 1188 uint_t cpi_cache_leaf_size; /* Number of cache elements */ 1189 /* Intel fn: 4, AMD fn: 8000001d */ 1190 struct cpuid_regs **cpi_cache_leaves; /* Acual leaves from above */ 1191 struct cpuid_regs cpi_std[NMAX_CPI_STD]; /* 0 .. 7 */ 1192 /* 1193 * extended function information 1194 */ 1195 uint_t cpi_xmaxeax; /* fn 0x80000000: %eax */ 1196 char cpi_brandstr[49]; /* fn 0x8000000[234] */ 1197 uint8_t cpi_pabits; /* fn 0x80000006: %eax */ 1198 uint8_t cpi_vabits; /* fn 0x80000006: %eax */ 1199 uint8_t cpi_fp_amd_save; /* AMD: FP error pointer save rqd. */ 1200 struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x800000XX */ 1201 1202 id_t cpi_coreid; /* same coreid => strands share core */ 1203 int cpi_pkgcoreid; /* core number within single package */ 1204 uint_t cpi_ncore_per_chip; /* AMD: fn 0x80000008: %ecx[7-0] */ 1205 /* Intel: fn 4: %eax[31-26] */ 1206 1207 /* 1208 * These values represent the number of bits that are required to store 1209 * information about the number of cores and threads. 
1210 */ 1211 uint_t cpi_ncore_bits; 1212 uint_t cpi_nthread_bits; 1213 /* 1214 * supported feature information 1215 */ 1216 uint32_t cpi_support[6]; 1217 #define STD_EDX_FEATURES 0 1218 #define AMD_EDX_FEATURES 1 1219 #define TM_EDX_FEATURES 2 1220 #define STD_ECX_FEATURES 3 1221 #define AMD_ECX_FEATURES 4 1222 #define STD_EBX_FEATURES 5 1223 /* 1224 * Synthesized information, where known. 1225 */ 1226 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */ 1227 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */ 1228 uint32_t cpi_socket; /* Chip package/socket type */ 1229 1230 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */ 1231 uint32_t cpi_apicid; 1232 uint_t cpi_procnodeid; /* AMD: nodeID on HT, Intel: chipid */ 1233 uint_t cpi_procnodes_per_pkg; /* AMD: # of nodes in the package */ 1234 /* Intel: 1 */ 1235 uint_t cpi_compunitid; /* AMD: ComputeUnit ID, Intel: coreid */ 1236 uint_t cpi_cores_per_compunit; /* AMD: # of cores in the ComputeUnit */ 1237 1238 struct xsave_info cpi_xsave; /* fn D: xsave/xrestor info */ 1239 }; 1240 1241 1242 static struct cpuid_info cpuid_info0; 1243 1244 /* 1245 * These bit fields are defined by the Intel Application Note AP-485 1246 * "Intel Processor Identification and the CPUID Instruction" 1247 */ 1248 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20) 1249 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16) 1250 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12) 1251 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8) 1252 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0) 1253 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4) 1254 1255 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx) 1256 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx) 1257 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx) 1258 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx) 1259 #define CPI_FEATURES_7_0_EBX(cpi) ((cpi)->cpi_std[7].cp_ebx) 1260 #define CPI_FEATURES_7_0_ECX(cpi) ((cpi)->cpi_std[7].cp_ecx) 1261 #define CPI_FEATURES_7_0_EDX(cpi) ((cpi)->cpi_std[7].cp_edx) 1262 1263 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0) 1264 #define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 7) 1265 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16) 1266 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24) 1267 1268 #define CPI_MAXEAX_MAX 0x100 /* sanity control */ 1269 #define CPI_XMAXEAX_MAX 0x80000100 1270 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */ 1271 #define CPI_FNB_ECX_MAX 0x20 /* sanity: max fn B levels */ 1272 1273 /* 1274 * Function 4 (Deterministic Cache Parameters) macros 1275 * Defined by Intel Application Note AP-485 1276 */ 1277 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26) 1278 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14) 1279 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9) 1280 #define CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8) 1281 #define CPI_CACHE_LVL(regs) BITX((regs)->cp_eax, 7, 5) 1282 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0) 1283 #define CPI_CPU_LEVEL_TYPE(regs) BITX((regs)->cp_ecx, 15, 8) 1284 1285 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22) 1286 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12) 1287 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0) 1288 1289 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0) 1290 1291 #define CPI_PREFCH_STRIDE(regs) 
BITX((regs)->cp_edx, 9, 0) 1292 1293 1294 /* 1295 * A couple of shorthand macros to identify "later" P6-family chips 1296 * like the Pentium M and Core. First, the "older" P6-based stuff 1297 * (loosely defined as "pre-Pentium-4"): 1298 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon 1299 */ 1300 #define IS_LEGACY_P6(cpi) ( \ 1301 cpi->cpi_family == 6 && \ 1302 (cpi->cpi_model == 1 || \ 1303 cpi->cpi_model == 3 || \ 1304 cpi->cpi_model == 5 || \ 1305 cpi->cpi_model == 6 || \ 1306 cpi->cpi_model == 7 || \ 1307 cpi->cpi_model == 8 || \ 1308 cpi->cpi_model == 0xA || \ 1309 cpi->cpi_model == 0xB) \ 1310 ) 1311 1312 /* A "new F6" is everything with family 6 that's not the above */ 1313 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi)) 1314 1315 /* Extended family/model support */ 1316 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \ 1317 cpi->cpi_family >= 0xf) 1318 1319 /* 1320 * Info for monitor/mwait idle loop. 1321 * 1322 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's 1323 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November 1324 * 2006. 1325 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual 1326 * Documentation Updates" #33633, Rev 2.05, December 2006. 1327 */ 1328 #define MWAIT_SUPPORT (0x00000001) /* mwait supported */ 1329 #define MWAIT_EXTENSIONS (0x00000002) /* extension supported */ 1330 #define MWAIT_ECX_INT_ENABLE (0x00000004) /* ecx 1 extension supported */ 1331 #define MWAIT_SUPPORTED(cpi) ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON) 1332 #define MWAIT_INT_ENABLE(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x2) 1333 #define MWAIT_EXTENSION(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x1) 1334 #define MWAIT_SIZE_MIN(cpi) BITX((cpi)->cpi_std[5].cp_eax, 15, 0) 1335 #define MWAIT_SIZE_MAX(cpi) BITX((cpi)->cpi_std[5].cp_ebx, 15, 0) 1336 /* 1337 * Number of sub-cstates for a given c-state. 1338 */ 1339 #define MWAIT_NUM_SUBC_STATES(cpi, c_state) \ 1340 BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state) 1341 1342 /* 1343 * XSAVE leaf 0xD enumeration 1344 */ 1345 #define CPUID_LEAFD_2_YMM_OFFSET 576 1346 #define CPUID_LEAFD_2_YMM_SIZE 256 1347 1348 /* 1349 * Common extended leaf names to cut down on typos. 1350 */ 1351 #define CPUID_LEAF_EXT_0 0x80000000 1352 #define CPUID_LEAF_EXT_8 0x80000008 1353 #define CPUID_LEAF_EXT_1d 0x8000001d 1354 #define CPUID_LEAF_EXT_1e 0x8000001e 1355 1356 /* 1357 * Functions we consume from cpuid_subr.c; don't publish these in a header 1358 * file to try and keep people using the expected cpuid_* interfaces. 1359 */ 1360 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t); 1361 extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t); 1362 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t); 1363 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t); 1364 extern uint_t _cpuid_vendorstr_to_vendorcode(char *); 1365 1366 /* 1367 * Apply various platform-dependent restrictions where the 1368 * underlying platform restrictions mean the CPU can be marked 1369 * as less capable than its cpuid instruction would imply. 1370 */ 1371 #if defined(__xpv) 1372 static void 1373 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp) 1374 { 1375 switch (eax) { 1376 case 1: { 1377 uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ?
1378 0 : CPUID_INTC_EDX_MCA; 1379 cp->cp_edx &= 1380 ~(mcamask | 1381 CPUID_INTC_EDX_PSE | 1382 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 1383 CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR | 1384 CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT | 1385 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 1386 CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT); 1387 break; 1388 } 1389 1390 case 0x80000001: 1391 cp->cp_edx &= 1392 ~(CPUID_AMD_EDX_PSE | 1393 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 1394 CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE | 1395 CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 | 1396 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 1397 CPUID_AMD_EDX_TSCP); 1398 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY; 1399 break; 1400 default: 1401 break; 1402 } 1403 1404 switch (vendor) { 1405 case X86_VENDOR_Intel: 1406 switch (eax) { 1407 case 4: 1408 /* 1409 * Zero out the (ncores-per-chip - 1) field 1410 */ 1411 cp->cp_eax &= 0x03fffffff; 1412 break; 1413 default: 1414 break; 1415 } 1416 break; 1417 case X86_VENDOR_AMD: 1418 switch (eax) { 1419 1420 case 0x80000001: 1421 cp->cp_ecx &= ~CPUID_AMD_ECX_CR8D; 1422 break; 1423 1424 case CPUID_LEAF_EXT_8: 1425 /* 1426 * Zero out the (ncores-per-chip - 1) field 1427 */ 1428 cp->cp_ecx &= 0xffffff00; 1429 break; 1430 default: 1431 break; 1432 } 1433 break; 1434 default: 1435 break; 1436 } 1437 } 1438 #else 1439 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */ 1440 #endif 1441 1442 /* 1443 * Some undocumented ways of patching the results of the cpuid 1444 * instruction to permit running Solaris 10 on future cpus that 1445 * we don't currently support. Could be set to non-zero values 1446 * via settings in eeprom. 1447 */ 1448 1449 uint32_t cpuid_feature_ecx_include; 1450 uint32_t cpuid_feature_ecx_exclude; 1451 uint32_t cpuid_feature_edx_include; 1452 uint32_t cpuid_feature_edx_exclude; 1453 1454 /* 1455 * Allocate space for mcpu_cpi in the machcpu structure for all non-boot CPUs. 1456 */ 1457 void 1458 cpuid_alloc_space(cpu_t *cpu) 1459 { 1460 /* 1461 * By convention, cpu0 is the boot cpu, which is set up 1462 * before memory allocation is available. All other cpus get 1463 * their cpuid_info struct allocated here. 1464 */ 1465 ASSERT(cpu->cpu_id != 0); 1466 ASSERT(cpu->cpu_m.mcpu_cpi == NULL); 1467 cpu->cpu_m.mcpu_cpi = 1468 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP); 1469 } 1470 1471 void 1472 cpuid_free_space(cpu_t *cpu) 1473 { 1474 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1475 int i; 1476 1477 ASSERT(cpi != NULL); 1478 ASSERT(cpi != &cpuid_info0); 1479 1480 /* 1481 * Free up any cache leaf related dynamic storage. The first entry was 1482 * cached from the standard cpuid storage, so we should not free it. 1483 */ 1484 for (i = 1; i < cpi->cpi_cache_leaf_size; i++) 1485 kmem_free(cpi->cpi_cache_leaves[i], sizeof (struct cpuid_regs)); 1486 if (cpi->cpi_cache_leaf_size > 0) 1487 kmem_free(cpi->cpi_cache_leaves, 1488 cpi->cpi_cache_leaf_size * sizeof (struct cpuid_regs *)); 1489 1490 kmem_free(cpi, sizeof (*cpi)); 1491 cpu->cpu_m.mcpu_cpi = NULL; 1492 } 1493 1494 #if !defined(__xpv) 1495 /* 1496 * Determine the type of the underlying platform. This is used to customize 1497 * initialization of various subsystems (e.g. TSC). determine_platform() must 1498 * only ever be called once to prevent two processors from seeing different 1499 * values of platform_type. Must be called before cpuid_pass1(), the earliest 1500 * consumer to execute (uses _cpuid_chiprev --> synth_amd_info --> get_hwenv). 
1501 */ 1502 void 1503 determine_platform(void) 1504 { 1505 struct cpuid_regs cp; 1506 uint32_t base; 1507 uint32_t regs[4]; 1508 char *hvstr = (char *)regs; 1509 1510 ASSERT(platform_type == -1); 1511 1512 platform_type = HW_NATIVE; 1513 1514 if (!enable_platform_detection) 1515 return; 1516 1517 /* 1518 * If Hypervisor CPUID bit is set, try to determine hypervisor 1519 * vendor signature, and set platform type accordingly. 1520 * 1521 * References: 1522 * http://lkml.org/lkml/2008/10/1/246 1523 * http://kb.vmware.com/kb/1009458 1524 */ 1525 cp.cp_eax = 0x1; 1526 (void) __cpuid_insn(&cp); 1527 if ((cp.cp_ecx & CPUID_INTC_ECX_HV) != 0) { 1528 cp.cp_eax = 0x40000000; 1529 (void) __cpuid_insn(&cp); 1530 regs[0] = cp.cp_ebx; 1531 regs[1] = cp.cp_ecx; 1532 regs[2] = cp.cp_edx; 1533 regs[3] = 0; 1534 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0) { 1535 platform_type = HW_XEN_HVM; 1536 return; 1537 } 1538 if (strcmp(hvstr, HVSIG_VMWARE) == 0) { 1539 platform_type = HW_VMWARE; 1540 return; 1541 } 1542 if (strcmp(hvstr, HVSIG_KVM) == 0) { 1543 platform_type = HW_KVM; 1544 return; 1545 } 1546 if (strcmp(hvstr, HVSIG_BHYVE) == 0) { 1547 platform_type = HW_BHYVE; 1548 return; 1549 } 1550 if (strcmp(hvstr, HVSIG_MICROSOFT) == 0) 1551 platform_type = HW_MICROSOFT; 1552 } else { 1553 /* 1554 * Check older VMware hardware versions. VMware hypervisor is 1555 * detected by performing an IN operation to VMware hypervisor 1556 * port and checking that value returned in %ebx is VMware 1557 * hypervisor magic value. 1558 * 1559 * References: http://kb.vmware.com/kb/1009458 1560 */ 1561 vmware_port(VMWARE_HVCMD_GETVERSION, regs); 1562 if (regs[1] == VMWARE_HVMAGIC) { 1563 platform_type = HW_VMWARE; 1564 return; 1565 } 1566 } 1567 1568 /* 1569 * Check Xen hypervisor. In a fully virtualized domain, 1570 * Xen's pseudo-cpuid function returns a string representing the 1571 * Xen signature in %ebx, %ecx, and %edx. %eax contains the maximum 1572 * supported cpuid function. We need at least a (base + 2) leaf value 1573 * to do what we want to do. Try different base values, since the 1574 * hypervisor might use a different one depending on whether Hyper-V 1575 * emulation is switched on by default or not. 1576 */ 1577 for (base = 0x40000000; base < 0x40010000; base += 0x100) { 1578 cp.cp_eax = base; 1579 (void) __cpuid_insn(&cp); 1580 regs[0] = cp.cp_ebx; 1581 regs[1] = cp.cp_ecx; 1582 regs[2] = cp.cp_edx; 1583 regs[3] = 0; 1584 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0 && 1585 cp.cp_eax >= (base + 2)) { 1586 platform_type &= ~HW_NATIVE; 1587 platform_type |= HW_XEN_HVM; 1588 return; 1589 } 1590 } 1591 } 1592 1593 int 1594 get_hwenv(void) 1595 { 1596 ASSERT(platform_type != -1); 1597 return (platform_type); 1598 } 1599 1600 int 1601 is_controldom(void) 1602 { 1603 return (0); 1604 } 1605 1606 #else 1607 1608 int 1609 get_hwenv(void) 1610 { 1611 return (HW_XEN_PV); 1612 } 1613 1614 int 1615 is_controldom(void) 1616 { 1617 return (DOMAIN_IS_INITDOMAIN(xen_info)); 1618 } 1619 1620 #endif /* __xpv */ 1621 1622 /* 1623 * Make sure that we have gathered all of the CPUID leaves that we might need to 1624 * determine topology. We assume that the standard leaf 1 has already been done 1625 * and that xmaxeax has already been calculated. 
1626 */ 1627 static void 1628 cpuid_gather_amd_topology_leaves(cpu_t *cpu) 1629 { 1630 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1631 1632 if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) { 1633 struct cpuid_regs *cp; 1634 1635 cp = &cpi->cpi_extd[8]; 1636 cp->cp_eax = CPUID_LEAF_EXT_8; 1637 (void) __cpuid_insn(cp); 1638 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8, cp); 1639 } 1640 1641 if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) && 1642 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) { 1643 struct cpuid_regs *cp; 1644 1645 cp = &cpi->cpi_extd[0x1e]; 1646 cp->cp_eax = CPUID_LEAF_EXT_1e; 1647 (void) __cpuid_insn(cp); 1648 } 1649 } 1650 1651 /* 1652 * Get the APIC ID for this processor. If Leaf B is present and valid, we prefer 1653 * it to everything else. If not, and we're on an AMD system where 8000001e is 1654 * valid, then we use that. Otherwise, we fall back to the default value for the 1655 * APIC ID in leaf 1. 1656 */ 1657 static uint32_t 1658 cpuid_gather_apicid(struct cpuid_info *cpi) 1659 { 1660 /* 1661 * Leaf B changes based on the arguments to it. Because we don't cache 1662 * it, we need to gather it again. 1663 */ 1664 if (cpi->cpi_maxeax >= 0xB) { 1665 struct cpuid_regs regs; 1666 struct cpuid_regs *cp; 1667 1668 cp = &regs; 1669 cp->cp_eax = 0xB; 1670 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0; 1671 (void) __cpuid_insn(cp); 1672 1673 if (cp->cp_ebx != 0) { 1674 return (cp->cp_edx); 1675 } 1676 } 1677 1678 if (cpi->cpi_vendor == X86_VENDOR_AMD && 1679 is_x86_feature(x86_featureset, X86FSET_TOPOEXT) && 1680 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) { 1681 return (cpi->cpi_extd[0x1e].cp_eax); 1682 } 1683 1684 return (CPI_APIC_ID(cpi)); 1685 } 1686 1687 /* 1688 * For AMD processors, attempt to calculate the number of chips and cores that 1689 * exist. The way that we do this varies based on the generation, because the 1690 * generations themselves have changed dramatically. 1691 * 1692 * If cpuid leaf 0x80000008 exists, that generally tells us the number of cores. 1693 * However, with the advent of family 17h (Zen) it actually tells us the number 1694 * of threads, so we need to look at leaf 0x8000001e if available to determine 1695 * its value. Otherwise, for all prior families, the number of enabled cores is 1696 * the same as threads. 1697 * 1698 * If we do not have leaf 0x80000008, then we assume that this processor does 1699 * not have anything. AMD's older CPUID specification says there's no reason to 1700 * fall back to leaf 1. 1701 * 1702 * In some virtualization cases we will not have leaf 8000001e or it will be 1703 * zero. When that happens we assume the number of threads is one. 1704 */ 1705 static void 1706 cpuid_amd_ncores(struct cpuid_info *cpi, uint_t *ncpus, uint_t *ncores) 1707 { 1708 uint_t nthreads, nthread_per_core; 1709 1710 nthreads = nthread_per_core = 1; 1711 1712 if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) { 1713 nthreads = BITX(cpi->cpi_extd[8].cp_ecx, 7, 0) + 1; 1714 } else if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) { 1715 nthreads = CPI_CPU_COUNT(cpi); 1716 } 1717 1718 /* 1719 * For us to have threads, and know about it, we have to be at least at 1720 * family 17h and have the cpuid bit that says we have extended 1721 * topology.
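 *
 * As a purely hypothetical illustration: a part whose leaf 0x80000008
 * %ecx[7:0] reads 15 yields nthreads = 16, and a leaf 0x8000001e
 * %ebx[15:8] value of 1 yields nthread_per_core = 2, so we would report
 * *ncpus = 16 and *ncores = 8.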
1722 */ 1723 if (cpi->cpi_family >= 0x17 && 1724 is_x86_feature(x86_featureset, X86FSET_TOPOEXT) && 1725 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) { 1726 nthread_per_core = BITX(cpi->cpi_extd[0x1e].cp_ebx, 15, 8) + 1; 1727 } 1728 1729 *ncpus = nthreads; 1730 *ncores = nthreads / nthread_per_core; 1731 } 1732 1733 /* 1734 * Seed the initial values for the cores and threads for an Intel based 1735 * processor. These values will be overwritten if we detect that the processor 1736 * supports CPUID leaf 0xb. 1737 */ 1738 static void 1739 cpuid_intel_ncores(struct cpuid_info *cpi, uint_t *ncpus, uint_t *ncores) 1740 { 1741 /* 1742 * Only seed the number of physical cores from the first level leaf 4 1743 * information. The number of threads there indicates how many share the 1744 * L1 cache, which may or may not have anything to do with the number of 1745 * logical CPUs per core. 1746 */ 1747 if (cpi->cpi_maxeax >= 4) { 1748 *ncores = BITX(cpi->cpi_std[4].cp_eax, 31, 26) + 1; 1749 } else { 1750 *ncores = 1; 1751 } 1752 1753 if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) { 1754 *ncpus = CPI_CPU_COUNT(cpi); 1755 } else { 1756 *ncpus = *ncores; 1757 } 1758 } 1759 1760 static boolean_t 1761 cpuid_leafB_getids(cpu_t *cpu) 1762 { 1763 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1764 struct cpuid_regs regs; 1765 struct cpuid_regs *cp; 1766 1767 if (cpi->cpi_maxeax < 0xB) 1768 return (B_FALSE); 1769 1770 cp = &regs; 1771 cp->cp_eax = 0xB; 1772 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0; 1773 1774 (void) __cpuid_insn(cp); 1775 1776 /* 1777 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which 1778 * indicates that the extended topology enumeration leaf is 1779 * available. 1780 */ 1781 if (cp->cp_ebx != 0) { 1782 uint32_t x2apic_id = 0; 1783 uint_t coreid_shift = 0; 1784 uint_t ncpu_per_core = 1; 1785 uint_t chipid_shift = 0; 1786 uint_t ncpu_per_chip = 1; 1787 uint_t i; 1788 uint_t level; 1789 1790 for (i = 0; i < CPI_FNB_ECX_MAX; i++) { 1791 cp->cp_eax = 0xB; 1792 cp->cp_ecx = i; 1793 1794 (void) __cpuid_insn(cp); 1795 level = CPI_CPU_LEVEL_TYPE(cp); 1796 1797 if (level == 1) { 1798 x2apic_id = cp->cp_edx; 1799 coreid_shift = BITX(cp->cp_eax, 4, 0); 1800 ncpu_per_core = BITX(cp->cp_ebx, 15, 0); 1801 } else if (level == 2) { 1802 x2apic_id = cp->cp_edx; 1803 chipid_shift = BITX(cp->cp_eax, 4, 0); 1804 ncpu_per_chip = BITX(cp->cp_ebx, 15, 0); 1805 } 1806 } 1807 1808 /* 1809 * cpi_apicid is taken care of in cpuid_gather_apicid. 1810 */ 1811 cpi->cpi_ncpu_per_chip = ncpu_per_chip; 1812 cpi->cpi_ncore_per_chip = ncpu_per_chip / 1813 ncpu_per_core; 1814 cpi->cpi_chipid = x2apic_id >> chipid_shift; 1815 cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1); 1816 cpi->cpi_coreid = x2apic_id >> coreid_shift; 1817 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift; 1818 cpi->cpi_procnodeid = cpi->cpi_chipid; 1819 cpi->cpi_compunitid = cpi->cpi_coreid; 1820 1821 if (coreid_shift > 0 && chipid_shift > coreid_shift) { 1822 cpi->cpi_nthread_bits = coreid_shift; 1823 cpi->cpi_ncore_bits = chipid_shift - coreid_shift; 1824 } 1825 1826 return (B_TRUE); 1827 } else { 1828 return (B_FALSE); 1829 } 1830 } 1831 1832 static void 1833 cpuid_intel_getids(cpu_t *cpu, void *feature) 1834 { 1835 uint_t i; 1836 uint_t chipid_shift = 0; 1837 uint_t coreid_shift = 0; 1838 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1839 1840 /* 1841 * There are no compute units or processor nodes currently on Intel. 1842 * Always set these to one.
1843 */ 1844 cpi->cpi_procnodes_per_pkg = 1; 1845 cpi->cpi_cores_per_compunit = 1; 1846 1847 /* 1848 * If cpuid Leaf B is present, use that to try and get this information. 1849 * It will be the most accurate for Intel CPUs. 1850 */ 1851 if (cpuid_leafB_getids(cpu)) 1852 return; 1853 1854 /* 1855 * In this case, we have the leaf 1 and leaf 4 values for ncpu_per_chip 1856 * and ncore_per_chip. These represent the largest power of two values 1857 * that we need to cover all of the IDs in the system. Therefore, we use 1858 * those values to seed the number of bits needed to cover information 1859 * in the case when leaf B is not available. These values will probably 1860 * be larger than required, but that's OK. 1861 */ 1862 cpi->cpi_nthread_bits = ddi_fls(cpi->cpi_ncpu_per_chip); 1863 cpi->cpi_ncore_bits = ddi_fls(cpi->cpi_ncore_per_chip); 1864 1865 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1) 1866 chipid_shift++; 1867 1868 cpi->cpi_chipid = cpi->cpi_apicid >> chipid_shift; 1869 cpi->cpi_clogid = cpi->cpi_apicid & ((1 << chipid_shift) - 1); 1870 1871 if (is_x86_feature(feature, X86FSET_CMP)) { 1872 /* 1873 * Multi-core (and possibly multi-threaded) 1874 * processors. 1875 */ 1876 uint_t ncpu_per_core; 1877 if (cpi->cpi_ncore_per_chip == 1) 1878 ncpu_per_core = cpi->cpi_ncpu_per_chip; 1879 else if (cpi->cpi_ncore_per_chip > 1) 1880 ncpu_per_core = cpi->cpi_ncpu_per_chip / 1881 cpi->cpi_ncore_per_chip; 1882 /* 1883 * 8bit APIC IDs on dual core Pentiums 1884 * look like this: 1885 * 1886 * +-----------------------+------+------+ 1887 * | Physical Package ID | MC | HT | 1888 * +-----------------------+------+------+ 1889 * <------- chipid --------> 1890 * <------- coreid ---------------> 1891 * <--- clogid --> 1892 * <------> 1893 * pkgcoreid 1894 * 1895 * Where the number of bits necessary to 1896 * represent MC and HT fields together equals 1897 * to the minimum number of bits necessary to 1898 * store the value of cpi->cpi_ncpu_per_chip. 1899 * Of those bits, the MC part uses the number 1900 * of bits necessary to store the value of 1901 * cpi->cpi_ncore_per_chip. 1902 */ 1903 for (i = 1; i < ncpu_per_core; i <<= 1) 1904 coreid_shift++; 1905 cpi->cpi_coreid = cpi->cpi_apicid >> coreid_shift; 1906 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift; 1907 } else if (is_x86_feature(feature, X86FSET_HTT)) { 1908 /* 1909 * Single-core multi-threaded processors. 1910 */ 1911 cpi->cpi_coreid = cpi->cpi_chipid; 1912 cpi->cpi_pkgcoreid = 0; 1913 } else { 1914 /* 1915 * Single-core single-thread processors. 1916 */ 1917 cpi->cpi_coreid = cpu->cpu_id; 1918 cpi->cpi_pkgcoreid = 0; 1919 } 1920 cpi->cpi_procnodeid = cpi->cpi_chipid; 1921 cpi->cpi_compunitid = cpi->cpi_coreid; 1922 } 1923 1924 /* 1925 * Historically, AMD has had CMP chips with only a single thread per core. 1926 * However, starting in family 17h (Zen), this has changed and they now have 1927 * multiple threads. Our internal core id needs to be a unique value. 1928 * 1929 * To determine the core id of an AMD system, if we're from a family before 17h, 1930 * then we just use the cpu id, as that gives us a good value that will be 1931 * unique for each core. If instead, we're on family 17h or later, then we need 1932 * to do something more complicated. CPUID leaf 0x8000001e can tell us 1933 * how many threads are in the system. Based on that, we'll shift the APIC ID. 1934 * We can't use the normal core id in that leaf as it's only unique within the 1935 * socket, which is perfect for cpi_pkgcoreid, but not us. 
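 *
 * As a hypothetical example: with two threads per core, the two threads of
 * one core might have APIC IDs 6 and 7; shifting the APIC ID right by one
 * maps both of them to core id 3, which is unique across the system.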
1936 */ 1937 static id_t 1938 cpuid_amd_get_coreid(cpu_t *cpu) 1939 { 1940 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1941 1942 if (cpi->cpi_family >= 0x17 && 1943 is_x86_feature(x86_featureset, X86FSET_TOPOEXT) && 1944 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) { 1945 uint_t nthreads = BITX(cpi->cpi_extd[0x1e].cp_ebx, 15, 8) + 1; 1946 if (nthreads > 1) { 1947 VERIFY3U(nthreads, ==, 2); 1948 return (cpi->cpi_apicid >> 1); 1949 } 1950 } 1951 1952 return (cpu->cpu_id); 1953 } 1954 1955 /* 1956 * Determining IDs on AMD is a more challenging task. This is notable because of the 1957 * following two facts: 1958 * 1959 * 1. Before family 0x17 (Zen), there was no support for SMT and there was 1960 * also no way to get an actual unique core id from the system. As such, we 1961 * synthesize this case by using cpu->cpu_id. This scheme does not, 1962 * however, guarantee that sibling cores of a chip will have sequential 1963 * coreids starting at a multiple of the number of cores per chip - that is 1964 * usually the case, but if the ACPI MADT table is presented in a different 1965 * order then we need to perform a few more gymnastics for the pkgcoreid. 1966 * 1967 * 2. In families 0x15 and 0x16 (Bulldozer and co.) the cores came in groups 1968 * called compute units. These compute units share the L1I cache, L2 cache, 1969 * and the FPU. To deal with this, a new topology leaf was added in 1970 * 0x8000001e. However, parts of this leaf have different meanings 1971 * once we get to family 0x17. 1972 */ 1973 1974 static void 1975 cpuid_amd_getids(cpu_t *cpu, uchar_t *features) 1976 { 1977 int i, first_half, coreidsz; 1978 uint32_t nb_caps_reg; 1979 uint_t node2_1; 1980 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1981 struct cpuid_regs *cp; 1982 1983 /* 1984 * Calculate the core id (this comes from hardware in family 0x17 if it 1985 * hasn't been stripped by virtualization). We always set the compute 1986 * unit id to the same value. Also, initialize the default number of 1987 * cores per compute unit and nodes per package. This will be 1988 * overwritten when we know information about a particular family. 1989 */ 1990 cpi->cpi_coreid = cpuid_amd_get_coreid(cpu); 1991 cpi->cpi_compunitid = cpi->cpi_coreid; 1992 cpi->cpi_cores_per_compunit = 1; 1993 cpi->cpi_procnodes_per_pkg = 1; 1994 1995 /* 1996 * To construct the logical ID, we need to determine how many APIC IDs 1997 * are dedicated to the cores and threads. This is provided for us in 1998 * 0x80000008. However, if it's not present (say due to virtualization), 1999 * then we assume it's one. This should be present on all 64-bit AMD 2000 * processors. It was added in family 0xf (Hammer). 2001 */ 2002 if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) { 2003 coreidsz = BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12); 2004 2005 /* 2006 * In AMD parlance chip is really a node while illumos 2007 * uses chip as equivalent to socket/package. 2008 */ 2009 if (coreidsz == 0) { 2010 /* Use legacy method */ 2011 for (i = 1; i < cpi->cpi_ncore_per_chip; i <<= 1) 2012 coreidsz++; 2013 if (coreidsz == 0) 2014 coreidsz = 1; 2015 } 2016 } else { 2017 /* Assume single-core part */ 2018 coreidsz = 1; 2019 } 2020 cpi->cpi_clogid = cpi->cpi_apicid & ((1 << coreidsz) - 1); 2021 2022 /* 2023 * The package core ID varies depending on the family. For family 17h, 2024 * we can get this directly from leaf CPUID_LEAF_EXT_1e. Otherwise, we 2025 * can use the clogid as is.
When family 17h is virtualized, the clogid 2026 * should be sufficient as if we don't have valid data in the leaf, then 2027 * we won't think we have SMT, in which case the cpi_clogid should be 2028 * sufficient. 2029 */ 2030 if (cpi->cpi_family >= 0x17 && 2031 is_x86_feature(x86_featureset, X86FSET_TOPOEXT) && 2032 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e && 2033 cpi->cpi_extd[0x1e].cp_ebx != 0) { 2034 cpi->cpi_pkgcoreid = BITX(cpi->cpi_extd[0x1e].cp_ebx, 7, 0); 2035 } else { 2036 cpi->cpi_pkgcoreid = cpi->cpi_clogid; 2037 } 2038 2039 /* 2040 * Obtain the node ID and compute unit IDs. If we're on family 0x15 2041 * (bulldozer) or newer, then we can derive all of this from leaf 2042 * CPUID_LEAF_EXT_1e. Otherwise, the method varies by family. 2043 */ 2044 if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) && 2045 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) { 2046 cp = &cpi->cpi_extd[0x1e]; 2047 2048 cpi->cpi_procnodes_per_pkg = BITX(cp->cp_ecx, 10, 8) + 1; 2049 cpi->cpi_procnodeid = BITX(cp->cp_ecx, 7, 0); 2050 2051 /* 2052 * For Bulldozer-era CPUs, recalculate the compute unit 2053 * information. 2054 */ 2055 if (cpi->cpi_family >= 0x15 && cpi->cpi_family < 0x17) { 2056 cpi->cpi_cores_per_compunit = 2057 BITX(cp->cp_ebx, 15, 8) + 1; 2058 cpi->cpi_compunitid = BITX(cp->cp_ebx, 7, 0) + 2059 (cpi->cpi_ncore_per_chip / 2060 cpi->cpi_cores_per_compunit) * 2061 (cpi->cpi_procnodeid / 2062 cpi->cpi_procnodes_per_pkg); 2063 } 2064 } else if (cpi->cpi_family == 0xf || cpi->cpi_family >= 0x11) { 2065 cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7; 2066 } else if (cpi->cpi_family == 0x10) { 2067 /* 2068 * See if we are a multi-node processor. 2069 * All processors in the system have the same number of nodes 2070 */ 2071 nb_caps_reg = pci_getl_func(0, 24, 3, 0xe8); 2072 if ((cpi->cpi_model < 8) || BITX(nb_caps_reg, 29, 29) == 0) { 2073 /* Single-node */ 2074 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5, 2075 coreidsz); 2076 } else { 2077 2078 /* 2079 * Multi-node revision D (2 nodes per package 2080 * are supported) 2081 */ 2082 cpi->cpi_procnodes_per_pkg = 2; 2083 2084 first_half = (cpi->cpi_pkgcoreid <= 2085 (cpi->cpi_ncore_per_chip/2 - 1)); 2086 2087 if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) { 2088 /* We are BSP */ 2089 cpi->cpi_procnodeid = (first_half ? 
0 : 1); 2090 } else { 2091 2092 /* We are AP */ 2093 /* NodeId[2:1] bits to use for reading F3xe8 */ 2094 node2_1 = BITX(cpi->cpi_apicid, 5, 4) << 1; 2095 2096 nb_caps_reg = 2097 pci_getl_func(0, 24 + node2_1, 3, 0xe8); 2098 2099 /* 2100 * Check IntNodeNum bit (31:30, but bit 31 is 2101 * always 0 on dual-node processors) 2102 */ 2103 if (BITX(nb_caps_reg, 30, 30) == 0) 2104 cpi->cpi_procnodeid = node2_1 + 2105 !first_half; 2106 else 2107 cpi->cpi_procnodeid = node2_1 + 2108 first_half; 2109 } 2110 } 2111 } else { 2112 cpi->cpi_procnodeid = 0; 2113 } 2114 2115 cpi->cpi_chipid = 2116 cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg; 2117 2118 cpi->cpi_ncore_bits = coreidsz; 2119 cpi->cpi_nthread_bits = ddi_fls(cpi->cpi_ncpu_per_chip / 2120 cpi->cpi_ncore_per_chip); 2121 } 2122 2123 static void 2124 cpuid_scan_security(cpu_t *cpu, uchar_t *featureset) 2125 { 2126 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2127 2128 if (cpi->cpi_vendor == X86_VENDOR_AMD && 2129 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) { 2130 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBPB) 2131 add_x86_feature(featureset, X86FSET_IBPB); 2132 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBRS) 2133 add_x86_feature(featureset, X86FSET_IBRS); 2134 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_STIBP) 2135 add_x86_feature(featureset, X86FSET_STIBP); 2136 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBRS_ALL) 2137 add_x86_feature(featureset, X86FSET_IBRS_ALL); 2138 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_STIBP_ALL) 2139 add_x86_feature(featureset, X86FSET_STIBP_ALL); 2140 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_PREFER_IBRS) 2141 add_x86_feature(featureset, X86FSET_RSBA); 2142 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_SSBD) 2143 add_x86_feature(featureset, X86FSET_SSBD); 2144 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_VIRT_SSBD) 2145 add_x86_feature(featureset, X86FSET_SSBD_VIRT); 2146 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_SSB_NO) 2147 add_x86_feature(featureset, X86FSET_SSB_NO); 2148 } else if (cpi->cpi_vendor == X86_VENDOR_Intel && 2149 cpi->cpi_maxeax >= 7) { 2150 struct cpuid_regs *ecp; 2151 ecp = &cpi->cpi_std[7]; 2152 2153 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_SPEC_CTRL) { 2154 add_x86_feature(featureset, X86FSET_IBRS); 2155 add_x86_feature(featureset, X86FSET_IBPB); 2156 } 2157 2158 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_STIBP) { 2159 add_x86_feature(featureset, X86FSET_STIBP); 2160 } 2161 2162 /* 2163 * Don't read the arch caps MSR on xpv where we lack the 2164 * on_trap(). 2165 */ 2166 #ifndef __xpv 2167 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_ARCH_CAPS) { 2168 on_trap_data_t otd; 2169 2170 /* 2171 * Be paranoid and assume we'll get a #GP. 
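 * Reading MSR_IA32_ARCH_CAPABILITIES on a CPU or hypervisor that does not
 * actually implement it may #GP, which is why the read below is wrapped
 * in on_trap().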
2172 */ 2173 if (!on_trap(&otd, OT_DATA_ACCESS)) { 2174 uint64_t reg; 2175 2176 reg = rdmsr(MSR_IA32_ARCH_CAPABILITIES); 2177 if (reg & IA32_ARCH_CAP_RDCL_NO) { 2178 add_x86_feature(featureset, 2179 X86FSET_RDCL_NO); 2180 } 2181 if (reg & IA32_ARCH_CAP_IBRS_ALL) { 2182 add_x86_feature(featureset, 2183 X86FSET_IBRS_ALL); 2184 } 2185 if (reg & IA32_ARCH_CAP_RSBA) { 2186 add_x86_feature(featureset, 2187 X86FSET_RSBA); 2188 } 2189 if (reg & IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY) { 2190 add_x86_feature(featureset, 2191 X86FSET_L1D_VM_NO); 2192 } 2193 if (reg & IA32_ARCH_CAP_SSB_NO) { 2194 add_x86_feature(featureset, 2195 X86FSET_SSB_NO); 2196 } 2197 } 2198 no_trap(); 2199 } 2200 #endif /* !__xpv */ 2201 2202 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_SSBD) 2203 add_x86_feature(featureset, X86FSET_SSBD); 2204 2205 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_FLUSH_CMD) 2206 add_x86_feature(featureset, X86FSET_FLUSH_CMD); 2207 } 2208 } 2209 2210 /* 2211 * Setup XFeature_Enabled_Mask register. Required by xsave feature. 2212 */ 2213 void 2214 setup_xfem(void) 2215 { 2216 uint64_t flags = XFEATURE_LEGACY_FP; 2217 2218 ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE)); 2219 2220 if (is_x86_feature(x86_featureset, X86FSET_SSE)) 2221 flags |= XFEATURE_SSE; 2222 2223 if (is_x86_feature(x86_featureset, X86FSET_AVX)) 2224 flags |= XFEATURE_AVX; 2225 2226 if (is_x86_feature(x86_featureset, X86FSET_AVX512F)) 2227 flags |= XFEATURE_AVX512; 2228 2229 set_xcr(XFEATURE_ENABLED_MASK, flags); 2230 2231 xsave_bv_all = flags; 2232 } 2233 2234 static void 2235 cpuid_pass1_topology(cpu_t *cpu, uchar_t *featureset) 2236 { 2237 struct cpuid_info *cpi; 2238 2239 cpi = cpu->cpu_m.mcpu_cpi; 2240 2241 if (cpi->cpi_vendor == X86_VENDOR_AMD) { 2242 cpuid_gather_amd_topology_leaves(cpu); 2243 } 2244 2245 cpi->cpi_apicid = cpuid_gather_apicid(cpi); 2246 2247 /* 2248 * Before we can calculate the IDs that we should assign to this 2249 * processor, we need to understand how many cores and threads it has. 2250 */ 2251 switch (cpi->cpi_vendor) { 2252 case X86_VENDOR_Intel: 2253 cpuid_intel_ncores(cpi, &cpi->cpi_ncpu_per_chip, 2254 &cpi->cpi_ncore_per_chip); 2255 break; 2256 case X86_VENDOR_AMD: 2257 cpuid_amd_ncores(cpi, &cpi->cpi_ncpu_per_chip, 2258 &cpi->cpi_ncore_per_chip); 2259 break; 2260 default: 2261 /* 2262 * If we have some other x86 compatible chip, it's not clear how 2263 * they would behave. The most common case is virtualization 2264 * today, though there are also 64-bit VIA chips. Assume that 2265 * all we can get is the basic Leaf 1 HTT information. 2266 */ 2267 if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) { 2268 cpi->cpi_ncore_per_chip = 1; 2269 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi); 2270 } 2271 break; 2272 } 2273 2274 /* 2275 * Based on the calculated number of threads and cores, potentially 2276 * assign the HTT and CMT features. 2277 */ 2278 if (cpi->cpi_ncore_per_chip > 1) { 2279 add_x86_feature(featureset, X86FSET_CMP); 2280 } 2281 2282 if (cpi->cpi_ncpu_per_chip > 1 && 2283 cpi->cpi_ncpu_per_chip != cpi->cpi_ncore_per_chip) { 2284 add_x86_feature(featureset, X86FSET_HTT); 2285 } 2286 2287 /* 2288 * Now that has been set up, we need to go through and calculate all of 2289 * the rest of the parameters that exist. If we think the CPU doesn't 2290 * have either SMT (HTT) or CMP, then we basically go through and fake 2291 * up information in some way. The most likely case for this is 2292 * virtualization where we have a lot of partial topology information. 
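 * For example (hypothetical), a hypervisor that only exposes a single
 * logical CPU per package leaves both X86FSET_HTT and X86FSET_CMP unset,
 * and we fall into the single-core, single-threaded path below.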
2293 */ 2294 if (!is_x86_feature(featureset, X86FSET_HTT) && 2295 !is_x86_feature(featureset, X86FSET_CMP)) { 2296 /* 2297 * This is a single core, single-threaded processor. 2298 */ 2299 cpi->cpi_procnodes_per_pkg = 1; 2300 cpi->cpi_cores_per_compunit = 1; 2301 cpi->cpi_compunitid = 0; 2302 cpi->cpi_chipid = -1; 2303 cpi->cpi_clogid = 0; 2304 cpi->cpi_coreid = cpu->cpu_id; 2305 cpi->cpi_pkgcoreid = 0; 2306 if (cpi->cpi_vendor == X86_VENDOR_AMD) { 2307 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 3, 0); 2308 } else { 2309 cpi->cpi_procnodeid = cpi->cpi_chipid; 2310 } 2311 } else { 2312 switch (cpi->cpi_vendor) { 2313 case X86_VENDOR_Intel: 2314 cpuid_intel_getids(cpu, featureset); 2315 break; 2316 case X86_VENDOR_AMD: 2317 cpuid_amd_getids(cpu, featureset); 2318 break; 2319 default: 2320 /* 2321 * In this case, it's hard to say what we should do. 2322 * We're going to model them to the OS as single core 2323 * threads. We don't have a good identifier for them, so 2324 * we're just going to use the cpu id all on a single 2325 * chip. 2326 * 2327 * This case has historically been different from the 2328 * case above where we don't have HTT or CMP. While they 2329 * could be combined, we've opted to keep it separate to 2330 * minimize the risk of topology changes in weird cases. 2331 */ 2332 cpi->cpi_procnodes_per_pkg = 1; 2333 cpi->cpi_cores_per_compunit = 1; 2334 cpi->cpi_chipid = 0; 2335 cpi->cpi_coreid = cpu->cpu_id; 2336 cpi->cpi_clogid = cpu->cpu_id; 2337 cpi->cpi_pkgcoreid = cpu->cpu_id; 2338 cpi->cpi_procnodeid = cpi->cpi_chipid; 2339 cpi->cpi_compunitid = cpi->cpi_coreid; 2340 break; 2341 } 2342 } 2343 } 2344 2345 void 2346 cpuid_pass1(cpu_t *cpu, uchar_t *featureset) 2347 { 2348 uint32_t mask_ecx, mask_edx; 2349 struct cpuid_info *cpi; 2350 struct cpuid_regs *cp; 2351 int xcpuid; 2352 #if !defined(__xpv) 2353 extern int idle_cpu_prefer_mwait; 2354 #endif 2355 2356 /* 2357 * Space statically allocated for BSP, ensure pointer is set 2358 */ 2359 if (cpu->cpu_id == 0) { 2360 if (cpu->cpu_m.mcpu_cpi == NULL) 2361 cpu->cpu_m.mcpu_cpi = &cpuid_info0; 2362 } 2363 2364 add_x86_feature(featureset, X86FSET_CPUID); 2365 2366 cpi = cpu->cpu_m.mcpu_cpi; 2367 ASSERT(cpi != NULL); 2368 cp = &cpi->cpi_std[0]; 2369 cp->cp_eax = 0; 2370 cpi->cpi_maxeax = __cpuid_insn(cp); 2371 { 2372 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr; 2373 *iptr++ = cp->cp_ebx; 2374 *iptr++ = cp->cp_edx; 2375 *iptr++ = cp->cp_ecx; 2376 *(char *)&cpi->cpi_vendorstr[12] = '\0'; 2377 } 2378 2379 cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr); 2380 x86_vendor = cpi->cpi_vendor; /* for compatibility */ 2381 2382 /* 2383 * Limit the range in case of weird hardware 2384 */ 2385 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX) 2386 cpi->cpi_maxeax = CPI_MAXEAX_MAX; 2387 if (cpi->cpi_maxeax < 1) 2388 goto pass1_done; 2389 2390 cp = &cpi->cpi_std[1]; 2391 cp->cp_eax = 1; 2392 (void) __cpuid_insn(cp); 2393 2394 /* 2395 * Extract identifying constants for easy access. 2396 */ 2397 cpi->cpi_model = CPI_MODEL(cpi); 2398 cpi->cpi_family = CPI_FAMILY(cpi); 2399 2400 if (cpi->cpi_family == 0xf) 2401 cpi->cpi_family += CPI_FAMILY_XTD(cpi); 2402 2403 /* 2404 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf. 2405 * Intel, and presumably everyone else, uses model == 0xf, as 2406 * one would expect (max value means possible overflow). Sigh. 
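 *
 * As a hypothetical illustration: an Intel part reporting base family 0x6,
 * base model 0xe and extended model 0x9 in leaf 1 ends up with
 * cpi_model = 0xe + (0x9 << 4) = 0x9e.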
2407 */ 2408 2409 switch (cpi->cpi_vendor) { 2410 case X86_VENDOR_Intel: 2411 if (IS_EXTENDED_MODEL_INTEL(cpi)) 2412 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 2413 break; 2414 case X86_VENDOR_AMD: 2415 if (CPI_FAMILY(cpi) == 0xf) 2416 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 2417 break; 2418 default: 2419 if (cpi->cpi_model == 0xf) 2420 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 2421 break; 2422 } 2423 2424 cpi->cpi_step = CPI_STEP(cpi); 2425 cpi->cpi_brandid = CPI_BRANDID(cpi); 2426 2427 /* 2428 * *default* assumptions: 2429 * - believe %edx feature word 2430 * - ignore %ecx feature word 2431 * - 32-bit virtual and physical addressing 2432 */ 2433 mask_edx = 0xffffffff; 2434 mask_ecx = 0; 2435 2436 cpi->cpi_pabits = cpi->cpi_vabits = 32; 2437 2438 switch (cpi->cpi_vendor) { 2439 case X86_VENDOR_Intel: 2440 if (cpi->cpi_family == 5) 2441 x86_type = X86_TYPE_P5; 2442 else if (IS_LEGACY_P6(cpi)) { 2443 x86_type = X86_TYPE_P6; 2444 pentiumpro_bug4046376 = 1; 2445 /* 2446 * Clear the SEP bit when it was set erroneously 2447 */ 2448 if (cpi->cpi_model < 3 && cpi->cpi_step < 3) 2449 cp->cp_edx &= ~CPUID_INTC_EDX_SEP; 2450 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) { 2451 x86_type = X86_TYPE_P4; 2452 /* 2453 * We don't currently depend on any of the %ecx 2454 * features until Prescott, so we'll only check 2455 * this from P4 onwards. We might want to revisit 2456 * that idea later. 2457 */ 2458 mask_ecx = 0xffffffff; 2459 } else if (cpi->cpi_family > 0xf) 2460 mask_ecx = 0xffffffff; 2461 /* 2462 * We don't support MONITOR/MWAIT if leaf 5 is not available 2463 * to obtain the monitor linesize. 2464 */ 2465 if (cpi->cpi_maxeax < 5) 2466 mask_ecx &= ~CPUID_INTC_ECX_MON; 2467 break; 2468 case X86_VENDOR_IntelClone: 2469 default: 2470 break; 2471 case X86_VENDOR_AMD: 2472 #if defined(OPTERON_ERRATUM_108) 2473 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) { 2474 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0; 2475 cpi->cpi_model = 0xc; 2476 } else 2477 #endif 2478 if (cpi->cpi_family == 5) { 2479 /* 2480 * AMD K5 and K6 2481 * 2482 * These CPUs have an incomplete implementation 2483 * of MCA/MCE which we mask away. 2484 */ 2485 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA); 2486 2487 /* 2488 * Model 0 uses the wrong (APIC) bit 2489 * to indicate PGE. Fix it here. 2490 */ 2491 if (cpi->cpi_model == 0) { 2492 if (cp->cp_edx & 0x200) { 2493 cp->cp_edx &= ~0x200; 2494 cp->cp_edx |= CPUID_INTC_EDX_PGE; 2495 } 2496 } 2497 2498 /* 2499 * Early models had problems w/ MMX; disable. 2500 */ 2501 if (cpi->cpi_model < 6) 2502 mask_edx &= ~CPUID_INTC_EDX_MMX; 2503 } 2504 2505 /* 2506 * For newer families, SSE3 and CX16, at least, are valid; 2507 * enable all 2508 */ 2509 if (cpi->cpi_family >= 0xf) 2510 mask_ecx = 0xffffffff; 2511 /* 2512 * We don't support MONITOR/MWAIT if leaf 5 is not available 2513 * to obtain the monitor linesize. 2514 */ 2515 if (cpi->cpi_maxeax < 5) 2516 mask_ecx &= ~CPUID_INTC_ECX_MON; 2517 2518 #if !defined(__xpv) 2519 /* 2520 * AMD has not historically used MWAIT in the CPU's idle loop. 2521 * Pre-family-10h Opterons do not have the MWAIT instruction. We 2522 * know for certain that in at least family 17h, per AMD, mwait 2523 * is preferred. Families in-between are less certain. 
2524 */ 2525 if (cpi->cpi_family < 0x17) { 2526 idle_cpu_prefer_mwait = 0; 2527 } 2528 #endif 2529 2530 break; 2531 case X86_VENDOR_TM: 2532 /* 2533 * workaround the NT workaround in CMS 4.1 2534 */ 2535 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 && 2536 (cpi->cpi_step == 2 || cpi->cpi_step == 3)) 2537 cp->cp_edx |= CPUID_INTC_EDX_CX8; 2538 break; 2539 case X86_VENDOR_Centaur: 2540 /* 2541 * workaround the NT workarounds again 2542 */ 2543 if (cpi->cpi_family == 6) 2544 cp->cp_edx |= CPUID_INTC_EDX_CX8; 2545 break; 2546 case X86_VENDOR_Cyrix: 2547 /* 2548 * We rely heavily on the probing in locore 2549 * to actually figure out what parts, if any, 2550 * of the Cyrix cpuid instruction to believe. 2551 */ 2552 switch (x86_type) { 2553 case X86_TYPE_CYRIX_486: 2554 mask_edx = 0; 2555 break; 2556 case X86_TYPE_CYRIX_6x86: 2557 mask_edx = 0; 2558 break; 2559 case X86_TYPE_CYRIX_6x86L: 2560 mask_edx = 2561 CPUID_INTC_EDX_DE | 2562 CPUID_INTC_EDX_CX8; 2563 break; 2564 case X86_TYPE_CYRIX_6x86MX: 2565 mask_edx = 2566 CPUID_INTC_EDX_DE | 2567 CPUID_INTC_EDX_MSR | 2568 CPUID_INTC_EDX_CX8 | 2569 CPUID_INTC_EDX_PGE | 2570 CPUID_INTC_EDX_CMOV | 2571 CPUID_INTC_EDX_MMX; 2572 break; 2573 case X86_TYPE_CYRIX_GXm: 2574 mask_edx = 2575 CPUID_INTC_EDX_MSR | 2576 CPUID_INTC_EDX_CX8 | 2577 CPUID_INTC_EDX_CMOV | 2578 CPUID_INTC_EDX_MMX; 2579 break; 2580 case X86_TYPE_CYRIX_MediaGX: 2581 break; 2582 case X86_TYPE_CYRIX_MII: 2583 case X86_TYPE_VIA_CYRIX_III: 2584 mask_edx = 2585 CPUID_INTC_EDX_DE | 2586 CPUID_INTC_EDX_TSC | 2587 CPUID_INTC_EDX_MSR | 2588 CPUID_INTC_EDX_CX8 | 2589 CPUID_INTC_EDX_PGE | 2590 CPUID_INTC_EDX_CMOV | 2591 CPUID_INTC_EDX_MMX; 2592 break; 2593 default: 2594 break; 2595 } 2596 break; 2597 } 2598 2599 #if defined(__xpv) 2600 /* 2601 * Do not support MONITOR/MWAIT under a hypervisor 2602 */ 2603 mask_ecx &= ~CPUID_INTC_ECX_MON; 2604 /* 2605 * Do not support XSAVE under a hypervisor for now 2606 */ 2607 xsave_force_disable = B_TRUE; 2608 2609 #endif /* __xpv */ 2610 2611 if (xsave_force_disable) { 2612 mask_ecx &= ~CPUID_INTC_ECX_XSAVE; 2613 mask_ecx &= ~CPUID_INTC_ECX_AVX; 2614 mask_ecx &= ~CPUID_INTC_ECX_F16C; 2615 mask_ecx &= ~CPUID_INTC_ECX_FMA; 2616 } 2617 2618 /* 2619 * Now we've figured out the masks that determine 2620 * which bits we choose to believe, apply the masks 2621 * to the feature words, then map the kernel's view 2622 * of these feature words into its feature word. 2623 */ 2624 cp->cp_edx &= mask_edx; 2625 cp->cp_ecx &= mask_ecx; 2626 2627 /* 2628 * apply any platform restrictions (we don't call this 2629 * immediately after __cpuid_insn here, because we need the 2630 * workarounds applied above first) 2631 */ 2632 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp); 2633 2634 /* 2635 * In addition to ecx and edx, Intel and AMD are storing a bunch of 2636 * instruction set extensions in leaf 7's ebx, ecx, and edx. 2637 */ 2638 if (cpi->cpi_maxeax >= 7) { 2639 struct cpuid_regs *ecp; 2640 ecp = &cpi->cpi_std[7]; 2641 ecp->cp_eax = 7; 2642 ecp->cp_ecx = 0; 2643 (void) __cpuid_insn(ecp); 2644 2645 /* 2646 * If XSAVE has been disabled, just ignore all of the 2647 * extended-save-area dependent flags here. 
2648 */ 2649 if (xsave_force_disable) { 2650 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_BMI1; 2651 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_BMI2; 2652 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_AVX2; 2653 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_MPX; 2654 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_ALL_AVX512; 2655 ecp->cp_ecx &= ~CPUID_INTC_ECX_7_0_ALL_AVX512; 2656 ecp->cp_edx &= ~CPUID_INTC_EDX_7_0_ALL_AVX512; 2657 } 2658 2659 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_SMEP) 2660 add_x86_feature(featureset, X86FSET_SMEP); 2661 2662 /* 2663 * We check disable_smap here in addition to in startup_smap() 2664 * to ensure CPUs that aren't the boot CPU don't accidentally 2665 * include it in the feature set and thus generate a mismatched 2666 * x86 feature set across CPUs. 2667 */ 2668 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_SMAP && 2669 disable_smap == 0) 2670 add_x86_feature(featureset, X86FSET_SMAP); 2671 2672 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_RDSEED) 2673 add_x86_feature(featureset, X86FSET_RDSEED); 2674 2675 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_ADX) 2676 add_x86_feature(featureset, X86FSET_ADX); 2677 2678 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_FSGSBASE) 2679 add_x86_feature(featureset, X86FSET_FSGSBASE); 2680 2681 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_CLFLUSHOPT) 2682 add_x86_feature(featureset, X86FSET_CLFLUSHOPT); 2683 2684 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 2685 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_INVPCID) 2686 add_x86_feature(featureset, X86FSET_INVPCID); 2687 2688 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_MPX) 2689 add_x86_feature(featureset, X86FSET_MPX); 2690 2691 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_CLWB) 2692 add_x86_feature(featureset, X86FSET_CLWB); 2693 } 2694 } 2695 2696 /* 2697 * fold in overrides from the "eeprom" mechanism 2698 */ 2699 cp->cp_edx |= cpuid_feature_edx_include; 2700 cp->cp_edx &= ~cpuid_feature_edx_exclude; 2701 2702 cp->cp_ecx |= cpuid_feature_ecx_include; 2703 cp->cp_ecx &= ~cpuid_feature_ecx_exclude; 2704 2705 if (cp->cp_edx & CPUID_INTC_EDX_PSE) { 2706 add_x86_feature(featureset, X86FSET_LARGEPAGE); 2707 } 2708 if (cp->cp_edx & CPUID_INTC_EDX_TSC) { 2709 add_x86_feature(featureset, X86FSET_TSC); 2710 } 2711 if (cp->cp_edx & CPUID_INTC_EDX_MSR) { 2712 add_x86_feature(featureset, X86FSET_MSR); 2713 } 2714 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) { 2715 add_x86_feature(featureset, X86FSET_MTRR); 2716 } 2717 if (cp->cp_edx & CPUID_INTC_EDX_PGE) { 2718 add_x86_feature(featureset, X86FSET_PGE); 2719 } 2720 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) { 2721 add_x86_feature(featureset, X86FSET_CMOV); 2722 } 2723 if (cp->cp_edx & CPUID_INTC_EDX_MMX) { 2724 add_x86_feature(featureset, X86FSET_MMX); 2725 } 2726 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 && 2727 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) { 2728 add_x86_feature(featureset, X86FSET_MCA); 2729 } 2730 if (cp->cp_edx & CPUID_INTC_EDX_PAE) { 2731 add_x86_feature(featureset, X86FSET_PAE); 2732 } 2733 if (cp->cp_edx & CPUID_INTC_EDX_CX8) { 2734 add_x86_feature(featureset, X86FSET_CX8); 2735 } 2736 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) { 2737 add_x86_feature(featureset, X86FSET_CX16); 2738 } 2739 if (cp->cp_edx & CPUID_INTC_EDX_PAT) { 2740 add_x86_feature(featureset, X86FSET_PAT); 2741 } 2742 if (cp->cp_edx & CPUID_INTC_EDX_SEP) { 2743 add_x86_feature(featureset, X86FSET_SEP); 2744 } 2745 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) { 2746 /* 2747 * In our implementation, fxsave/fxrstor 2748 * are prerequisites before we'll even 2749 * try and do SSE things. 
2750 */ 2751 if (cp->cp_edx & CPUID_INTC_EDX_SSE) { 2752 add_x86_feature(featureset, X86FSET_SSE); 2753 } 2754 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) { 2755 add_x86_feature(featureset, X86FSET_SSE2); 2756 } 2757 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) { 2758 add_x86_feature(featureset, X86FSET_SSE3); 2759 } 2760 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) { 2761 add_x86_feature(featureset, X86FSET_SSSE3); 2762 } 2763 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) { 2764 add_x86_feature(featureset, X86FSET_SSE4_1); 2765 } 2766 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) { 2767 add_x86_feature(featureset, X86FSET_SSE4_2); 2768 } 2769 if (cp->cp_ecx & CPUID_INTC_ECX_AES) { 2770 add_x86_feature(featureset, X86FSET_AES); 2771 } 2772 if (cp->cp_ecx & CPUID_INTC_ECX_PCLMULQDQ) { 2773 add_x86_feature(featureset, X86FSET_PCLMULQDQ); 2774 } 2775 2776 if (cpi->cpi_std[7].cp_ebx & CPUID_INTC_EBX_7_0_SHA) 2777 add_x86_feature(featureset, X86FSET_SHA); 2778 2779 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_UMIP) 2780 add_x86_feature(featureset, X86FSET_UMIP); 2781 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_PKU) 2782 add_x86_feature(featureset, X86FSET_PKU); 2783 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_OSPKE) 2784 add_x86_feature(featureset, X86FSET_OSPKE); 2785 2786 if (cp->cp_ecx & CPUID_INTC_ECX_XSAVE) { 2787 add_x86_feature(featureset, X86FSET_XSAVE); 2788 2789 /* We only test AVX & AVX512 when there is XSAVE */ 2790 2791 if (cp->cp_ecx & CPUID_INTC_ECX_AVX) { 2792 add_x86_feature(featureset, 2793 X86FSET_AVX); 2794 2795 /* 2796 * Intel says we can't check these without also 2797 * checking AVX. 2798 */ 2799 if (cp->cp_ecx & CPUID_INTC_ECX_F16C) 2800 add_x86_feature(featureset, 2801 X86FSET_F16C); 2802 2803 if (cp->cp_ecx & CPUID_INTC_ECX_FMA) 2804 add_x86_feature(featureset, 2805 X86FSET_FMA); 2806 2807 if (cpi->cpi_std[7].cp_ebx & 2808 CPUID_INTC_EBX_7_0_BMI1) 2809 add_x86_feature(featureset, 2810 X86FSET_BMI1); 2811 2812 if (cpi->cpi_std[7].cp_ebx & 2813 CPUID_INTC_EBX_7_0_BMI2) 2814 add_x86_feature(featureset, 2815 X86FSET_BMI2); 2816 2817 if (cpi->cpi_std[7].cp_ebx & 2818 CPUID_INTC_EBX_7_0_AVX2) 2819 add_x86_feature(featureset, 2820 X86FSET_AVX2); 2821 } 2822 2823 if (cpi->cpi_vendor == X86_VENDOR_Intel && 2824 (cpi->cpi_std[7].cp_ebx & 2825 CPUID_INTC_EBX_7_0_AVX512F) != 0) { 2826 add_x86_feature(featureset, X86FSET_AVX512F); 2827 2828 if (cpi->cpi_std[7].cp_ebx & 2829 CPUID_INTC_EBX_7_0_AVX512DQ) 2830 add_x86_feature(featureset, 2831 X86FSET_AVX512DQ); 2832 if (cpi->cpi_std[7].cp_ebx & 2833 CPUID_INTC_EBX_7_0_AVX512IFMA) 2834 add_x86_feature(featureset, 2835 X86FSET_AVX512FMA); 2836 if (cpi->cpi_std[7].cp_ebx & 2837 CPUID_INTC_EBX_7_0_AVX512PF) 2838 add_x86_feature(featureset, 2839 X86FSET_AVX512PF); 2840 if (cpi->cpi_std[7].cp_ebx & 2841 CPUID_INTC_EBX_7_0_AVX512ER) 2842 add_x86_feature(featureset, 2843 X86FSET_AVX512ER); 2844 if (cpi->cpi_std[7].cp_ebx & 2845 CPUID_INTC_EBX_7_0_AVX512CD) 2846 add_x86_feature(featureset, 2847 X86FSET_AVX512CD); 2848 if (cpi->cpi_std[7].cp_ebx & 2849 CPUID_INTC_EBX_7_0_AVX512BW) 2850 add_x86_feature(featureset, 2851 X86FSET_AVX512BW); 2852 if (cpi->cpi_std[7].cp_ebx & 2853 CPUID_INTC_EBX_7_0_AVX512VL) 2854 add_x86_feature(featureset, 2855 X86FSET_AVX512VL); 2856 2857 if (cpi->cpi_std[7].cp_ecx & 2858 CPUID_INTC_ECX_7_0_AVX512VBMI) 2859 add_x86_feature(featureset, 2860 X86FSET_AVX512VBMI); 2861 if (cpi->cpi_std[7].cp_ecx & 2862 CPUID_INTC_ECX_7_0_AVX512VNNI) 2863 add_x86_feature(featureset, 2864 X86FSET_AVX512VNNI); 2865 if (cpi->cpi_std[7].cp_ecx & 2866 
CPUID_INTC_ECX_7_0_AVX512VPOPCDQ) 2867 add_x86_feature(featureset, 2868 X86FSET_AVX512VPOPCDQ); 2869 2870 if (cpi->cpi_std[7].cp_edx & 2871 CPUID_INTC_EDX_7_0_AVX5124NNIW) 2872 add_x86_feature(featureset, 2873 X86FSET_AVX512NNIW); 2874 if (cpi->cpi_std[7].cp_edx & 2875 CPUID_INTC_EDX_7_0_AVX5124FMAPS) 2876 add_x86_feature(featureset, 2877 X86FSET_AVX512FMAPS); 2878 } 2879 } 2880 } 2881 2882 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 2883 if (cp->cp_ecx & CPUID_INTC_ECX_PCID) { 2884 add_x86_feature(featureset, X86FSET_PCID); 2885 } 2886 } 2887 2888 if (cp->cp_ecx & CPUID_INTC_ECX_X2APIC) { 2889 add_x86_feature(featureset, X86FSET_X2APIC); 2890 } 2891 if (cp->cp_edx & CPUID_INTC_EDX_DE) { 2892 add_x86_feature(featureset, X86FSET_DE); 2893 } 2894 #if !defined(__xpv) 2895 if (cp->cp_ecx & CPUID_INTC_ECX_MON) { 2896 2897 /* 2898 * We require the CLFLUSH instruction for erratum workaround 2899 * to use MONITOR/MWAIT. 2900 */ 2901 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) { 2902 cpi->cpi_mwait.support |= MWAIT_SUPPORT; 2903 add_x86_feature(featureset, X86FSET_MWAIT); 2904 } else { 2905 extern int idle_cpu_assert_cflush_monitor; 2906 2907 /* 2908 * All processors we are aware of which have 2909 * MONITOR/MWAIT also have CLFLUSH. 2910 */ 2911 if (idle_cpu_assert_cflush_monitor) { 2912 ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) && 2913 (cp->cp_edx & CPUID_INTC_EDX_CLFSH)); 2914 } 2915 } 2916 } 2917 #endif /* __xpv */ 2918 2919 if (cp->cp_ecx & CPUID_INTC_ECX_VMX) { 2920 add_x86_feature(featureset, X86FSET_VMX); 2921 } 2922 2923 if (cp->cp_ecx & CPUID_INTC_ECX_RDRAND) 2924 add_x86_feature(featureset, X86FSET_RDRAND); 2925 2926 /* 2927 * Only need it first time, rest of the cpus would follow suit. 2928 * we only capture this for the bootcpu. 2929 */ 2930 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) { 2931 add_x86_feature(featureset, X86FSET_CLFSH); 2932 x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8); 2933 } 2934 if (is_x86_feature(featureset, X86FSET_PAE)) 2935 cpi->cpi_pabits = 36; 2936 2937 if (cpi->cpi_maxeax >= 0xD && !xsave_force_disable) { 2938 struct cpuid_regs r, *ecp; 2939 2940 ecp = &r; 2941 ecp->cp_eax = 0xD; 2942 ecp->cp_ecx = 1; 2943 ecp->cp_edx = ecp->cp_ebx = 0; 2944 (void) __cpuid_insn(ecp); 2945 2946 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVEOPT) 2947 add_x86_feature(featureset, X86FSET_XSAVEOPT); 2948 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVEC) 2949 add_x86_feature(featureset, X86FSET_XSAVEC); 2950 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVES) 2951 add_x86_feature(featureset, X86FSET_XSAVES); 2952 } 2953 2954 /* 2955 * Work on the "extended" feature information, doing 2956 * some basic initialization for cpuid_pass2() 2957 */ 2958 xcpuid = 0; 2959 switch (cpi->cpi_vendor) { 2960 case X86_VENDOR_Intel: 2961 /* 2962 * On KVM we know we will have proper support for extended 2963 * cpuid. 2964 */ 2965 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf || 2966 (get_hwenv() == HW_KVM && cpi->cpi_family == 6 && 2967 (cpi->cpi_model == 6 || cpi->cpi_model == 2))) 2968 xcpuid++; 2969 break; 2970 case X86_VENDOR_AMD: 2971 if (cpi->cpi_family > 5 || 2972 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 2973 xcpuid++; 2974 break; 2975 case X86_VENDOR_Cyrix: 2976 /* 2977 * Only these Cyrix CPUs are -known- to support 2978 * extended cpuid operations. 
2979 */ 2980 if (x86_type == X86_TYPE_VIA_CYRIX_III || 2981 x86_type == X86_TYPE_CYRIX_GXm) 2982 xcpuid++; 2983 break; 2984 case X86_VENDOR_Centaur: 2985 case X86_VENDOR_TM: 2986 default: 2987 xcpuid++; 2988 break; 2989 } 2990 2991 if (xcpuid) { 2992 cp = &cpi->cpi_extd[0]; 2993 cp->cp_eax = CPUID_LEAF_EXT_0; 2994 cpi->cpi_xmaxeax = __cpuid_insn(cp); 2995 } 2996 2997 if (cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) { 2998 2999 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX) 3000 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX; 3001 3002 switch (cpi->cpi_vendor) { 3003 case X86_VENDOR_Intel: 3004 case X86_VENDOR_AMD: 3005 if (cpi->cpi_xmaxeax < 0x80000001) 3006 break; 3007 cp = &cpi->cpi_extd[1]; 3008 cp->cp_eax = 0x80000001; 3009 (void) __cpuid_insn(cp); 3010 3011 if (cpi->cpi_vendor == X86_VENDOR_AMD && 3012 cpi->cpi_family == 5 && 3013 cpi->cpi_model == 6 && 3014 cpi->cpi_step == 6) { 3015 /* 3016 * K6 model 6 uses bit 10 to indicate SYSC 3017 * Later models use bit 11. Fix it here. 3018 */ 3019 if (cp->cp_edx & 0x400) { 3020 cp->cp_edx &= ~0x400; 3021 cp->cp_edx |= CPUID_AMD_EDX_SYSC; 3022 } 3023 } 3024 3025 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp); 3026 3027 /* 3028 * Compute the additions to the kernel's feature word. 3029 */ 3030 if (cp->cp_edx & CPUID_AMD_EDX_NX) { 3031 add_x86_feature(featureset, X86FSET_NX); 3032 } 3033 3034 /* 3035 * Regardless whether or not we boot 64-bit, 3036 * we should have a way to identify whether 3037 * the CPU is capable of running 64-bit. 3038 */ 3039 if (cp->cp_edx & CPUID_AMD_EDX_LM) { 3040 add_x86_feature(featureset, X86FSET_64); 3041 } 3042 3043 /* 1 GB large page - enable only for 64 bit kernel */ 3044 if (cp->cp_edx & CPUID_AMD_EDX_1GPG) { 3045 add_x86_feature(featureset, X86FSET_1GPG); 3046 } 3047 3048 if ((cpi->cpi_vendor == X86_VENDOR_AMD) && 3049 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) && 3050 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) { 3051 add_x86_feature(featureset, X86FSET_SSE4A); 3052 } 3053 3054 /* 3055 * It's really tricky to support syscall/sysret in 3056 * the i386 kernel; we rely on sysenter/sysexit 3057 * instead. In the amd64 kernel, things are -way- 3058 * better. 3059 */ 3060 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) { 3061 add_x86_feature(featureset, X86FSET_ASYSC); 3062 } 3063 3064 /* 3065 * While we're thinking about system calls, note 3066 * that AMD processors don't support sysenter 3067 * in long mode at all, so don't try to program them. 3068 */ 3069 if (x86_vendor == X86_VENDOR_AMD) { 3070 remove_x86_feature(featureset, X86FSET_SEP); 3071 } 3072 3073 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) { 3074 add_x86_feature(featureset, X86FSET_TSCP); 3075 } 3076 3077 if (cp->cp_ecx & CPUID_AMD_ECX_SVM) { 3078 add_x86_feature(featureset, X86FSET_SVM); 3079 } 3080 3081 if (cp->cp_ecx & CPUID_AMD_ECX_TOPOEXT) { 3082 add_x86_feature(featureset, X86FSET_TOPOEXT); 3083 } 3084 3085 if (cp->cp_ecx & CPUID_AMD_ECX_PCEC) { 3086 add_x86_feature(featureset, X86FSET_AMD_PCEC); 3087 } 3088 3089 if (cp->cp_ecx & CPUID_AMD_ECX_XOP) { 3090 add_x86_feature(featureset, X86FSET_XOP); 3091 } 3092 3093 if (cp->cp_ecx & CPUID_AMD_ECX_FMA4) { 3094 add_x86_feature(featureset, X86FSET_FMA4); 3095 } 3096 3097 if (cp->cp_ecx & CPUID_AMD_ECX_TBM) { 3098 add_x86_feature(featureset, X86FSET_TBM); 3099 } 3100 3101 if (cp->cp_ecx & CPUID_AMD_ECX_MONITORX) { 3102 add_x86_feature(featureset, X86FSET_MONITORX); 3103 } 3104 break; 3105 default: 3106 break; 3107 } 3108 3109 /* 3110 * Get CPUID data about processor cores and hyperthreads. 
3111 */ 3112 switch (cpi->cpi_vendor) { 3113 case X86_VENDOR_Intel: 3114 if (cpi->cpi_maxeax >= 4) { 3115 cp = &cpi->cpi_std[4]; 3116 cp->cp_eax = 4; 3117 cp->cp_ecx = 0; 3118 (void) __cpuid_insn(cp); 3119 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp); 3120 } 3121 /*FALLTHROUGH*/ 3122 case X86_VENDOR_AMD: 3123 if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8) 3124 break; 3125 cp = &cpi->cpi_extd[8]; 3126 cp->cp_eax = CPUID_LEAF_EXT_8; 3127 (void) __cpuid_insn(cp); 3128 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8, 3129 cp); 3130 3131 /* 3132 * AMD uses ebx for some extended functions. 3133 */ 3134 if (cpi->cpi_vendor == X86_VENDOR_AMD) { 3135 /* 3136 * While we're here, check for the AMD "Error 3137 * Pointer Zero/Restore" feature. This can be 3138 * used to setup the FP save handlers 3139 * appropriately. 3140 */ 3141 if (cp->cp_ebx & CPUID_AMD_EBX_ERR_PTR_ZERO) { 3142 cpi->cpi_fp_amd_save = 0; 3143 } else { 3144 cpi->cpi_fp_amd_save = 1; 3145 } 3146 3147 if (cp->cp_ebx & CPUID_AMD_EBX_CLZERO) { 3148 add_x86_feature(featureset, 3149 X86FSET_CLZERO); 3150 } 3151 } 3152 3153 /* 3154 * Virtual and physical address limits from 3155 * cpuid override previously guessed values. 3156 */ 3157 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0); 3158 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8); 3159 break; 3160 default: 3161 break; 3162 } 3163 3164 /* 3165 * Get CPUID data about TSC Invariance in Deep C-State. 3166 */ 3167 switch (cpi->cpi_vendor) { 3168 case X86_VENDOR_Intel: 3169 case X86_VENDOR_AMD: 3170 if (cpi->cpi_maxeax >= 7) { 3171 cp = &cpi->cpi_extd[7]; 3172 cp->cp_eax = 0x80000007; 3173 cp->cp_ecx = 0; 3174 (void) __cpuid_insn(cp); 3175 } 3176 break; 3177 default: 3178 break; 3179 } 3180 } 3181 3182 cpuid_pass1_topology(cpu, featureset); 3183 3184 /* 3185 * Synthesize chip "revision" and socket type 3186 */ 3187 cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family, 3188 cpi->cpi_model, cpi->cpi_step); 3189 cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor, 3190 cpi->cpi_family, cpi->cpi_model, cpi->cpi_step); 3191 cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family, 3192 cpi->cpi_model, cpi->cpi_step); 3193 3194 if (cpi->cpi_vendor == X86_VENDOR_AMD) { 3195 if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8 && 3196 cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_ERR_PTR_ZERO) { 3197 /* Special handling for AMD FP not necessary. */ 3198 cpi->cpi_fp_amd_save = 0; 3199 } else { 3200 cpi->cpi_fp_amd_save = 1; 3201 } 3202 } 3203 3204 /* 3205 * Check the processor leaves that are used for security features. 3206 */ 3207 cpuid_scan_security(cpu, featureset); 3208 3209 pass1_done: 3210 cpi->cpi_pass = 1; 3211 } 3212 3213 /* 3214 * Make copies of the cpuid table entries we depend on, in 3215 * part for ease of parsing now, in part so that we have only 3216 * one place to correct any of it, in part for ease of 3217 * later export to userland, and in part so we can look at 3218 * this stuff in a crash dump. 
3219 */ 3220 3221 /*ARGSUSED*/ 3222 void 3223 cpuid_pass2(cpu_t *cpu) 3224 { 3225 uint_t n, nmax; 3226 int i; 3227 struct cpuid_regs *cp; 3228 uint8_t *dp; 3229 uint32_t *iptr; 3230 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 3231 3232 ASSERT(cpi->cpi_pass == 1); 3233 3234 if (cpi->cpi_maxeax < 1) 3235 goto pass2_done; 3236 3237 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD) 3238 nmax = NMAX_CPI_STD; 3239 /* 3240 * (We already handled n == 0 and n == 1 in pass 1) 3241 */ 3242 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) { 3243 cp->cp_eax = n; 3244 3245 /* 3246 * n == 7 was handled in pass 1 3247 */ 3248 if (n == 7) 3249 continue; 3250 3251 /* 3252 * CPUID function 4 expects %ecx to be initialized 3253 * with an index which indicates which cache to return 3254 * information about. The OS is expected to call function 4 3255 * with %ecx set to 0, 1, 2, ... until it returns with 3256 * EAX[4:0] set to 0, which indicates there are no more 3257 * caches. 3258 * 3259 * Here, populate cpi_std[4] with the information returned by 3260 * function 4 when %ecx == 0, and do the rest in cpuid_pass3() 3261 * when dynamic memory allocation becomes available. 3262 * 3263 * Note: we need to explicitly initialize %ecx here, since 3264 * function 4 may have been previously invoked. 3265 */ 3266 if (n == 4) 3267 cp->cp_ecx = 0; 3268 3269 (void) __cpuid_insn(cp); 3270 platform_cpuid_mangle(cpi->cpi_vendor, n, cp); 3271 switch (n) { 3272 case 2: 3273 /* 3274 * "the lower 8 bits of the %eax register 3275 * contain a value that identifies the number 3276 * of times the cpuid [instruction] has to be 3277 * executed to obtain a complete image of the 3278 * processor's caching systems." 3279 * 3280 * How *do* they make this stuff up? 3281 */ 3282 cpi->cpi_ncache = sizeof (*cp) * 3283 BITX(cp->cp_eax, 7, 0); 3284 if (cpi->cpi_ncache == 0) 3285 break; 3286 cpi->cpi_ncache--; /* skip count byte */ 3287 3288 /* 3289 * Well, for now, rather than attempt to implement 3290 * this slightly dubious algorithm, we just look 3291 * at the first 15 .. 3292 */ 3293 if (cpi->cpi_ncache > (sizeof (*cp) - 1)) 3294 cpi->cpi_ncache = sizeof (*cp) - 1; 3295 3296 dp = cpi->cpi_cacheinfo; 3297 if (BITX(cp->cp_eax, 31, 31) == 0) { 3298 uint8_t *p = (void *)&cp->cp_eax; 3299 for (i = 1; i < 4; i++) 3300 if (p[i] != 0) 3301 *dp++ = p[i]; 3302 } 3303 if (BITX(cp->cp_ebx, 31, 31) == 0) { 3304 uint8_t *p = (void *)&cp->cp_ebx; 3305 for (i = 0; i < 4; i++) 3306 if (p[i] != 0) 3307 *dp++ = p[i]; 3308 } 3309 if (BITX(cp->cp_ecx, 31, 31) == 0) { 3310 uint8_t *p = (void *)&cp->cp_ecx; 3311 for (i = 0; i < 4; i++) 3312 if (p[i] != 0) 3313 *dp++ = p[i]; 3314 } 3315 if (BITX(cp->cp_edx, 31, 31) == 0) { 3316 uint8_t *p = (void *)&cp->cp_edx; 3317 for (i = 0; i < 4; i++) 3318 if (p[i] != 0) 3319 *dp++ = p[i]; 3320 } 3321 break; 3322 3323 case 3: /* Processor serial number, if PSN supported */ 3324 break; 3325 3326 case 4: /* Deterministic cache parameters */ 3327 break; 3328 3329 case 5: /* Monitor/Mwait parameters */ 3330 { 3331 size_t mwait_size; 3332 3333 /* 3334 * check cpi_mwait.support which was set in cpuid_pass1 3335 */ 3336 if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT)) 3337 break; 3338 3339 /* 3340 * Protect ourself from insane mwait line size. 3341 * Workaround for incomplete hardware emulator(s). 
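 * (Illustrative aside: a sane CPU typically reports a largest
 * monitor-line size of 0x40 bytes; a reported size of zero, or one
 * that is not a power of two, fails the check below.)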
3342 */
3343 mwait_size = (size_t)MWAIT_SIZE_MAX(cpi);
3344 if (mwait_size < sizeof (uint32_t) ||
3345 !ISP2(mwait_size)) {
3346 #if DEBUG
3347 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait "
3348 "size %ld", cpu->cpu_id, (long)mwait_size);
3349 #endif
3350 break;
3351 }
3352
3353 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi);
3354 cpi->cpi_mwait.mon_max = mwait_size;
3355 if (MWAIT_EXTENSION(cpi)) {
3356 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS;
3357 if (MWAIT_INT_ENABLE(cpi))
3358 cpi->cpi_mwait.support |=
3359 MWAIT_ECX_INT_ENABLE;
3360 }
3361 break;
3362 }
3363 default:
3364 break;
3365 }
3366 }
3367
3368 /*
3369 * XSAVE enumeration
3370 */
3371 if (cpi->cpi_maxeax >= 0xD) {
3372 struct cpuid_regs regs;
3373 boolean_t cpuid_d_valid = B_TRUE;
3374
3375 cp = &regs;
3376 cp->cp_eax = 0xD;
3377 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
3378
3379 (void) __cpuid_insn(cp);
3380
3381 /*
3382 * Sanity checks for debug
3383 */
3384 if ((cp->cp_eax & XFEATURE_LEGACY_FP) == 0 ||
3385 (cp->cp_eax & XFEATURE_SSE) == 0) {
3386 cpuid_d_valid = B_FALSE;
3387 }
3388
3389 cpi->cpi_xsave.xsav_hw_features_low = cp->cp_eax;
3390 cpi->cpi_xsave.xsav_hw_features_high = cp->cp_edx;
3391 cpi->cpi_xsave.xsav_max_size = cp->cp_ecx;
3392
3393 /*
3394 * If the hw supports AVX, get the size and offset in the save
3395 * area for the ymm state.
3396 */
3397 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX) {
3398 cp->cp_eax = 0xD;
3399 cp->cp_ecx = 2;
3400 cp->cp_edx = cp->cp_ebx = 0;
3401
3402 (void) __cpuid_insn(cp);
3403
3404 if (cp->cp_ebx != CPUID_LEAFD_2_YMM_OFFSET ||
3405 cp->cp_eax != CPUID_LEAFD_2_YMM_SIZE) {
3406 cpuid_d_valid = B_FALSE;
3407 }
3408
3409 cpi->cpi_xsave.ymm_size = cp->cp_eax;
3410 cpi->cpi_xsave.ymm_offset = cp->cp_ebx;
3411 }
3412
3413 /*
3414 * If the hw supports MPX, get the size and offset in the
3415 * save area for BNDREGS and BNDCSR.
3416 */
3417 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_MPX) {
3418 cp->cp_eax = 0xD;
3419 cp->cp_ecx = 3;
3420 cp->cp_edx = cp->cp_ebx = 0;
3421
3422 (void) __cpuid_insn(cp);
3423
3424 cpi->cpi_xsave.bndregs_size = cp->cp_eax;
3425 cpi->cpi_xsave.bndregs_offset = cp->cp_ebx;
3426
3427 cp->cp_eax = 0xD;
3428 cp->cp_ecx = 4;
3429 cp->cp_edx = cp->cp_ebx = 0;
3430
3431 (void) __cpuid_insn(cp);
3432
3433 cpi->cpi_xsave.bndcsr_size = cp->cp_eax;
3434 cpi->cpi_xsave.bndcsr_offset = cp->cp_ebx;
3435 }
3436
3437 /*
3438 * If the hw supports AVX512, get the size and offset in the
3439 * save area for the opmask registers and zmm state.
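 * Per the general XSAVE enumeration convention, sub-leaf n (n >= 2) of
 * leaf 0xD reports the size of state component n in %eax and its offset
 * into the save area in %ebx; the AVX512 components queried below are
 * 5 (opmask), 6 (ZMM_Hi256) and 7 (Hi16_ZMM).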
3440 */ 3441 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX512) { 3442 cp->cp_eax = 0xD; 3443 cp->cp_ecx = 5; 3444 cp->cp_edx = cp->cp_ebx = 0; 3445 3446 (void) __cpuid_insn(cp); 3447 3448 cpi->cpi_xsave.opmask_size = cp->cp_eax; 3449 cpi->cpi_xsave.opmask_offset = cp->cp_ebx; 3450 3451 cp->cp_eax = 0xD; 3452 cp->cp_ecx = 6; 3453 cp->cp_edx = cp->cp_ebx = 0; 3454 3455 (void) __cpuid_insn(cp); 3456 3457 cpi->cpi_xsave.zmmlo_size = cp->cp_eax; 3458 cpi->cpi_xsave.zmmlo_offset = cp->cp_ebx; 3459 3460 cp->cp_eax = 0xD; 3461 cp->cp_ecx = 7; 3462 cp->cp_edx = cp->cp_ebx = 0; 3463 3464 (void) __cpuid_insn(cp); 3465 3466 cpi->cpi_xsave.zmmhi_size = cp->cp_eax; 3467 cpi->cpi_xsave.zmmhi_offset = cp->cp_ebx; 3468 } 3469 3470 if (is_x86_feature(x86_featureset, X86FSET_XSAVE)) { 3471 xsave_state_size = 0; 3472 } else if (cpuid_d_valid) { 3473 xsave_state_size = cpi->cpi_xsave.xsav_max_size; 3474 } else { 3475 /* Broken CPUID 0xD, probably in HVM */ 3476 cmn_err(CE_WARN, "cpu%d: CPUID.0xD returns invalid " 3477 "value: hw_low = %d, hw_high = %d, xsave_size = %d" 3478 ", ymm_size = %d, ymm_offset = %d\n", 3479 cpu->cpu_id, cpi->cpi_xsave.xsav_hw_features_low, 3480 cpi->cpi_xsave.xsav_hw_features_high, 3481 (int)cpi->cpi_xsave.xsav_max_size, 3482 (int)cpi->cpi_xsave.ymm_size, 3483 (int)cpi->cpi_xsave.ymm_offset); 3484 3485 if (xsave_state_size != 0) { 3486 /* 3487 * This must be a non-boot CPU. We cannot 3488 * continue, because boot cpu has already 3489 * enabled XSAVE. 3490 */ 3491 ASSERT(cpu->cpu_id != 0); 3492 cmn_err(CE_PANIC, "cpu%d: we have already " 3493 "enabled XSAVE on boot cpu, cannot " 3494 "continue.", cpu->cpu_id); 3495 } else { 3496 /* 3497 * If we reached here on the boot CPU, it's also 3498 * almost certain that we'll reach here on the 3499 * non-boot CPUs. When we're here on a boot CPU 3500 * we should disable the feature, on a non-boot 3501 * CPU we need to confirm that we have. 
3502 */ 3503 if (cpu->cpu_id == 0) { 3504 remove_x86_feature(x86_featureset, 3505 X86FSET_XSAVE); 3506 remove_x86_feature(x86_featureset, 3507 X86FSET_AVX); 3508 remove_x86_feature(x86_featureset, 3509 X86FSET_F16C); 3510 remove_x86_feature(x86_featureset, 3511 X86FSET_BMI1); 3512 remove_x86_feature(x86_featureset, 3513 X86FSET_BMI2); 3514 remove_x86_feature(x86_featureset, 3515 X86FSET_FMA); 3516 remove_x86_feature(x86_featureset, 3517 X86FSET_AVX2); 3518 remove_x86_feature(x86_featureset, 3519 X86FSET_MPX); 3520 remove_x86_feature(x86_featureset, 3521 X86FSET_AVX512F); 3522 remove_x86_feature(x86_featureset, 3523 X86FSET_AVX512DQ); 3524 remove_x86_feature(x86_featureset, 3525 X86FSET_AVX512PF); 3526 remove_x86_feature(x86_featureset, 3527 X86FSET_AVX512ER); 3528 remove_x86_feature(x86_featureset, 3529 X86FSET_AVX512CD); 3530 remove_x86_feature(x86_featureset, 3531 X86FSET_AVX512BW); 3532 remove_x86_feature(x86_featureset, 3533 X86FSET_AVX512VL); 3534 remove_x86_feature(x86_featureset, 3535 X86FSET_AVX512FMA); 3536 remove_x86_feature(x86_featureset, 3537 X86FSET_AVX512VBMI); 3538 remove_x86_feature(x86_featureset, 3539 X86FSET_AVX512VNNI); 3540 remove_x86_feature(x86_featureset, 3541 X86FSET_AVX512VPOPCDQ); 3542 remove_x86_feature(x86_featureset, 3543 X86FSET_AVX512NNIW); 3544 remove_x86_feature(x86_featureset, 3545 X86FSET_AVX512FMAPS); 3546 3547 CPI_FEATURES_ECX(cpi) &= 3548 ~CPUID_INTC_ECX_XSAVE; 3549 CPI_FEATURES_ECX(cpi) &= 3550 ~CPUID_INTC_ECX_AVX; 3551 CPI_FEATURES_ECX(cpi) &= 3552 ~CPUID_INTC_ECX_F16C; 3553 CPI_FEATURES_ECX(cpi) &= 3554 ~CPUID_INTC_ECX_FMA; 3555 CPI_FEATURES_7_0_EBX(cpi) &= 3556 ~CPUID_INTC_EBX_7_0_BMI1; 3557 CPI_FEATURES_7_0_EBX(cpi) &= 3558 ~CPUID_INTC_EBX_7_0_BMI2; 3559 CPI_FEATURES_7_0_EBX(cpi) &= 3560 ~CPUID_INTC_EBX_7_0_AVX2; 3561 CPI_FEATURES_7_0_EBX(cpi) &= 3562 ~CPUID_INTC_EBX_7_0_MPX; 3563 CPI_FEATURES_7_0_EBX(cpi) &= 3564 ~CPUID_INTC_EBX_7_0_ALL_AVX512; 3565 3566 CPI_FEATURES_7_0_ECX(cpi) &= 3567 ~CPUID_INTC_ECX_7_0_ALL_AVX512; 3568 3569 CPI_FEATURES_7_0_EDX(cpi) &= 3570 ~CPUID_INTC_EDX_7_0_ALL_AVX512; 3571 3572 xsave_force_disable = B_TRUE; 3573 } else { 3574 VERIFY(is_x86_feature(x86_featureset, 3575 X86FSET_XSAVE) == B_FALSE); 3576 } 3577 } 3578 } 3579 } 3580 3581 3582 if ((cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) == 0) 3583 goto pass2_done; 3584 3585 if ((nmax = cpi->cpi_xmaxeax - CPUID_LEAF_EXT_0 + 1) > NMAX_CPI_EXTD) 3586 nmax = NMAX_CPI_EXTD; 3587 /* 3588 * Copy the extended properties, fixing them as we go. 3589 * (We already handled n == 0 and n == 1 in pass 1) 3590 */ 3591 iptr = (void *)cpi->cpi_brandstr; 3592 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) { 3593 cp->cp_eax = CPUID_LEAF_EXT_0 + n; 3594 (void) __cpuid_insn(cp); 3595 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_0 + n, 3596 cp); 3597 switch (n) { 3598 case 2: 3599 case 3: 3600 case 4: 3601 /* 3602 * Extract the brand string 3603 */ 3604 *iptr++ = cp->cp_eax; 3605 *iptr++ = cp->cp_ebx; 3606 *iptr++ = cp->cp_ecx; 3607 *iptr++ = cp->cp_edx; 3608 break; 3609 case 5: 3610 switch (cpi->cpi_vendor) { 3611 case X86_VENDOR_AMD: 3612 /* 3613 * The Athlon and Duron were the first 3614 * parts to report the sizes of the 3615 * TLB for large pages. Before then, 3616 * we don't trust the data. 
3617 */ 3618 if (cpi->cpi_family < 6 || 3619 (cpi->cpi_family == 6 && 3620 cpi->cpi_model < 1)) 3621 cp->cp_eax = 0; 3622 break; 3623 default: 3624 break; 3625 } 3626 break; 3627 case 6: 3628 switch (cpi->cpi_vendor) { 3629 case X86_VENDOR_AMD: 3630 /* 3631 * The Athlon and Duron were the first 3632 * AMD parts with L2 TLB's. 3633 * Before then, don't trust the data. 3634 */ 3635 if (cpi->cpi_family < 6 || 3636 cpi->cpi_family == 6 && 3637 cpi->cpi_model < 1) 3638 cp->cp_eax = cp->cp_ebx = 0; 3639 /* 3640 * AMD Duron rev A0 reports L2 3641 * cache size incorrectly as 1K 3642 * when it is really 64K 3643 */ 3644 if (cpi->cpi_family == 6 && 3645 cpi->cpi_model == 3 && 3646 cpi->cpi_step == 0) { 3647 cp->cp_ecx &= 0xffff; 3648 cp->cp_ecx |= 0x400000; 3649 } 3650 break; 3651 case X86_VENDOR_Cyrix: /* VIA C3 */ 3652 /* 3653 * VIA C3 processors are a bit messed 3654 * up w.r.t. encoding cache sizes in %ecx 3655 */ 3656 if (cpi->cpi_family != 6) 3657 break; 3658 /* 3659 * model 7 and 8 were incorrectly encoded 3660 * 3661 * xxx is model 8 really broken? 3662 */ 3663 if (cpi->cpi_model == 7 || 3664 cpi->cpi_model == 8) 3665 cp->cp_ecx = 3666 BITX(cp->cp_ecx, 31, 24) << 16 | 3667 BITX(cp->cp_ecx, 23, 16) << 12 | 3668 BITX(cp->cp_ecx, 15, 8) << 8 | 3669 BITX(cp->cp_ecx, 7, 0); 3670 /* 3671 * model 9 stepping 1 has wrong associativity 3672 */ 3673 if (cpi->cpi_model == 9 && cpi->cpi_step == 1) 3674 cp->cp_ecx |= 8 << 12; 3675 break; 3676 case X86_VENDOR_Intel: 3677 /* 3678 * Extended L2 Cache features function. 3679 * First appeared on Prescott. 3680 */ 3681 default: 3682 break; 3683 } 3684 break; 3685 default: 3686 break; 3687 } 3688 } 3689 3690 pass2_done: 3691 cpi->cpi_pass = 2; 3692 } 3693 3694 static const char * 3695 intel_cpubrand(const struct cpuid_info *cpi) 3696 { 3697 int i; 3698 3699 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) || 3700 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 3701 return ("i486"); 3702 3703 switch (cpi->cpi_family) { 3704 case 5: 3705 return ("Intel Pentium(r)"); 3706 case 6: 3707 switch (cpi->cpi_model) { 3708 uint_t celeron, xeon; 3709 const struct cpuid_regs *cp; 3710 case 0: 3711 case 1: 3712 case 2: 3713 return ("Intel Pentium(r) Pro"); 3714 case 3: 3715 case 4: 3716 return ("Intel Pentium(r) II"); 3717 case 6: 3718 return ("Intel Celeron(r)"); 3719 case 5: 3720 case 7: 3721 celeron = xeon = 0; 3722 cp = &cpi->cpi_std[2]; /* cache info */ 3723 3724 for (i = 1; i < 4; i++) { 3725 uint_t tmp; 3726 3727 tmp = (cp->cp_eax >> (8 * i)) & 0xff; 3728 if (tmp == 0x40) 3729 celeron++; 3730 if (tmp >= 0x44 && tmp <= 0x45) 3731 xeon++; 3732 } 3733 3734 for (i = 0; i < 2; i++) { 3735 uint_t tmp; 3736 3737 tmp = (cp->cp_ebx >> (8 * i)) & 0xff; 3738 if (tmp == 0x40) 3739 celeron++; 3740 else if (tmp >= 0x44 && tmp <= 0x45) 3741 xeon++; 3742 } 3743 3744 for (i = 0; i < 4; i++) { 3745 uint_t tmp; 3746 3747 tmp = (cp->cp_ecx >> (8 * i)) & 0xff; 3748 if (tmp == 0x40) 3749 celeron++; 3750 else if (tmp >= 0x44 && tmp <= 0x45) 3751 xeon++; 3752 } 3753 3754 for (i = 0; i < 4; i++) { 3755 uint_t tmp; 3756 3757 tmp = (cp->cp_edx >> (8 * i)) & 0xff; 3758 if (tmp == 0x40) 3759 celeron++; 3760 else if (tmp >= 0x44 && tmp <= 0x45) 3761 xeon++; 3762 } 3763 3764 if (celeron) 3765 return ("Intel Celeron(r)"); 3766 if (xeon) 3767 return (cpi->cpi_model == 5 ? 3768 "Intel Pentium(r) II Xeon(tm)" : 3769 "Intel Pentium(r) III Xeon(tm)"); 3770 return (cpi->cpi_model == 5 ? 
3771 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" : 3772 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)"); 3773 default: 3774 break; 3775 } 3776 default: 3777 break; 3778 } 3779 3780 /* BrandID is present if the field is nonzero */ 3781 if (cpi->cpi_brandid != 0) { 3782 static const struct { 3783 uint_t bt_bid; 3784 const char *bt_str; 3785 } brand_tbl[] = { 3786 { 0x1, "Intel(r) Celeron(r)" }, 3787 { 0x2, "Intel(r) Pentium(r) III" }, 3788 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" }, 3789 { 0x4, "Intel(r) Pentium(r) III" }, 3790 { 0x6, "Mobile Intel(r) Pentium(r) III" }, 3791 { 0x7, "Mobile Intel(r) Celeron(r)" }, 3792 { 0x8, "Intel(r) Pentium(r) 4" }, 3793 { 0x9, "Intel(r) Pentium(r) 4" }, 3794 { 0xa, "Intel(r) Celeron(r)" }, 3795 { 0xb, "Intel(r) Xeon(tm)" }, 3796 { 0xc, "Intel(r) Xeon(tm) MP" }, 3797 { 0xe, "Mobile Intel(r) Pentium(r) 4" }, 3798 { 0xf, "Mobile Intel(r) Celeron(r)" }, 3799 { 0x11, "Mobile Genuine Intel(r)" }, 3800 { 0x12, "Intel(r) Celeron(r) M" }, 3801 { 0x13, "Mobile Intel(r) Celeron(r)" }, 3802 { 0x14, "Intel(r) Celeron(r)" }, 3803 { 0x15, "Mobile Genuine Intel(r)" }, 3804 { 0x16, "Intel(r) Pentium(r) M" }, 3805 { 0x17, "Mobile Intel(r) Celeron(r)" } 3806 }; 3807 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]); 3808 uint_t sgn; 3809 3810 sgn = (cpi->cpi_family << 8) | 3811 (cpi->cpi_model << 4) | cpi->cpi_step; 3812 3813 for (i = 0; i < btblmax; i++) 3814 if (brand_tbl[i].bt_bid == cpi->cpi_brandid) 3815 break; 3816 if (i < btblmax) { 3817 if (sgn == 0x6b1 && cpi->cpi_brandid == 3) 3818 return ("Intel(r) Celeron(r)"); 3819 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb) 3820 return ("Intel(r) Xeon(tm) MP"); 3821 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe) 3822 return ("Intel(r) Xeon(tm)"); 3823 return (brand_tbl[i].bt_str); 3824 } 3825 } 3826 3827 return (NULL); 3828 } 3829 3830 static const char * 3831 amd_cpubrand(const struct cpuid_info *cpi) 3832 { 3833 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) || 3834 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 3835 return ("i486 compatible"); 3836 3837 switch (cpi->cpi_family) { 3838 case 5: 3839 switch (cpi->cpi_model) { 3840 case 0: 3841 case 1: 3842 case 2: 3843 case 3: 3844 case 4: 3845 case 5: 3846 return ("AMD-K5(r)"); 3847 case 6: 3848 case 7: 3849 return ("AMD-K6(r)"); 3850 case 8: 3851 return ("AMD-K6(r)-2"); 3852 case 9: 3853 return ("AMD-K6(r)-III"); 3854 default: 3855 return ("AMD (family 5)"); 3856 } 3857 case 6: 3858 switch (cpi->cpi_model) { 3859 case 1: 3860 return ("AMD-K7(tm)"); 3861 case 0: 3862 case 2: 3863 case 4: 3864 return ("AMD Athlon(tm)"); 3865 case 3: 3866 case 7: 3867 return ("AMD Duron(tm)"); 3868 case 6: 3869 case 8: 3870 case 10: 3871 /* 3872 * Use the L2 cache size to distinguish 3873 */ 3874 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ? 
3875 "AMD Athlon(tm)" : "AMD Duron(tm)"); 3876 default: 3877 return ("AMD (family 6)"); 3878 } 3879 default: 3880 break; 3881 } 3882 3883 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 && 3884 cpi->cpi_brandid != 0) { 3885 switch (BITX(cpi->cpi_brandid, 7, 5)) { 3886 case 3: 3887 return ("AMD Opteron(tm) UP 1xx"); 3888 case 4: 3889 return ("AMD Opteron(tm) DP 2xx"); 3890 case 5: 3891 return ("AMD Opteron(tm) MP 8xx"); 3892 default: 3893 return ("AMD Opteron(tm)"); 3894 } 3895 } 3896 3897 return (NULL); 3898 } 3899 3900 static const char * 3901 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type) 3902 { 3903 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) || 3904 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 || 3905 type == X86_TYPE_CYRIX_486) 3906 return ("i486 compatible"); 3907 3908 switch (type) { 3909 case X86_TYPE_CYRIX_6x86: 3910 return ("Cyrix 6x86"); 3911 case X86_TYPE_CYRIX_6x86L: 3912 return ("Cyrix 6x86L"); 3913 case X86_TYPE_CYRIX_6x86MX: 3914 return ("Cyrix 6x86MX"); 3915 case X86_TYPE_CYRIX_GXm: 3916 return ("Cyrix GXm"); 3917 case X86_TYPE_CYRIX_MediaGX: 3918 return ("Cyrix MediaGX"); 3919 case X86_TYPE_CYRIX_MII: 3920 return ("Cyrix M2"); 3921 case X86_TYPE_VIA_CYRIX_III: 3922 return ("VIA Cyrix M3"); 3923 default: 3924 /* 3925 * Have another wild guess .. 3926 */ 3927 if (cpi->cpi_family == 4 && cpi->cpi_model == 9) 3928 return ("Cyrix 5x86"); 3929 else if (cpi->cpi_family == 5) { 3930 switch (cpi->cpi_model) { 3931 case 2: 3932 return ("Cyrix 6x86"); /* Cyrix M1 */ 3933 case 4: 3934 return ("Cyrix MediaGX"); 3935 default: 3936 break; 3937 } 3938 } else if (cpi->cpi_family == 6) { 3939 switch (cpi->cpi_model) { 3940 case 0: 3941 return ("Cyrix 6x86MX"); /* Cyrix M2? */ 3942 case 5: 3943 case 6: 3944 case 7: 3945 case 8: 3946 case 9: 3947 return ("VIA C3"); 3948 default: 3949 break; 3950 } 3951 } 3952 break; 3953 } 3954 return (NULL); 3955 } 3956 3957 /* 3958 * This only gets called in the case that the CPU extended 3959 * feature brand string (0x80000002, 0x80000003, 0x80000004) 3960 * aren't available, or contain null bytes for some reason. 3961 */ 3962 static void 3963 fabricate_brandstr(struct cpuid_info *cpi) 3964 { 3965 const char *brand = NULL; 3966 3967 switch (cpi->cpi_vendor) { 3968 case X86_VENDOR_Intel: 3969 brand = intel_cpubrand(cpi); 3970 break; 3971 case X86_VENDOR_AMD: 3972 brand = amd_cpubrand(cpi); 3973 break; 3974 case X86_VENDOR_Cyrix: 3975 brand = cyrix_cpubrand(cpi, x86_type); 3976 break; 3977 case X86_VENDOR_NexGen: 3978 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 3979 brand = "NexGen Nx586"; 3980 break; 3981 case X86_VENDOR_Centaur: 3982 if (cpi->cpi_family == 5) 3983 switch (cpi->cpi_model) { 3984 case 4: 3985 brand = "Centaur C6"; 3986 break; 3987 case 8: 3988 brand = "Centaur C2"; 3989 break; 3990 case 9: 3991 brand = "Centaur C3"; 3992 break; 3993 default: 3994 break; 3995 } 3996 break; 3997 case X86_VENDOR_Rise: 3998 if (cpi->cpi_family == 5 && 3999 (cpi->cpi_model == 0 || cpi->cpi_model == 2)) 4000 brand = "Rise mP6"; 4001 break; 4002 case X86_VENDOR_SiS: 4003 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 4004 brand = "SiS 55x"; 4005 break; 4006 case X86_VENDOR_TM: 4007 if (cpi->cpi_family == 5 && cpi->cpi_model == 4) 4008 brand = "Transmeta Crusoe TM3x00 or TM5x00"; 4009 break; 4010 case X86_VENDOR_NSC: 4011 case X86_VENDOR_UMC: 4012 default: 4013 break; 4014 } 4015 if (brand) { 4016 (void) strcpy((char *)cpi->cpi_brandstr, brand); 4017 return; 4018 } 4019 4020 /* 4021 * If all else fails ... 
4022 */
4023 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr),
4024 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family,
4025 cpi->cpi_model, cpi->cpi_step);
4026 }
4027
4028 /*
4029 * This routine is called just after kernel memory allocation
4030 * becomes available on cpu0, and as part of mp_startup() on
4031 * the other cpus.
4032 *
4033 * Fixup the brand string, and collect any information from cpuid
4034 * that requires dynamically allocated storage to represent.
4035 */
4036 /*ARGSUSED*/
4037 void
4038 cpuid_pass3(cpu_t *cpu)
4039 {
4040 int i, max, shft, level, size;
4041 struct cpuid_regs regs;
4042 struct cpuid_regs *cp;
4043 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4044
4045 ASSERT(cpi->cpi_pass == 2);
4046
4047 /*
4048 * Deterministic cache parameters
4049 *
4050 * Intel uses leaf 0x4 for this, while AMD uses leaf 0x8000001d. The
4051 * values that are present are currently defined to be the same. This
4052 * means we can use the same logic to parse it as long as we use the
4053 * appropriate leaf to get the data. If you're updating this, make sure
4054 * you're careful about which vendor supports which aspect.
4055 *
4056 * Take this opportunity to detect the number of threads sharing the
4057 * last level cache, and construct a corresponding cache id. The
4058 * respective cpuid_info members are initialized to the default case of
4059 * "no last level cache sharing".
4060 */
4061 cpi->cpi_ncpu_shr_last_cache = 1;
4062 cpi->cpi_last_lvl_cacheid = cpu->cpu_id;
4063
4064 if ((cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) ||
4065 (cpi->cpi_vendor == X86_VENDOR_AMD &&
4066 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1d &&
4067 is_x86_feature(x86_featureset, X86FSET_TOPOEXT))) {
4068 uint32_t leaf;
4069
4070 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
4071 leaf = 4;
4072 } else {
4073 leaf = CPUID_LEAF_EXT_1d;
4074 }
4075
4076 /*
4077 * Find the # of elements (size) returned by the leaf and along
4078 * the way detect last level cache sharing details.
4079 */
4080 bzero(&regs, sizeof (regs));
4081 cp = &regs;
4082 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) {
4083 cp->cp_eax = leaf;
4084 cp->cp_ecx = i;
4085
4086 (void) __cpuid_insn(cp);
4087
4088 if (CPI_CACHE_TYPE(cp) == 0)
4089 break;
4090 level = CPI_CACHE_LVL(cp);
4091 if (level > max) {
4092 max = level;
4093 cpi->cpi_ncpu_shr_last_cache =
4094 CPI_NTHR_SHR_CACHE(cp) + 1;
4095 }
4096 }
4097 cpi->cpi_cache_leaf_size = size = i;
4098
4099 /*
4100 * Allocate the cpi_cache_leaves array. The first element
4101 * references the regs for the corresponding leaf with %ecx set
4102 * to 0. This was gathered in cpuid_pass2().
4103 */
4104 if (size > 0) {
4105 cpi->cpi_cache_leaves =
4106 kmem_alloc(size * sizeof (cp), KM_SLEEP);
4107 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
4108 cpi->cpi_cache_leaves[0] = &cpi->cpi_std[4];
4109 } else {
4110 cpi->cpi_cache_leaves[0] = &cpi->cpi_extd[0x1d];
4111 }
4112
4113 /*
4114 * Allocate storage to hold the additional regs
4115 * for the leaf, %ecx == 1 .. cpi_cache_leaf_size.
4116 *
4117 * The regs for the leaf, %ecx == 0 has already
4118 * been allocated as indicated above.
4119 */
4120 for (i = 1; i < size; i++) {
4121 cp = cpi->cpi_cache_leaves[i] =
4122 kmem_zalloc(sizeof (regs), KM_SLEEP);
4123 cp->cp_eax = leaf;
4124 cp->cp_ecx = i;
4125
4126 (void) __cpuid_insn(cp);
4127 }
4128 }
4129 /*
4130 * Determine the number of bits needed to represent
4131 * the number of CPUs sharing the last level cache.
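 * (For example, with 6 CPUs sharing the cache the loop below rounds
 * up to the next power of two and computes a shift of 3.)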
4132 * 4133 * Shift off that number of bits from the APIC id to 4134 * derive the cache id. 4135 */ 4136 shft = 0; 4137 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1) 4138 shft++; 4139 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft; 4140 } 4141 4142 /* 4143 * Now fixup the brand string 4144 */ 4145 if ((cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) == 0) { 4146 fabricate_brandstr(cpi); 4147 } else { 4148 4149 /* 4150 * If we successfully extracted a brand string from the cpuid 4151 * instruction, clean it up by removing leading spaces and 4152 * similar junk. 4153 */ 4154 if (cpi->cpi_brandstr[0]) { 4155 size_t maxlen = sizeof (cpi->cpi_brandstr); 4156 char *src, *dst; 4157 4158 dst = src = (char *)cpi->cpi_brandstr; 4159 src[maxlen - 1] = '\0'; 4160 /* 4161 * strip leading spaces 4162 */ 4163 while (*src == ' ') 4164 src++; 4165 /* 4166 * Remove any 'Genuine' or "Authentic" prefixes 4167 */ 4168 if (strncmp(src, "Genuine ", 8) == 0) 4169 src += 8; 4170 if (strncmp(src, "Authentic ", 10) == 0) 4171 src += 10; 4172 4173 /* 4174 * Now do an in-place copy. 4175 * Map (R) to (r) and (TM) to (tm). 4176 * The era of teletypes is long gone, and there's 4177 * -really- no need to shout. 4178 */ 4179 while (*src != '\0') { 4180 if (src[0] == '(') { 4181 if (strncmp(src + 1, "R)", 2) == 0) { 4182 (void) strncpy(dst, "(r)", 3); 4183 src += 3; 4184 dst += 3; 4185 continue; 4186 } 4187 if (strncmp(src + 1, "TM)", 3) == 0) { 4188 (void) strncpy(dst, "(tm)", 4); 4189 src += 4; 4190 dst += 4; 4191 continue; 4192 } 4193 } 4194 *dst++ = *src++; 4195 } 4196 *dst = '\0'; 4197 4198 /* 4199 * Finally, remove any trailing spaces 4200 */ 4201 while (--dst > cpi->cpi_brandstr) 4202 if (*dst == ' ') 4203 *dst = '\0'; 4204 else 4205 break; 4206 } else 4207 fabricate_brandstr(cpi); 4208 } 4209 cpi->cpi_pass = 3; 4210 } 4211 4212 /* 4213 * This routine is called out of bind_hwcap() much later in the life 4214 * of the kernel (post_startup()). The job of this routine is to resolve 4215 * the hardware feature support and kernel support for those features into 4216 * what we're actually going to tell applications via the aux vector. 
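 * (The two hwcap words computed here are what bind_hwcap() ultimately
 * exposes to processes through the AT_SUN_HWCAP and AT_SUN_HWCAP2
 * aux vector entries.)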
4217 */ 4218 void 4219 cpuid_pass4(cpu_t *cpu, uint_t *hwcap_out) 4220 { 4221 struct cpuid_info *cpi; 4222 uint_t hwcap_flags = 0, hwcap_flags_2 = 0; 4223 4224 if (cpu == NULL) 4225 cpu = CPU; 4226 cpi = cpu->cpu_m.mcpu_cpi; 4227 4228 ASSERT(cpi->cpi_pass == 3); 4229 4230 if (cpi->cpi_maxeax >= 1) { 4231 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES]; 4232 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES]; 4233 uint32_t *ebx = &cpi->cpi_support[STD_EBX_FEATURES]; 4234 4235 *edx = CPI_FEATURES_EDX(cpi); 4236 *ecx = CPI_FEATURES_ECX(cpi); 4237 *ebx = CPI_FEATURES_7_0_EBX(cpi); 4238 4239 /* 4240 * [these require explicit kernel support] 4241 */ 4242 if (!is_x86_feature(x86_featureset, X86FSET_SEP)) 4243 *edx &= ~CPUID_INTC_EDX_SEP; 4244 4245 if (!is_x86_feature(x86_featureset, X86FSET_SSE)) 4246 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE); 4247 if (!is_x86_feature(x86_featureset, X86FSET_SSE2)) 4248 *edx &= ~CPUID_INTC_EDX_SSE2; 4249 4250 if (!is_x86_feature(x86_featureset, X86FSET_HTT)) 4251 *edx &= ~CPUID_INTC_EDX_HTT; 4252 4253 if (!is_x86_feature(x86_featureset, X86FSET_SSE3)) 4254 *ecx &= ~CPUID_INTC_ECX_SSE3; 4255 4256 if (!is_x86_feature(x86_featureset, X86FSET_SSSE3)) 4257 *ecx &= ~CPUID_INTC_ECX_SSSE3; 4258 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_1)) 4259 *ecx &= ~CPUID_INTC_ECX_SSE4_1; 4260 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_2)) 4261 *ecx &= ~CPUID_INTC_ECX_SSE4_2; 4262 if (!is_x86_feature(x86_featureset, X86FSET_AES)) 4263 *ecx &= ~CPUID_INTC_ECX_AES; 4264 if (!is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ)) 4265 *ecx &= ~CPUID_INTC_ECX_PCLMULQDQ; 4266 if (!is_x86_feature(x86_featureset, X86FSET_XSAVE)) 4267 *ecx &= ~(CPUID_INTC_ECX_XSAVE | 4268 CPUID_INTC_ECX_OSXSAVE); 4269 if (!is_x86_feature(x86_featureset, X86FSET_AVX)) 4270 *ecx &= ~CPUID_INTC_ECX_AVX; 4271 if (!is_x86_feature(x86_featureset, X86FSET_F16C)) 4272 *ecx &= ~CPUID_INTC_ECX_F16C; 4273 if (!is_x86_feature(x86_featureset, X86FSET_FMA)) 4274 *ecx &= ~CPUID_INTC_ECX_FMA; 4275 if (!is_x86_feature(x86_featureset, X86FSET_BMI1)) 4276 *ebx &= ~CPUID_INTC_EBX_7_0_BMI1; 4277 if (!is_x86_feature(x86_featureset, X86FSET_BMI2)) 4278 *ebx &= ~CPUID_INTC_EBX_7_0_BMI2; 4279 if (!is_x86_feature(x86_featureset, X86FSET_AVX2)) 4280 *ebx &= ~CPUID_INTC_EBX_7_0_AVX2; 4281 if (!is_x86_feature(x86_featureset, X86FSET_RDSEED)) 4282 *ebx &= ~CPUID_INTC_EBX_7_0_RDSEED; 4283 if (!is_x86_feature(x86_featureset, X86FSET_ADX)) 4284 *ebx &= ~CPUID_INTC_EBX_7_0_ADX; 4285 4286 /* 4287 * [no explicit support required beyond x87 fp context] 4288 */ 4289 if (!fpu_exists) 4290 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX); 4291 4292 /* 4293 * Now map the supported feature vector to things that we 4294 * think userland will care about. 
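 *
 * (For illustration only: a userland consumer would normally test
 * these bits via getisax(2) rather than issuing cpuid itself, e.g.
 *
 *	#include <sys/auxv.h>
 *
 *	uint32_t ui;
 *
 *	(void) getisax(&ui, 1);
 *	if (ui & AV_386_SSE2)
 *		... select an SSE2 code path ...
 *
 * where the SSE2 path stands in for whatever the application actually
 * does with the information.)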
4295 */ 4296 if (*edx & CPUID_INTC_EDX_SEP) 4297 hwcap_flags |= AV_386_SEP; 4298 if (*edx & CPUID_INTC_EDX_SSE) 4299 hwcap_flags |= AV_386_FXSR | AV_386_SSE; 4300 if (*edx & CPUID_INTC_EDX_SSE2) 4301 hwcap_flags |= AV_386_SSE2; 4302 if (*ecx & CPUID_INTC_ECX_SSE3) 4303 hwcap_flags |= AV_386_SSE3; 4304 if (*ecx & CPUID_INTC_ECX_SSSE3) 4305 hwcap_flags |= AV_386_SSSE3; 4306 if (*ecx & CPUID_INTC_ECX_SSE4_1) 4307 hwcap_flags |= AV_386_SSE4_1; 4308 if (*ecx & CPUID_INTC_ECX_SSE4_2) 4309 hwcap_flags |= AV_386_SSE4_2; 4310 if (*ecx & CPUID_INTC_ECX_MOVBE) 4311 hwcap_flags |= AV_386_MOVBE; 4312 if (*ecx & CPUID_INTC_ECX_AES) 4313 hwcap_flags |= AV_386_AES; 4314 if (*ecx & CPUID_INTC_ECX_PCLMULQDQ) 4315 hwcap_flags |= AV_386_PCLMULQDQ; 4316 if ((*ecx & CPUID_INTC_ECX_XSAVE) && 4317 (*ecx & CPUID_INTC_ECX_OSXSAVE)) { 4318 hwcap_flags |= AV_386_XSAVE; 4319 4320 if (*ecx & CPUID_INTC_ECX_AVX) { 4321 uint32_t *ecx_7 = &CPI_FEATURES_7_0_ECX(cpi); 4322 uint32_t *edx_7 = &CPI_FEATURES_7_0_EDX(cpi); 4323 4324 hwcap_flags |= AV_386_AVX; 4325 if (*ecx & CPUID_INTC_ECX_F16C) 4326 hwcap_flags_2 |= AV_386_2_F16C; 4327 if (*ecx & CPUID_INTC_ECX_FMA) 4328 hwcap_flags_2 |= AV_386_2_FMA; 4329 4330 if (*ebx & CPUID_INTC_EBX_7_0_BMI1) 4331 hwcap_flags_2 |= AV_386_2_BMI1; 4332 if (*ebx & CPUID_INTC_EBX_7_0_BMI2) 4333 hwcap_flags_2 |= AV_386_2_BMI2; 4334 if (*ebx & CPUID_INTC_EBX_7_0_AVX2) 4335 hwcap_flags_2 |= AV_386_2_AVX2; 4336 if (*ebx & CPUID_INTC_EBX_7_0_AVX512F) 4337 hwcap_flags_2 |= AV_386_2_AVX512F; 4338 if (*ebx & CPUID_INTC_EBX_7_0_AVX512DQ) 4339 hwcap_flags_2 |= AV_386_2_AVX512DQ; 4340 if (*ebx & CPUID_INTC_EBX_7_0_AVX512IFMA) 4341 hwcap_flags_2 |= AV_386_2_AVX512IFMA; 4342 if (*ebx & CPUID_INTC_EBX_7_0_AVX512PF) 4343 hwcap_flags_2 |= AV_386_2_AVX512PF; 4344 if (*ebx & CPUID_INTC_EBX_7_0_AVX512ER) 4345 hwcap_flags_2 |= AV_386_2_AVX512ER; 4346 if (*ebx & CPUID_INTC_EBX_7_0_AVX512CD) 4347 hwcap_flags_2 |= AV_386_2_AVX512CD; 4348 if (*ebx & CPUID_INTC_EBX_7_0_AVX512BW) 4349 hwcap_flags_2 |= AV_386_2_AVX512BW; 4350 if (*ebx & CPUID_INTC_EBX_7_0_AVX512VL) 4351 hwcap_flags_2 |= AV_386_2_AVX512VL; 4352 4353 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VBMI) 4354 hwcap_flags_2 |= AV_386_2_AVX512VBMI; 4355 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VNNI) 4356 hwcap_flags_2 |= AV_386_2_AVX512_VNNI; 4357 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VPOPCDQ) 4358 hwcap_flags_2 |= AV_386_2_AVX512VPOPCDQ; 4359 4360 if (*edx_7 & CPUID_INTC_EDX_7_0_AVX5124NNIW) 4361 hwcap_flags_2 |= AV_386_2_AVX512_4NNIW; 4362 if (*edx_7 & CPUID_INTC_EDX_7_0_AVX5124FMAPS) 4363 hwcap_flags_2 |= AV_386_2_AVX512_4FMAPS; 4364 } 4365 } 4366 if (*ecx & CPUID_INTC_ECX_VMX) 4367 hwcap_flags |= AV_386_VMX; 4368 if (*ecx & CPUID_INTC_ECX_POPCNT) 4369 hwcap_flags |= AV_386_POPCNT; 4370 if (*edx & CPUID_INTC_EDX_FPU) 4371 hwcap_flags |= AV_386_FPU; 4372 if (*edx & CPUID_INTC_EDX_MMX) 4373 hwcap_flags |= AV_386_MMX; 4374 4375 if (*edx & CPUID_INTC_EDX_TSC) 4376 hwcap_flags |= AV_386_TSC; 4377 if (*edx & CPUID_INTC_EDX_CX8) 4378 hwcap_flags |= AV_386_CX8; 4379 if (*edx & CPUID_INTC_EDX_CMOV) 4380 hwcap_flags |= AV_386_CMOV; 4381 if (*ecx & CPUID_INTC_ECX_CX16) 4382 hwcap_flags |= AV_386_CX16; 4383 4384 if (*ecx & CPUID_INTC_ECX_RDRAND) 4385 hwcap_flags_2 |= AV_386_2_RDRAND; 4386 if (*ebx & CPUID_INTC_EBX_7_0_ADX) 4387 hwcap_flags_2 |= AV_386_2_ADX; 4388 if (*ebx & CPUID_INTC_EBX_7_0_RDSEED) 4389 hwcap_flags_2 |= AV_386_2_RDSEED; 4390 if (*ebx & CPUID_INTC_EBX_7_0_SHA) 4391 hwcap_flags_2 |= AV_386_2_SHA; 4392 if (*ebx & CPUID_INTC_EBX_7_0_FSGSBASE) 4393 hwcap_flags_2 |= 
AV_386_2_FSGSBASE;
4394 if (*ebx & CPUID_INTC_EBX_7_0_CLWB)
4395 hwcap_flags_2 |= AV_386_2_CLWB;
4396 if (*ebx & CPUID_INTC_EBX_7_0_CLFLUSHOPT)
4397 hwcap_flags_2 |= AV_386_2_CLFLUSHOPT;
4398
4399 }
4400 /*
4401 * Check a few miscellaneous features.
4402 */
4403 if (is_x86_feature(x86_featureset, X86FSET_CLZERO))
4404 hwcap_flags_2 |= AV_386_2_CLZERO;
4405
4406 if (cpi->cpi_xmaxeax < 0x80000001)
4407 goto pass4_done;
4408
4409 switch (cpi->cpi_vendor) {
4410 struct cpuid_regs cp;
4411 uint32_t *edx, *ecx;
4412
4413 case X86_VENDOR_Intel:
4414 /*
4415 * Seems like Intel duplicated what was necessary
4416 * here to make the initial crop of 64-bit OS's work.
4417 * Hopefully, those are the only "extended" bits
4418 * they'll add.
4419 */
4420 /*FALLTHROUGH*/
4421
4422 case X86_VENDOR_AMD:
4423 edx = &cpi->cpi_support[AMD_EDX_FEATURES];
4424 ecx = &cpi->cpi_support[AMD_ECX_FEATURES];
4425
4426 *edx = CPI_FEATURES_XTD_EDX(cpi);
4427 *ecx = CPI_FEATURES_XTD_ECX(cpi);
4428
4429 /*
4430 * [these features require explicit kernel support]
4431 */
4432 switch (cpi->cpi_vendor) {
4433 case X86_VENDOR_Intel:
4434 if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
4435 *edx &= ~CPUID_AMD_EDX_TSCP;
4436 break;
4437
4438 case X86_VENDOR_AMD:
4439 if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
4440 *edx &= ~CPUID_AMD_EDX_TSCP;
4441 if (!is_x86_feature(x86_featureset, X86FSET_SSE4A))
4442 *ecx &= ~CPUID_AMD_ECX_SSE4A;
4443 break;
4444
4445 default:
4446 break;
4447 }
4448
4449 /*
4450 * [no explicit support required beyond
4451 * x87 fp context and exception handlers]
4452 */
4453 if (!fpu_exists)
4454 *edx &= ~(CPUID_AMD_EDX_MMXamd |
4455 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx);
4456
4457 if (!is_x86_feature(x86_featureset, X86FSET_NX))
4458 *edx &= ~CPUID_AMD_EDX_NX;
4459 #if !defined(__amd64)
4460 *edx &= ~CPUID_AMD_EDX_LM;
4461 #endif
4462 /*
4463 * Now map the supported feature vector to
4464 * things that we think userland will care about.
4465 */
4466 #if defined(__amd64)
4467 if (*edx & CPUID_AMD_EDX_SYSC)
4468 hwcap_flags |= AV_386_AMD_SYSC;
4469 #endif
4470 if (*edx & CPUID_AMD_EDX_MMXamd)
4471 hwcap_flags |= AV_386_AMD_MMX;
4472 if (*edx & CPUID_AMD_EDX_3DNow)
4473 hwcap_flags |= AV_386_AMD_3DNow;
4474 if (*edx & CPUID_AMD_EDX_3DNowx)
4475 hwcap_flags |= AV_386_AMD_3DNowx;
4476 if (*ecx & CPUID_AMD_ECX_SVM)
4477 hwcap_flags |= AV_386_AMD_SVM;
4478
4479 switch (cpi->cpi_vendor) {
4480 case X86_VENDOR_AMD:
4481 if (*edx & CPUID_AMD_EDX_TSCP)
4482 hwcap_flags |= AV_386_TSCP;
4483 if (*ecx & CPUID_AMD_ECX_AHF64)
4484 hwcap_flags |= AV_386_AHF;
4485 if (*ecx & CPUID_AMD_ECX_SSE4A)
4486 hwcap_flags |= AV_386_AMD_SSE4A;
4487 if (*ecx & CPUID_AMD_ECX_LZCNT)
4488 hwcap_flags |= AV_386_AMD_LZCNT;
4489 if (*ecx & CPUID_AMD_ECX_MONITORX)
4490 hwcap_flags_2 |= AV_386_2_MONITORX;
4491 break;
4492
4493 case X86_VENDOR_Intel:
4494 if (*edx & CPUID_AMD_EDX_TSCP)
4495 hwcap_flags |= AV_386_TSCP;
4496 if (*ecx & CPUID_AMD_ECX_LZCNT)
4497 hwcap_flags |= AV_386_AMD_LZCNT;
4498 /*
4499 * Aarrgh.
4500 * Intel uses a different bit in the same word.
4501 */ 4502 if (*ecx & CPUID_INTC_ECX_AHF64) 4503 hwcap_flags |= AV_386_AHF; 4504 break; 4505 4506 default: 4507 break; 4508 } 4509 break; 4510 4511 case X86_VENDOR_TM: 4512 cp.cp_eax = 0x80860001; 4513 (void) __cpuid_insn(&cp); 4514 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx; 4515 break; 4516 4517 default: 4518 break; 4519 } 4520 4521 pass4_done: 4522 cpi->cpi_pass = 4; 4523 if (hwcap_out != NULL) { 4524 hwcap_out[0] = hwcap_flags; 4525 hwcap_out[1] = hwcap_flags_2; 4526 } 4527 } 4528 4529 4530 /* 4531 * Simulate the cpuid instruction using the data we previously 4532 * captured about this CPU. We try our best to return the truth 4533 * about the hardware, independently of kernel support. 4534 */ 4535 uint32_t 4536 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp) 4537 { 4538 struct cpuid_info *cpi; 4539 struct cpuid_regs *xcp; 4540 4541 if (cpu == NULL) 4542 cpu = CPU; 4543 cpi = cpu->cpu_m.mcpu_cpi; 4544 4545 ASSERT(cpuid_checkpass(cpu, 3)); 4546 4547 /* 4548 * CPUID data is cached in two separate places: cpi_std for standard 4549 * CPUID leaves , and cpi_extd for extended CPUID leaves. 4550 */ 4551 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD) { 4552 xcp = &cpi->cpi_std[cp->cp_eax]; 4553 } else if (cp->cp_eax >= CPUID_LEAF_EXT_0 && 4554 cp->cp_eax <= cpi->cpi_xmaxeax && 4555 cp->cp_eax < CPUID_LEAF_EXT_0 + NMAX_CPI_EXTD) { 4556 xcp = &cpi->cpi_extd[cp->cp_eax - CPUID_LEAF_EXT_0]; 4557 } else { 4558 /* 4559 * The caller is asking for data from an input parameter which 4560 * the kernel has not cached. In this case we go fetch from 4561 * the hardware and return the data directly to the user. 4562 */ 4563 return (__cpuid_insn(cp)); 4564 } 4565 4566 cp->cp_eax = xcp->cp_eax; 4567 cp->cp_ebx = xcp->cp_ebx; 4568 cp->cp_ecx = xcp->cp_ecx; 4569 cp->cp_edx = xcp->cp_edx; 4570 return (cp->cp_eax); 4571 } 4572 4573 int 4574 cpuid_checkpass(cpu_t *cpu, int pass) 4575 { 4576 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL && 4577 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass); 4578 } 4579 4580 int 4581 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n) 4582 { 4583 ASSERT(cpuid_checkpass(cpu, 3)); 4584 4585 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr)); 4586 } 4587 4588 int 4589 cpuid_is_cmt(cpu_t *cpu) 4590 { 4591 if (cpu == NULL) 4592 cpu = CPU; 4593 4594 ASSERT(cpuid_checkpass(cpu, 1)); 4595 4596 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0); 4597 } 4598 4599 /* 4600 * AMD and Intel both implement the 64-bit variant of the syscall 4601 * instruction (syscallq), so if there's -any- support for syscall, 4602 * cpuid currently says "yes, we support this". 4603 * 4604 * However, Intel decided to -not- implement the 32-bit variant of the 4605 * syscall instruction, so we provide a predicate to allow our caller 4606 * to test that subtlety here. 4607 * 4608 * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor, 4609 * even in the case where the hardware would in fact support it. 4610 */ 4611 /*ARGSUSED*/ 4612 int 4613 cpuid_syscall32_insn(cpu_t *cpu) 4614 { 4615 ASSERT(cpuid_checkpass((cpu == NULL ? 
CPU : cpu), 1)); 4616 4617 #if !defined(__xpv) 4618 if (cpu == NULL) 4619 cpu = CPU; 4620 4621 /*CSTYLED*/ 4622 { 4623 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 4624 4625 if (cpi->cpi_vendor == X86_VENDOR_AMD && 4626 cpi->cpi_xmaxeax >= 0x80000001 && 4627 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC)) 4628 return (1); 4629 } 4630 #endif 4631 return (0); 4632 } 4633 4634 int 4635 cpuid_getidstr(cpu_t *cpu, char *s, size_t n) 4636 { 4637 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 4638 4639 static const char fmt[] = 4640 "x86 (%s %X family %d model %d step %d clock %d MHz)"; 4641 static const char fmt_ht[] = 4642 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)"; 4643 4644 ASSERT(cpuid_checkpass(cpu, 1)); 4645 4646 if (cpuid_is_cmt(cpu)) 4647 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid, 4648 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 4649 cpi->cpi_family, cpi->cpi_model, 4650 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 4651 return (snprintf(s, n, fmt, 4652 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 4653 cpi->cpi_family, cpi->cpi_model, 4654 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 4655 } 4656 4657 const char * 4658 cpuid_getvendorstr(cpu_t *cpu) 4659 { 4660 ASSERT(cpuid_checkpass(cpu, 1)); 4661 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr); 4662 } 4663 4664 uint_t 4665 cpuid_getvendor(cpu_t *cpu) 4666 { 4667 ASSERT(cpuid_checkpass(cpu, 1)); 4668 return (cpu->cpu_m.mcpu_cpi->cpi_vendor); 4669 } 4670 4671 uint_t 4672 cpuid_getfamily(cpu_t *cpu) 4673 { 4674 ASSERT(cpuid_checkpass(cpu, 1)); 4675 return (cpu->cpu_m.mcpu_cpi->cpi_family); 4676 } 4677 4678 uint_t 4679 cpuid_getmodel(cpu_t *cpu) 4680 { 4681 ASSERT(cpuid_checkpass(cpu, 1)); 4682 return (cpu->cpu_m.mcpu_cpi->cpi_model); 4683 } 4684 4685 uint_t 4686 cpuid_get_ncpu_per_chip(cpu_t *cpu) 4687 { 4688 ASSERT(cpuid_checkpass(cpu, 1)); 4689 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip); 4690 } 4691 4692 uint_t 4693 cpuid_get_ncore_per_chip(cpu_t *cpu) 4694 { 4695 ASSERT(cpuid_checkpass(cpu, 1)); 4696 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip); 4697 } 4698 4699 uint_t 4700 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu) 4701 { 4702 ASSERT(cpuid_checkpass(cpu, 2)); 4703 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache); 4704 } 4705 4706 id_t 4707 cpuid_get_last_lvl_cacheid(cpu_t *cpu) 4708 { 4709 ASSERT(cpuid_checkpass(cpu, 2)); 4710 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid); 4711 } 4712 4713 uint_t 4714 cpuid_getstep(cpu_t *cpu) 4715 { 4716 ASSERT(cpuid_checkpass(cpu, 1)); 4717 return (cpu->cpu_m.mcpu_cpi->cpi_step); 4718 } 4719 4720 uint_t 4721 cpuid_getsig(struct cpu *cpu) 4722 { 4723 ASSERT(cpuid_checkpass(cpu, 1)); 4724 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax); 4725 } 4726 4727 uint32_t 4728 cpuid_getchiprev(struct cpu *cpu) 4729 { 4730 ASSERT(cpuid_checkpass(cpu, 1)); 4731 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev); 4732 } 4733 4734 const char * 4735 cpuid_getchiprevstr(struct cpu *cpu) 4736 { 4737 ASSERT(cpuid_checkpass(cpu, 1)); 4738 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr); 4739 } 4740 4741 uint32_t 4742 cpuid_getsockettype(struct cpu *cpu) 4743 { 4744 ASSERT(cpuid_checkpass(cpu, 1)); 4745 return (cpu->cpu_m.mcpu_cpi->cpi_socket); 4746 } 4747 4748 const char * 4749 cpuid_getsocketstr(cpu_t *cpu) 4750 { 4751 static const char *socketstr = NULL; 4752 struct cpuid_info *cpi; 4753 4754 ASSERT(cpuid_checkpass(cpu, 1)); 4755 cpi = cpu->cpu_m.mcpu_cpi; 4756 4757 /* Assume that socket types are the same across the system */ 4758 if (socketstr == NULL) 4759 socketstr 
= _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family, 4760 cpi->cpi_model, cpi->cpi_step); 4761 4762 4763 return (socketstr); 4764 } 4765 4766 int 4767 cpuid_get_chipid(cpu_t *cpu) 4768 { 4769 ASSERT(cpuid_checkpass(cpu, 1)); 4770 4771 if (cpuid_is_cmt(cpu)) 4772 return (cpu->cpu_m.mcpu_cpi->cpi_chipid); 4773 return (cpu->cpu_id); 4774 } 4775 4776 id_t 4777 cpuid_get_coreid(cpu_t *cpu) 4778 { 4779 ASSERT(cpuid_checkpass(cpu, 1)); 4780 return (cpu->cpu_m.mcpu_cpi->cpi_coreid); 4781 } 4782 4783 int 4784 cpuid_get_pkgcoreid(cpu_t *cpu) 4785 { 4786 ASSERT(cpuid_checkpass(cpu, 1)); 4787 return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid); 4788 } 4789 4790 int 4791 cpuid_get_clogid(cpu_t *cpu) 4792 { 4793 ASSERT(cpuid_checkpass(cpu, 1)); 4794 return (cpu->cpu_m.mcpu_cpi->cpi_clogid); 4795 } 4796 4797 int 4798 cpuid_get_cacheid(cpu_t *cpu) 4799 { 4800 ASSERT(cpuid_checkpass(cpu, 1)); 4801 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid); 4802 } 4803 4804 uint_t 4805 cpuid_get_procnodeid(cpu_t *cpu) 4806 { 4807 ASSERT(cpuid_checkpass(cpu, 1)); 4808 return (cpu->cpu_m.mcpu_cpi->cpi_procnodeid); 4809 } 4810 4811 uint_t 4812 cpuid_get_procnodes_per_pkg(cpu_t *cpu) 4813 { 4814 ASSERT(cpuid_checkpass(cpu, 1)); 4815 return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg); 4816 } 4817 4818 uint_t 4819 cpuid_get_compunitid(cpu_t *cpu) 4820 { 4821 ASSERT(cpuid_checkpass(cpu, 1)); 4822 return (cpu->cpu_m.mcpu_cpi->cpi_compunitid); 4823 } 4824 4825 uint_t 4826 cpuid_get_cores_per_compunit(cpu_t *cpu) 4827 { 4828 ASSERT(cpuid_checkpass(cpu, 1)); 4829 return (cpu->cpu_m.mcpu_cpi->cpi_cores_per_compunit); 4830 } 4831 4832 /*ARGSUSED*/ 4833 int 4834 cpuid_have_cr8access(cpu_t *cpu) 4835 { 4836 #if defined(__amd64) 4837 return (1); 4838 #else 4839 struct cpuid_info *cpi; 4840 4841 ASSERT(cpu != NULL); 4842 cpi = cpu->cpu_m.mcpu_cpi; 4843 if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 && 4844 (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0) 4845 return (1); 4846 return (0); 4847 #endif 4848 } 4849 4850 uint32_t 4851 cpuid_get_apicid(cpu_t *cpu) 4852 { 4853 ASSERT(cpuid_checkpass(cpu, 1)); 4854 if (cpu->cpu_m.mcpu_cpi->cpi_maxeax < 1) { 4855 return (UINT32_MAX); 4856 } else { 4857 return (cpu->cpu_m.mcpu_cpi->cpi_apicid); 4858 } 4859 } 4860 4861 void 4862 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits) 4863 { 4864 struct cpuid_info *cpi; 4865 4866 if (cpu == NULL) 4867 cpu = CPU; 4868 cpi = cpu->cpu_m.mcpu_cpi; 4869 4870 ASSERT(cpuid_checkpass(cpu, 1)); 4871 4872 if (pabits) 4873 *pabits = cpi->cpi_pabits; 4874 if (vabits) 4875 *vabits = cpi->cpi_vabits; 4876 } 4877 4878 size_t 4879 cpuid_get_xsave_size() 4880 { 4881 return (MAX(cpuid_info0.cpi_xsave.xsav_max_size, 4882 sizeof (struct xsave_state))); 4883 } 4884 4885 /* 4886 * Return true if the CPUs on this system require 'pointer clearing' for the 4887 * floating point error pointer exception handling. In the past, this has been 4888 * true for all AMD K7 & K8 CPUs, although newer AMD CPUs have been changed to 4889 * behave the same as Intel. This is checked via the CPUID_AMD_EBX_ERR_PTR_ZERO 4890 * feature bit and is reflected in the cpi_fp_amd_save member. 4891 */ 4892 boolean_t 4893 cpuid_need_fp_excp_handling() 4894 { 4895 return (cpuid_info0.cpi_vendor == X86_VENDOR_AMD && 4896 cpuid_info0.cpi_fp_amd_save != 0); 4897 } 4898 4899 /* 4900 * Returns the number of data TLB entries for a corresponding 4901 * pagesize. If it can't be computed, or isn't known, the 4902 * routine returns zero. 
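 * (For example, cpuid_get_dtlb_nent(CPU, 4 * 1024) reports the number
 * of 4K data TLB entries, preferring the L2 TLB information and
 * falling back to the L1 TLB below.)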
If you ask about an architecturally 4903 * impossible pagesize, the routine will panic (so that the 4904 * hat implementor knows that things are inconsistent.) 4905 */ 4906 uint_t 4907 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize) 4908 { 4909 struct cpuid_info *cpi; 4910 uint_t dtlb_nent = 0; 4911 4912 if (cpu == NULL) 4913 cpu = CPU; 4914 cpi = cpu->cpu_m.mcpu_cpi; 4915 4916 ASSERT(cpuid_checkpass(cpu, 1)); 4917 4918 /* 4919 * Check the L2 TLB info 4920 */ 4921 if (cpi->cpi_xmaxeax >= 0x80000006) { 4922 struct cpuid_regs *cp = &cpi->cpi_extd[6]; 4923 4924 switch (pagesize) { 4925 4926 case 4 * 1024: 4927 /* 4928 * All zero in the top 16 bits of the register 4929 * indicates a unified TLB. Size is in low 16 bits. 4930 */ 4931 if ((cp->cp_ebx & 0xffff0000) == 0) 4932 dtlb_nent = cp->cp_ebx & 0x0000ffff; 4933 else 4934 dtlb_nent = BITX(cp->cp_ebx, 27, 16); 4935 break; 4936 4937 case 2 * 1024 * 1024: 4938 if ((cp->cp_eax & 0xffff0000) == 0) 4939 dtlb_nent = cp->cp_eax & 0x0000ffff; 4940 else 4941 dtlb_nent = BITX(cp->cp_eax, 27, 16); 4942 break; 4943 4944 default: 4945 panic("unknown L2 pagesize"); 4946 /*NOTREACHED*/ 4947 } 4948 } 4949 4950 if (dtlb_nent != 0) 4951 return (dtlb_nent); 4952 4953 /* 4954 * No L2 TLB support for this size, try L1. 4955 */ 4956 if (cpi->cpi_xmaxeax >= 0x80000005) { 4957 struct cpuid_regs *cp = &cpi->cpi_extd[5]; 4958 4959 switch (pagesize) { 4960 case 4 * 1024: 4961 dtlb_nent = BITX(cp->cp_ebx, 23, 16); 4962 break; 4963 case 2 * 1024 * 1024: 4964 dtlb_nent = BITX(cp->cp_eax, 23, 16); 4965 break; 4966 default: 4967 panic("unknown L1 d-TLB pagesize"); 4968 /*NOTREACHED*/ 4969 } 4970 } 4971 4972 return (dtlb_nent); 4973 } 4974 4975 /* 4976 * Return 0 if the erratum is not present or not applicable, positive 4977 * if it is, and negative if the status of the erratum is unknown. 4978 * 4979 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm) 4980 * Processors" #25759, Rev 3.57, August 2005 4981 */ 4982 int 4983 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum) 4984 { 4985 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 4986 uint_t eax; 4987 4988 /* 4989 * Bail out if this CPU isn't an AMD CPU, or if it's 4990 * a legacy (32-bit) AMD CPU. 
4991 */ 4992 if (cpi->cpi_vendor != X86_VENDOR_AMD || 4993 cpi->cpi_family == 4 || cpi->cpi_family == 5 || 4994 cpi->cpi_family == 6) { 4995 return (0); 4996 } 4997 4998 eax = cpi->cpi_std[1].cp_eax; 4999 5000 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50) 5001 #define SH_B3(eax) (eax == 0xf51) 5002 #define B(eax) (SH_B0(eax) || SH_B3(eax)) 5003 5004 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58) 5005 5006 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a) 5007 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0) 5008 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2) 5009 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax)) 5010 5011 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70) 5012 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0) 5013 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0) 5014 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax)) 5015 5016 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70) 5017 #define JH_E1(eax) (eax == 0x20f10) /* JH8_E0 had 0x20f30 */ 5018 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0) 5019 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71) 5020 #define BH_E4(eax) (eax == 0x20fb1) 5021 #define SH_E5(eax) (eax == 0x20f42) 5022 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2) 5023 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32) 5024 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \ 5025 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \ 5026 DH_E6(eax) || JH_E6(eax)) 5027 5028 #define DR_AX(eax) (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02) 5029 #define DR_B0(eax) (eax == 0x100f20) 5030 #define DR_B1(eax) (eax == 0x100f21) 5031 #define DR_BA(eax) (eax == 0x100f2a) 5032 #define DR_B2(eax) (eax == 0x100f22) 5033 #define DR_B3(eax) (eax == 0x100f23) 5034 #define RB_C0(eax) (eax == 0x100f40) 5035 5036 switch (erratum) { 5037 case 1: 5038 return (cpi->cpi_family < 0x10); 5039 case 51: /* what does the asterisk mean? 
*/ 5040 return (B(eax) || SH_C0(eax) || CG(eax)); 5041 case 52: 5042 return (B(eax)); 5043 case 57: 5044 return (cpi->cpi_family <= 0x11); 5045 case 58: 5046 return (B(eax)); 5047 case 60: 5048 return (cpi->cpi_family <= 0x11); 5049 case 61: 5050 case 62: 5051 case 63: 5052 case 64: 5053 case 65: 5054 case 66: 5055 case 68: 5056 case 69: 5057 case 70: 5058 case 71: 5059 return (B(eax)); 5060 case 72: 5061 return (SH_B0(eax)); 5062 case 74: 5063 return (B(eax)); 5064 case 75: 5065 return (cpi->cpi_family < 0x10); 5066 case 76: 5067 return (B(eax)); 5068 case 77: 5069 return (cpi->cpi_family <= 0x11); 5070 case 78: 5071 return (B(eax) || SH_C0(eax)); 5072 case 79: 5073 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 5074 case 80: 5075 case 81: 5076 case 82: 5077 return (B(eax)); 5078 case 83: 5079 return (B(eax) || SH_C0(eax) || CG(eax)); 5080 case 85: 5081 return (cpi->cpi_family < 0x10); 5082 case 86: 5083 return (SH_C0(eax) || CG(eax)); 5084 case 88: 5085 #if !defined(__amd64) 5086 return (0); 5087 #else 5088 return (B(eax) || SH_C0(eax)); 5089 #endif 5090 case 89: 5091 return (cpi->cpi_family < 0x10); 5092 case 90: 5093 return (B(eax) || SH_C0(eax) || CG(eax)); 5094 case 91: 5095 case 92: 5096 return (B(eax) || SH_C0(eax)); 5097 case 93: 5098 return (SH_C0(eax)); 5099 case 94: 5100 return (B(eax) || SH_C0(eax) || CG(eax)); 5101 case 95: 5102 #if !defined(__amd64) 5103 return (0); 5104 #else 5105 return (B(eax) || SH_C0(eax)); 5106 #endif 5107 case 96: 5108 return (B(eax) || SH_C0(eax) || CG(eax)); 5109 case 97: 5110 case 98: 5111 return (SH_C0(eax) || CG(eax)); 5112 case 99: 5113 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 5114 case 100: 5115 return (B(eax) || SH_C0(eax)); 5116 case 101: 5117 case 103: 5118 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 5119 case 104: 5120 return (SH_C0(eax) || CG(eax) || D0(eax)); 5121 case 105: 5122 case 106: 5123 case 107: 5124 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 5125 case 108: 5126 return (DH_CG(eax)); 5127 case 109: 5128 return (SH_C0(eax) || CG(eax) || D0(eax)); 5129 case 110: 5130 return (D0(eax) || EX(eax)); 5131 case 111: 5132 return (CG(eax)); 5133 case 112: 5134 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 5135 case 113: 5136 return (eax == 0x20fc0); 5137 case 114: 5138 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 5139 case 115: 5140 return (SH_E0(eax) || JH_E1(eax)); 5141 case 116: 5142 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 5143 case 117: 5144 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 5145 case 118: 5146 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) || 5147 JH_E6(eax)); 5148 case 121: 5149 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 5150 case 122: 5151 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11); 5152 case 123: 5153 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax)); 5154 case 131: 5155 return (cpi->cpi_family < 0x10); 5156 case 6336786: 5157 5158 /* 5159 * Test for AdvPowerMgmtInfo.TscPStateInvariant 5160 * if this is a K8 family or newer processor. We're testing for 5161 * this 'erratum' to determine whether or not we have a constant 5162 * TSC. 5163 * 5164 * Our current fix for this is to disable the C1-Clock ramping. 5165 * However, this doesn't work on newer processor families nor 5166 * does it work when virtualized as those devices don't exist. 
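 * (The 0x100 tested below is bit 8 of leaf 0x80000007 %edx, the
 * invariant-TSC indication: if it is set the TSC does not vary with
 * P-state changes and the erratum is not reported.)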
5167 */
5168 if (cpi->cpi_family >= 0x12 || get_hwenv() != HW_NATIVE) {
5169 return (0);
5170 }
5171
5172 if (CPI_FAMILY(cpi) == 0xf) {
5173 struct cpuid_regs regs;
5174 regs.cp_eax = 0x80000007;
5175 (void) __cpuid_insn(&regs);
5176 return (!(regs.cp_edx & 0x100));
5177 }
5178 return (0);
5179 case 6323525:
5180 /*
5181 * This erratum (K8 #147) is not present on family 10 and newer.
5182 */
5183 if (cpi->cpi_family >= 0x10) {
5184 return (0);
5185 }
5186 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) |
5187 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40);
5188
5189 case 6671130:
5190 /*
5191 * check for processors (pre-Shanghai) that do not provide
5192 * optimal management of 1gb ptes in their tlb.
5193 */
5194 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4);
5195
5196 case 298:
5197 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) ||
5198 DR_B2(eax) || RB_C0(eax));
5199
5200 case 721:
5201 #if defined(__amd64)
5202 return (cpi->cpi_family == 0x10 || cpi->cpi_family == 0x12);
5203 #else
5204 return (0);
5205 #endif
5206
5207 default:
5208 return (-1);
5209
5210 }
5211 }
5212
5213 /*
5214 * Determine if specified erratum is present via OSVW (OS Visible Workaround).
5215 * Return 1 if erratum is present, 0 if not present and -1 if indeterminate.
5216 */
5217 int
5218 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum)
5219 {
5220 struct cpuid_info *cpi;
5221 uint_t osvwid;
5222 static int osvwfeature = -1;
5223 uint64_t osvwlength;
5224
5225
5226 cpi = cpu->cpu_m.mcpu_cpi;
5227
5228 /* confirm OSVW supported */
5229 if (osvwfeature == -1) {
5230 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW;
5231 } else {
5232 /* assert that osvw feature setting is consistent on all cpus */
5233 ASSERT(osvwfeature ==
5234 (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW));
5235 }
5236 if (!osvwfeature)
5237 return (-1);
5238
5239 osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK;
5240
5241 switch (erratum) {
5242 case 298: /* osvwid is 0 */
5243 osvwid = 0;
5244 if (osvwlength <= (uint64_t)osvwid) {
5245 /* osvwid 0 is unknown */
5246 return (-1);
5247 }
5248
5249 /*
5250 * Check the OSVW STATUS MSR to determine the state
5251 * of the erratum where:
5252 * 0 - fixed by HW
5253 * 1 - BIOS has applied the workaround when BIOS
5254 * workaround is available. (Or for other errata,
5255 * OS workaround is required.)
5256 * For a value of 1, caller will confirm that the
5257 * erratum 298 workaround has indeed been applied by BIOS.
5258 *
5259 * A 1 may be set in cpus that have a HW fix
5260 * in a mixed cpu system. Regarding erratum 298:
5261 * In a multiprocessor platform, the workaround above
5262 * should be applied to all processors regardless of
5263 * silicon revision when an affected processor is
5264 * present.
5265 */
5266
5267 return (rdmsr(MSR_AMD_OSVW_STATUS +
5268 (osvwid / OSVW_ID_CNT_PER_MSR)) &
5269 (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR)));
5270
5271 default:
5272 return (-1);
5273 }
5274 }
5275
5276 static const char assoc_str[] = "associativity";
5277 static const char line_str[] = "line-size";
5278 static const char size_str[] = "size";
5279
5280 static void
5281 add_cache_prop(dev_info_t *devi, const char *label, const char *type,
5282 uint32_t val)
5283 {
5284 char buf[128];
5285
5286 /*
5287 * ndi_prop_update_int() is used because it is desirable for
5288 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set.
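 * The properties created here are named "<label>-<type>", e.g.
 * "l2-cache-size" or "l1-icache-associativity".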
5289 */ 5290 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf)) 5291 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val); 5292 } 5293 5294 /* 5295 * Intel-style cache/tlb description 5296 * 5297 * Standard cpuid level 2 gives a randomly ordered 5298 * selection of tags that index into a table that describes 5299 * cache and tlb properties. 5300 */ 5301 5302 static const char l1_icache_str[] = "l1-icache"; 5303 static const char l1_dcache_str[] = "l1-dcache"; 5304 static const char l2_cache_str[] = "l2-cache"; 5305 static const char l3_cache_str[] = "l3-cache"; 5306 static const char itlb4k_str[] = "itlb-4K"; 5307 static const char dtlb4k_str[] = "dtlb-4K"; 5308 static const char itlb2M_str[] = "itlb-2M"; 5309 static const char itlb4M_str[] = "itlb-4M"; 5310 static const char dtlb4M_str[] = "dtlb-4M"; 5311 static const char dtlb24_str[] = "dtlb0-2M-4M"; 5312 static const char itlb424_str[] = "itlb-4K-2M-4M"; 5313 static const char itlb24_str[] = "itlb-2M-4M"; 5314 static const char dtlb44_str[] = "dtlb-4K-4M"; 5315 static const char sl1_dcache_str[] = "sectored-l1-dcache"; 5316 static const char sl2_cache_str[] = "sectored-l2-cache"; 5317 static const char itrace_str[] = "itrace-cache"; 5318 static const char sl3_cache_str[] = "sectored-l3-cache"; 5319 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k"; 5320 5321 static const struct cachetab { 5322 uint8_t ct_code; 5323 uint8_t ct_assoc; 5324 uint16_t ct_line_size; 5325 size_t ct_size; 5326 const char *ct_label; 5327 } intel_ctab[] = { 5328 /* 5329 * maintain descending order! 5330 * 5331 * Codes ignored - Reason 5332 * ---------------------- 5333 * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache 5334 * f0H/f1H - Currently we do not interpret prefetch size by design 5335 */ 5336 { 0xe4, 16, 64, 8*1024*1024, l3_cache_str}, 5337 { 0xe3, 16, 64, 4*1024*1024, l3_cache_str}, 5338 { 0xe2, 16, 64, 2*1024*1024, l3_cache_str}, 5339 { 0xde, 12, 64, 6*1024*1024, l3_cache_str}, 5340 { 0xdd, 12, 64, 3*1024*1024, l3_cache_str}, 5341 { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str}, 5342 { 0xd8, 8, 64, 4*1024*1024, l3_cache_str}, 5343 { 0xd7, 8, 64, 2*1024*1024, l3_cache_str}, 5344 { 0xd6, 8, 64, 1*1024*1024, l3_cache_str}, 5345 { 0xd2, 4, 64, 2*1024*1024, l3_cache_str}, 5346 { 0xd1, 4, 64, 1*1024*1024, l3_cache_str}, 5347 { 0xd0, 4, 64, 512*1024, l3_cache_str}, 5348 { 0xca, 4, 0, 512, sh_l2_tlb4k_str}, 5349 { 0xc0, 4, 0, 8, dtlb44_str }, 5350 { 0xba, 4, 0, 64, dtlb4k_str }, 5351 { 0xb4, 4, 0, 256, dtlb4k_str }, 5352 { 0xb3, 4, 0, 128, dtlb4k_str }, 5353 { 0xb2, 4, 0, 64, itlb4k_str }, 5354 { 0xb0, 4, 0, 128, itlb4k_str }, 5355 { 0x87, 8, 64, 1024*1024, l2_cache_str}, 5356 { 0x86, 4, 64, 512*1024, l2_cache_str}, 5357 { 0x85, 8, 32, 2*1024*1024, l2_cache_str}, 5358 { 0x84, 8, 32, 1024*1024, l2_cache_str}, 5359 { 0x83, 8, 32, 512*1024, l2_cache_str}, 5360 { 0x82, 8, 32, 256*1024, l2_cache_str}, 5361 { 0x80, 8, 64, 512*1024, l2_cache_str}, 5362 { 0x7f, 2, 64, 512*1024, l2_cache_str}, 5363 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str}, 5364 { 0x7c, 8, 64, 1024*1024, sl2_cache_str}, 5365 { 0x7b, 8, 64, 512*1024, sl2_cache_str}, 5366 { 0x7a, 8, 64, 256*1024, sl2_cache_str}, 5367 { 0x79, 8, 64, 128*1024, sl2_cache_str}, 5368 { 0x78, 8, 64, 1024*1024, l2_cache_str}, 5369 { 0x73, 8, 0, 64*1024, itrace_str}, 5370 { 0x72, 8, 0, 32*1024, itrace_str}, 5371 { 0x71, 8, 0, 16*1024, itrace_str}, 5372 { 0x70, 8, 0, 12*1024, itrace_str}, 5373 { 0x68, 4, 64, 32*1024, sl1_dcache_str}, 5374 { 0x67, 4, 64, 16*1024, 
sl1_dcache_str}, 5375 { 0x66, 4, 64, 8*1024, sl1_dcache_str}, 5376 { 0x60, 8, 64, 16*1024, sl1_dcache_str}, 5377 { 0x5d, 0, 0, 256, dtlb44_str}, 5378 { 0x5c, 0, 0, 128, dtlb44_str}, 5379 { 0x5b, 0, 0, 64, dtlb44_str}, 5380 { 0x5a, 4, 0, 32, dtlb24_str}, 5381 { 0x59, 0, 0, 16, dtlb4k_str}, 5382 { 0x57, 4, 0, 16, dtlb4k_str}, 5383 { 0x56, 4, 0, 16, dtlb4M_str}, 5384 { 0x55, 0, 0, 7, itlb24_str}, 5385 { 0x52, 0, 0, 256, itlb424_str}, 5386 { 0x51, 0, 0, 128, itlb424_str}, 5387 { 0x50, 0, 0, 64, itlb424_str}, 5388 { 0x4f, 0, 0, 32, itlb4k_str}, 5389 { 0x4e, 24, 64, 6*1024*1024, l2_cache_str}, 5390 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str}, 5391 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str}, 5392 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str}, 5393 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str}, 5394 { 0x49, 16, 64, 4*1024*1024, l3_cache_str}, 5395 { 0x48, 12, 64, 3*1024*1024, l2_cache_str}, 5396 { 0x47, 8, 64, 8*1024*1024, l3_cache_str}, 5397 { 0x46, 4, 64, 4*1024*1024, l3_cache_str}, 5398 { 0x45, 4, 32, 2*1024*1024, l2_cache_str}, 5399 { 0x44, 4, 32, 1024*1024, l2_cache_str}, 5400 { 0x43, 4, 32, 512*1024, l2_cache_str}, 5401 { 0x42, 4, 32, 256*1024, l2_cache_str}, 5402 { 0x41, 4, 32, 128*1024, l2_cache_str}, 5403 { 0x3e, 4, 64, 512*1024, sl2_cache_str}, 5404 { 0x3d, 6, 64, 384*1024, sl2_cache_str}, 5405 { 0x3c, 4, 64, 256*1024, sl2_cache_str}, 5406 { 0x3b, 2, 64, 128*1024, sl2_cache_str}, 5407 { 0x3a, 6, 64, 192*1024, sl2_cache_str}, 5408 { 0x39, 4, 64, 128*1024, sl2_cache_str}, 5409 { 0x30, 8, 64, 32*1024, l1_icache_str}, 5410 { 0x2c, 8, 64, 32*1024, l1_dcache_str}, 5411 { 0x29, 8, 64, 4096*1024, sl3_cache_str}, 5412 { 0x25, 8, 64, 2048*1024, sl3_cache_str}, 5413 { 0x23, 8, 64, 1024*1024, sl3_cache_str}, 5414 { 0x22, 4, 64, 512*1024, sl3_cache_str}, 5415 { 0x0e, 6, 64, 24*1024, l1_dcache_str}, 5416 { 0x0d, 4, 32, 16*1024, l1_dcache_str}, 5417 { 0x0c, 4, 32, 16*1024, l1_dcache_str}, 5418 { 0x0b, 4, 0, 4, itlb4M_str}, 5419 { 0x0a, 2, 32, 8*1024, l1_dcache_str}, 5420 { 0x08, 4, 32, 16*1024, l1_icache_str}, 5421 { 0x06, 4, 32, 8*1024, l1_icache_str}, 5422 { 0x05, 4, 0, 32, dtlb4M_str}, 5423 { 0x04, 4, 0, 8, dtlb4M_str}, 5424 { 0x03, 4, 0, 64, dtlb4k_str}, 5425 { 0x02, 4, 0, 2, itlb4M_str}, 5426 { 0x01, 4, 0, 32, itlb4k_str}, 5427 { 0 } 5428 }; 5429 5430 static const struct cachetab cyrix_ctab[] = { 5431 { 0x70, 4, 0, 32, "tlb-4K" }, 5432 { 0x80, 4, 16, 16*1024, "l1-cache" }, 5433 { 0 } 5434 }; 5435 5436 /* 5437 * Search a cache table for a matching entry 5438 */ 5439 static const struct cachetab * 5440 find_cacheent(const struct cachetab *ct, uint_t code) 5441 { 5442 if (code != 0) { 5443 for (; ct->ct_code != 0; ct++) 5444 if (ct->ct_code <= code) 5445 break; 5446 if (ct->ct_code == code) 5447 return (ct); 5448 } 5449 return (NULL); 5450 } 5451 5452 /* 5453 * Populate cachetab entry with L2 or L3 cache-information using 5454 * cpuid function 4. This function is called from intel_walk_cacheinfo() 5455 * when descriptor 0x49 is encountered. It returns 0 if no such cache 5456 * information is found. 
5457 */ 5458 static int 5459 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi) 5460 { 5461 uint32_t level, i; 5462 int ret = 0; 5463 5464 for (i = 0; i < cpi->cpi_cache_leaf_size; i++) { 5465 level = CPI_CACHE_LVL(cpi->cpi_cache_leaves[i]); 5466 5467 if (level == 2 || level == 3) { 5468 ct->ct_assoc = 5469 CPI_CACHE_WAYS(cpi->cpi_cache_leaves[i]) + 1; 5470 ct->ct_line_size = 5471 CPI_CACHE_COH_LN_SZ(cpi->cpi_cache_leaves[i]) + 1; 5472 ct->ct_size = ct->ct_assoc * 5473 (CPI_CACHE_PARTS(cpi->cpi_cache_leaves[i]) + 1) * 5474 ct->ct_line_size * 5475 (cpi->cpi_cache_leaves[i]->cp_ecx + 1); 5476 5477 if (level == 2) { 5478 ct->ct_label = l2_cache_str; 5479 } else if (level == 3) { 5480 ct->ct_label = l3_cache_str; 5481 } 5482 ret = 1; 5483 } 5484 } 5485 5486 return (ret); 5487 } 5488 5489 /* 5490 * Walk the cacheinfo descriptor, applying 'func' to every valid element 5491 * The walk is terminated if the walker returns non-zero. 5492 */ 5493 static void 5494 intel_walk_cacheinfo(struct cpuid_info *cpi, 5495 void *arg, int (*func)(void *, const struct cachetab *)) 5496 { 5497 const struct cachetab *ct; 5498 struct cachetab des_49_ct, des_b1_ct; 5499 uint8_t *dp; 5500 int i; 5501 5502 if ((dp = cpi->cpi_cacheinfo) == NULL) 5503 return; 5504 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 5505 /* 5506 * For overloaded descriptor 0x49 we use cpuid function 4 5507 * if supported by the current processor, to create 5508 * cache information. 5509 * For overloaded descriptor 0xb1 we use X86_PAE flag 5510 * to disambiguate the cache information. 5511 */ 5512 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 && 5513 intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) { 5514 ct = &des_49_ct; 5515 } else if (*dp == 0xb1) { 5516 des_b1_ct.ct_code = 0xb1; 5517 des_b1_ct.ct_assoc = 4; 5518 des_b1_ct.ct_line_size = 0; 5519 if (is_x86_feature(x86_featureset, X86FSET_PAE)) { 5520 des_b1_ct.ct_size = 8; 5521 des_b1_ct.ct_label = itlb2M_str; 5522 } else { 5523 des_b1_ct.ct_size = 4; 5524 des_b1_ct.ct_label = itlb4M_str; 5525 } 5526 ct = &des_b1_ct; 5527 } else { 5528 if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) { 5529 continue; 5530 } 5531 } 5532 5533 if (func(arg, ct) != 0) { 5534 break; 5535 } 5536 } 5537 } 5538 5539 /* 5540 * (Like the Intel one, except for Cyrix CPUs) 5541 */ 5542 static void 5543 cyrix_walk_cacheinfo(struct cpuid_info *cpi, 5544 void *arg, int (*func)(void *, const struct cachetab *)) 5545 { 5546 const struct cachetab *ct; 5547 uint8_t *dp; 5548 int i; 5549 5550 if ((dp = cpi->cpi_cacheinfo) == NULL) 5551 return; 5552 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 5553 /* 5554 * Search Cyrix-specific descriptor table first .. 5555 */ 5556 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) { 5557 if (func(arg, ct) != 0) 5558 break; 5559 continue; 5560 } 5561 /* 5562 * .. else fall back to the Intel one 5563 */ 5564 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) { 5565 if (func(arg, ct) != 0) 5566 break; 5567 continue; 5568 } 5569 } 5570 } 5571 5572 /* 5573 * A cacheinfo walker that adds associativity, line-size, and size properties 5574 * to the devinfo node it is passed as an argument. 
5575 */ 5576 static int 5577 add_cacheent_props(void *arg, const struct cachetab *ct) 5578 { 5579 dev_info_t *devi = arg; 5580 5581 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc); 5582 if (ct->ct_line_size != 0) 5583 add_cache_prop(devi, ct->ct_label, line_str, 5584 ct->ct_line_size); 5585 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size); 5586 return (0); 5587 } 5588 5589 5590 static const char fully_assoc[] = "fully-associative?"; 5591 5592 /* 5593 * AMD style cache/tlb description 5594 * 5595 * Extended functions 5 and 6 directly describe properties of 5596 * tlbs and various cache levels. 5597 */ 5598 static void 5599 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc) 5600 { 5601 switch (assoc) { 5602 case 0: /* reserved; ignore */ 5603 break; 5604 default: 5605 add_cache_prop(devi, label, assoc_str, assoc); 5606 break; 5607 case 0xff: 5608 add_cache_prop(devi, label, fully_assoc, 1); 5609 break; 5610 } 5611 } 5612 5613 static void 5614 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 5615 { 5616 if (size == 0) 5617 return; 5618 add_cache_prop(devi, label, size_str, size); 5619 add_amd_assoc(devi, label, assoc); 5620 } 5621 5622 static void 5623 add_amd_cache(dev_info_t *devi, const char *label, 5624 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 5625 { 5626 if (size == 0 || line_size == 0) 5627 return; 5628 add_amd_assoc(devi, label, assoc); 5629 /* 5630 * Most AMD parts have a sectored cache. Multiple cache lines are 5631 * associated with each tag. A sector consists of all cache lines 5632 * associated with a tag. For example, the AMD K6-III has a sector 5633 * size of 2 cache lines per tag. 5634 */ 5635 if (lines_per_tag != 0) 5636 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 5637 add_cache_prop(devi, label, line_str, line_size); 5638 add_cache_prop(devi, label, size_str, size * 1024); 5639 } 5640 5641 static void 5642 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc) 5643 { 5644 switch (assoc) { 5645 case 0: /* off */ 5646 break; 5647 case 1: 5648 case 2: 5649 case 4: 5650 add_cache_prop(devi, label, assoc_str, assoc); 5651 break; 5652 case 6: 5653 add_cache_prop(devi, label, assoc_str, 8); 5654 break; 5655 case 8: 5656 add_cache_prop(devi, label, assoc_str, 16); 5657 break; 5658 case 0xf: 5659 add_cache_prop(devi, label, fully_assoc, 1); 5660 break; 5661 default: /* reserved; ignore */ 5662 break; 5663 } 5664 } 5665 5666 static void 5667 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 5668 { 5669 if (size == 0 || assoc == 0) 5670 return; 5671 add_amd_l2_assoc(devi, label, assoc); 5672 add_cache_prop(devi, label, size_str, size); 5673 } 5674 5675 static void 5676 add_amd_l2_cache(dev_info_t *devi, const char *label, 5677 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 5678 { 5679 if (size == 0 || assoc == 0 || line_size == 0) 5680 return; 5681 add_amd_l2_assoc(devi, label, assoc); 5682 if (lines_per_tag != 0) 5683 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 5684 add_cache_prop(devi, label, line_str, line_size); 5685 add_cache_prop(devi, label, size_str, size * 1024); 5686 } 5687 5688 static void 5689 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi) 5690 { 5691 struct cpuid_regs *cp; 5692 5693 if (cpi->cpi_xmaxeax < 0x80000005) 5694 return; 5695 cp = &cpi->cpi_extd[5]; 5696 5697 /* 5698 * 4M/2M L1 TLB configuration 5699 * 5700 * We report the size for 2M pages because AMD uses two 5701 * TLB 
entries for one 4M page. 5702 */ 5703 add_amd_tlb(devi, "dtlb-2M",
5704 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16)); 5705 add_amd_tlb(devi, "itlb-2M",
5706 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0)); 5707
5708 /* 5709 * 4K L1 TLB configuration 5710 */ 5711
5712 switch (cpi->cpi_vendor) { 5713 uint_t nentries; 5714 case X86_VENDOR_TM:
5715 if (cpi->cpi_family >= 5) { 5716 /* 5717 * Crusoe processors have 256 TLB entries, but
5718 * cpuid data format constrains them to only 5719 * reporting 255 of them. 5720 */
5721 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255) 5722 nentries = 256;
5723 /* 5724 * Crusoe processors also have a unified TLB 5725 */
5726 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24), 5727 nentries); 5728 break; 5729 }
5730 /*FALLTHROUGH*/ 5731 default: 5732 add_amd_tlb(devi, itlb4k_str,
5733 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16)); 5734 add_amd_tlb(devi, dtlb4k_str,
5735 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0)); 5736 break; 5737 } 5738
5739 /* 5740 * data L1 cache configuration 5741 */ 5742
5743 add_amd_cache(devi, l1_dcache_str, 5744 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16),
5745 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0)); 5746
5747 /* 5748 * code L1 cache configuration 5749 */ 5750
5751 add_amd_cache(devi, l1_icache_str, 5752 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16),
5753 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0)); 5754
5755 if (cpi->cpi_xmaxeax < 0x80000006) 5756 return; 5757 cp = &cpi->cpi_extd[6]; 5758
5759 /* Check for a unified L2 TLB for large pages */ 5760
5761 if (BITX(cp->cp_eax, 31, 16) == 0) 5762 add_amd_l2_tlb(devi, "l2-tlb-2M",
5763 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 5764 else {
5765 add_amd_l2_tlb(devi, "l2-dtlb-2M", 5766 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
5767 add_amd_l2_tlb(devi, "l2-itlb-2M", 5768 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 5769 } 5770
5771 /* Check for a unified L2 TLB for 4K pages */ 5772
5773 if (BITX(cp->cp_ebx, 31, 16) == 0) { 5774 add_amd_l2_tlb(devi, "l2-tlb-4K",
5775 BITX(cp->cp_ebx, 15, 12), BITX(cp->cp_ebx, 11, 0)); 5776 } else {
5777 add_amd_l2_tlb(devi, "l2-dtlb-4K", 5778 BITX(cp->cp_ebx, 31, 28), BITX(cp->cp_ebx, 27, 16));
5779 add_amd_l2_tlb(devi, "l2-itlb-4K", 5780 BITX(cp->cp_ebx, 15, 12), BITX(cp->cp_ebx, 11, 0)); 5781 } 5782
5783 add_amd_l2_cache(devi, l2_cache_str, 5784 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12),
5785 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0)); 5786 } 5787
5788 /* 5789 * There are two basic ways that the x86 world describes its cache
5790 * and tlb architecture - Intel's way and AMD's way. 5791 *
5792 * Return which flavor of cache architecture we should use 5793 */
5794 static int 5795 x86_which_cacheinfo(struct cpuid_info *cpi) 5796 {
5797 switch (cpi->cpi_vendor) { 5798 case X86_VENDOR_Intel: 5799 if (cpi->cpi_maxeax >= 2)
5800 return (X86_VENDOR_Intel); 5801 break; 5802 case X86_VENDOR_AMD:
5803 /* 5804 * The K5 model 1 was the first part from AMD that reported
5805 * cache sizes via extended cpuid functions. 5806 */
5807 if (cpi->cpi_family > 5 || 5808 (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
5809 return (X86_VENDOR_AMD); 5810 break; 5811 case X86_VENDOR_TM:
5812 if (cpi->cpi_family >= 5) 5813 return (X86_VENDOR_AMD); 5814 /*FALLTHROUGH*/ 5815 default:
5816 /* 5817 * If they have extended CPU data for 0x80000005
5818 * then we assume they have AMD-format cache 5819 * information.
5820 * 5821 * If not, and the vendor happens to be Cyrix, 5822 * then try our Cyrix-specific handler. 5823 *
5824 * If we're not Cyrix, then assume we're using Intel's 5825 * table-driven format instead. 5826 */
5827 if (cpi->cpi_xmaxeax >= 0x80000005) 5828 return (X86_VENDOR_AMD);
5829 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix) 5830 return (X86_VENDOR_Cyrix);
5831 else if (cpi->cpi_maxeax >= 2) 5832 return (X86_VENDOR_Intel); 5833 break; 5834 }
5835 return (-1); 5836 } 5837
5838 void 5839 cpuid_set_cpu_properties(void *dip, processorid_t cpu_id, 5840 struct cpuid_info *cpi)
5841 { 5842 dev_info_t *cpu_devi; 5843 int create; 5844
5845 cpu_devi = (dev_info_t *)dip; 5846
5847 /* device_type */ 5848 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 5849 "device_type", "cpu"); 5850
5851 /* reg */ 5852 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 5853 "reg", cpu_id); 5854
5855 /* cpu-mhz, and clock-frequency */ 5856 if (cpu_freq > 0) { 5857 long long mul; 5858
5859 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 5860 "cpu-mhz", cpu_freq);
5861 if ((mul = cpu_freq * 1000000LL) <= INT_MAX) 5862 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5863 "clock-frequency", (int)mul); 5864 } 5865
5866 if (!is_x86_feature(x86_featureset, X86FSET_CPUID)) { 5867 return; 5868 } 5869
5870 /* vendor-id */ 5871 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
5872 "vendor-id", cpi->cpi_vendorstr); 5873
5874 if (cpi->cpi_maxeax == 0) { 5875 return; 5876 } 5877
5878 /* 5879 * family, model, and step 5880 */
5881 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 5882 "family", CPI_FAMILY(cpi));
5883 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 5884 "cpu-model", CPI_MODEL(cpi));
5885 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 5886 "stepping-id", CPI_STEP(cpi)); 5887
5888 /* type */ 5889 switch (cpi->cpi_vendor) { 5890 case X86_VENDOR_Intel: 5891 create = 1; 5892 break;
5893 default: 5894 create = 0; 5895 break; 5896 }
5897 if (create) 5898 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 5899 "type", CPI_TYPE(cpi)); 5900
5901 /* ext-family */ 5902 switch (cpi->cpi_vendor) { 5903 case X86_VENDOR_Intel: 5904 case X86_VENDOR_AMD:
5905 create = cpi->cpi_family >= 0xf; 5906 break; 5907 default: 5908 create = 0; 5909 break; 5910 }
5911 if (create) 5912 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5913 "ext-family", CPI_FAMILY_XTD(cpi)); 5914
5915 /* ext-model */ 5916 switch (cpi->cpi_vendor) { 5917 case X86_VENDOR_Intel:
5918 create = IS_EXTENDED_MODEL_INTEL(cpi); 5919 break; 5920 case X86_VENDOR_AMD:
5921 create = CPI_FAMILY(cpi) == 0xf; 5922 break; 5923 default: 5924 create = 0; 5925 break; 5926 }
5927 if (create) 5928 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5929 "ext-model", CPI_MODEL_XTD(cpi)); 5930
5931 /* generation */ 5932 switch (cpi->cpi_vendor) { 5933 case X86_VENDOR_AMD:
5934 /* 5935 * AMD K5 model 1 was the first part to support this 5936 */
5937 create = cpi->cpi_xmaxeax >= 0x80000001; 5938 break; 5939 default: 5940 create = 0; 5941 break; 5942 }
5943 if (create) 5944 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5945 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8)); 5946
5947 /* brand-id */ 5948 switch (cpi->cpi_vendor) { 5949 case X86_VENDOR_Intel:
5950 /* 5951 * brand id first appeared on Pentium III Xeon model 8,
5952 * and Celeron model 8 processors and Opteron 5953 */
5954 create = cpi->cpi_family > 6 || 5955 (cpi->cpi_family == 6 && cpi->cpi_model >= 8); 5956 break;
5957 case X86_VENDOR_AMD: 5958 create = cpi->cpi_family >= 0xf; 5959 break;
5960 default: 5961 create = 0; 5962 break; 5963 }
5964 if (create && cpi->cpi_brandid != 0) { 5965 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5966 "brand-id", cpi->cpi_brandid); 5967 } 5968
5969 /* chunks, and apic-id */ 5970 switch (cpi->cpi_vendor) {
5971 /* 5972 * first available on Pentium IV and Opteron (K8) 5973 */
5974 case X86_VENDOR_Intel: 5975 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 5976 break;
5977 case X86_VENDOR_AMD: 5978 create = cpi->cpi_family >= 0xf; 5979 break;
5980 default: 5981 create = 0; 5982 break; 5983 }
5984 if (create) { 5985 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 5986 "chunks", CPI_CHUNKS(cpi));
5987 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 5988 "apic-id", cpi->cpi_apicid);
5989 if (cpi->cpi_chipid >= 0) { 5990 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5991 "chip#", cpi->cpi_chipid); 5992 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5993 "clog#", cpi->cpi_clogid); 5994 } 5995 } 5996
5997 /* cpuid-features */ 5998 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5999 "cpuid-features", CPI_FEATURES_EDX(cpi)); 6000 6001
6002 /* cpuid-features-ecx */ 6003 switch (cpi->cpi_vendor) { 6004 case X86_VENDOR_Intel:
6005 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 6006 break; 6007 case X86_VENDOR_AMD:
6008 create = cpi->cpi_family >= 0xf; 6009 break; 6010 default: 6011 create = 0; 6012 break; 6013 }
6014 if (create) 6015 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6016 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi)); 6017
6018 /* ext-cpuid-features */ 6019 switch (cpi->cpi_vendor) { 6020 case X86_VENDOR_Intel:
6021 case X86_VENDOR_AMD: 6022 case X86_VENDOR_Cyrix: 6023 case X86_VENDOR_TM: 6024 case X86_VENDOR_Centaur:
6025 create = cpi->cpi_xmaxeax >= 0x80000001; 6026 break; 6027 default: 6028 create = 0; 6029 break; 6030 }
6031 if (create) { 6032 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6033 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi)); 6034 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6035 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi)); 6036 } 6037
6038 /* 6039 * Brand String first appeared in Intel Pentium IV, AMD K5
6040 * model 1, and Cyrix GXm. On earlier models we try and
6041 * simulate something similar .. so this string should always
6042 * say -something- about the processor, however lame.
6043 */ 6044 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 6045 "brand-string", cpi->cpi_brandstr); 6046 6047 /* 6048 * Finally, cache and tlb information 6049 */ 6050 switch (x86_which_cacheinfo(cpi)) { 6051 case X86_VENDOR_Intel: 6052 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 6053 break; 6054 case X86_VENDOR_Cyrix: 6055 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 6056 break; 6057 case X86_VENDOR_AMD: 6058 amd_cache_info(cpi, cpu_devi); 6059 break; 6060 default: 6061 break; 6062 } 6063 } 6064 6065 struct l2info { 6066 int *l2i_csz; 6067 int *l2i_lsz; 6068 int *l2i_assoc; 6069 int l2i_ret; 6070 }; 6071 6072 /* 6073 * A cacheinfo walker that fetches the size, line-size and associativity 6074 * of the L2 cache 6075 */ 6076 static int 6077 intel_l2cinfo(void *arg, const struct cachetab *ct) 6078 { 6079 struct l2info *l2i = arg; 6080 int *ip; 6081 6082 if (ct->ct_label != l2_cache_str && 6083 ct->ct_label != sl2_cache_str) 6084 return (0); /* not an L2 -- keep walking */ 6085 6086 if ((ip = l2i->l2i_csz) != NULL) 6087 *ip = ct->ct_size; 6088 if ((ip = l2i->l2i_lsz) != NULL) 6089 *ip = ct->ct_line_size; 6090 if ((ip = l2i->l2i_assoc) != NULL) 6091 *ip = ct->ct_assoc; 6092 l2i->l2i_ret = ct->ct_size; 6093 return (1); /* was an L2 -- terminate walk */ 6094 } 6095 6096 /* 6097 * AMD L2/L3 Cache and TLB Associativity Field Definition: 6098 * 6099 * Unlike the associativity for the L1 cache and tlb where the 8 bit 6100 * value is the associativity, the associativity for the L2 cache and 6101 * tlb is encoded in the following table. The 4 bit L2 value serves as 6102 * an index into the amd_afd[] array to determine the associativity. 6103 * -1 is undefined. 0 is fully associative. 6104 */ 6105 6106 static int amd_afd[] = 6107 {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0}; 6108 6109 static void 6110 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i) 6111 { 6112 struct cpuid_regs *cp; 6113 uint_t size, assoc; 6114 int i; 6115 int *ip; 6116 6117 if (cpi->cpi_xmaxeax < 0x80000006) 6118 return; 6119 cp = &cpi->cpi_extd[6]; 6120 6121 if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 && 6122 (size = BITX(cp->cp_ecx, 31, 16)) != 0) { 6123 uint_t cachesz = size * 1024; 6124 assoc = amd_afd[i]; 6125 6126 ASSERT(assoc != -1); 6127 6128 if ((ip = l2i->l2i_csz) != NULL) 6129 *ip = cachesz; 6130 if ((ip = l2i->l2i_lsz) != NULL) 6131 *ip = BITX(cp->cp_ecx, 7, 0); 6132 if ((ip = l2i->l2i_assoc) != NULL) 6133 *ip = assoc; 6134 l2i->l2i_ret = cachesz; 6135 } 6136 } 6137 6138 int 6139 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc) 6140 { 6141 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 6142 struct l2info __l2info, *l2i = &__l2info; 6143 6144 l2i->l2i_csz = csz; 6145 l2i->l2i_lsz = lsz; 6146 l2i->l2i_assoc = assoc; 6147 l2i->l2i_ret = -1; 6148 6149 switch (x86_which_cacheinfo(cpi)) { 6150 case X86_VENDOR_Intel: 6151 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 6152 break; 6153 case X86_VENDOR_Cyrix: 6154 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 6155 break; 6156 case X86_VENDOR_AMD: 6157 amd_l2cacheinfo(cpi, l2i); 6158 break; 6159 default: 6160 break; 6161 } 6162 return (l2i->l2i_ret); 6163 } 6164 6165 #if !defined(__xpv) 6166 6167 uint32_t * 6168 cpuid_mwait_alloc(cpu_t *cpu) 6169 { 6170 uint32_t *ret; 6171 size_t mwait_size; 6172 6173 ASSERT(cpuid_checkpass(CPU, 2)); 6174 6175 mwait_size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max; 6176 if (mwait_size == 0) 6177 return (NULL); 6178 6179 /* 6180 * kmem_alloc() returns cache line size aligned data for 
mwait_size 6181 * allocations. mwait_size is currently cache line sized. Neither 6182 * of these implementation details are guarantied to be true in the 6183 * future. 6184 * 6185 * First try allocating mwait_size as kmem_alloc() currently returns 6186 * correctly aligned memory. If kmem_alloc() does not return 6187 * mwait_size aligned memory, then use mwait_size ROUNDUP. 6188 * 6189 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we 6190 * decide to free this memory. 6191 */ 6192 ret = kmem_zalloc(mwait_size, KM_SLEEP); 6193 if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) { 6194 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 6195 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size; 6196 *ret = MWAIT_RUNNING; 6197 return (ret); 6198 } else { 6199 kmem_free(ret, mwait_size); 6200 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP); 6201 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 6202 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2; 6203 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size); 6204 *ret = MWAIT_RUNNING; 6205 return (ret); 6206 } 6207 } 6208 6209 void 6210 cpuid_mwait_free(cpu_t *cpu) 6211 { 6212 if (cpu->cpu_m.mcpu_cpi == NULL) { 6213 return; 6214 } 6215 6216 if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL && 6217 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) { 6218 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual, 6219 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual); 6220 } 6221 6222 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL; 6223 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0; 6224 } 6225 6226 void 6227 patch_tsc_read(int flag) 6228 { 6229 size_t cnt; 6230 6231 switch (flag) { 6232 case TSC_NONE: 6233 cnt = &_no_rdtsc_end - &_no_rdtsc_start; 6234 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt); 6235 break; 6236 case TSC_RDTSC_MFENCE: 6237 cnt = &_tsc_mfence_end - &_tsc_mfence_start; 6238 (void) memcpy((void *)tsc_read, 6239 (void *)&_tsc_mfence_start, cnt); 6240 break; 6241 case TSC_RDTSC_LFENCE: 6242 cnt = &_tsc_lfence_end - &_tsc_lfence_start; 6243 (void) memcpy((void *)tsc_read, 6244 (void *)&_tsc_lfence_start, cnt); 6245 break; 6246 case TSC_TSCP: 6247 cnt = &_tscp_end - &_tscp_start; 6248 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt); 6249 break; 6250 default: 6251 /* Bail for unexpected TSC types. (TSC_NONE covers 0) */ 6252 cmn_err(CE_PANIC, "Unrecogized TSC type: %d", flag); 6253 break; 6254 } 6255 tsc_type = flag; 6256 } 6257 6258 int 6259 cpuid_deep_cstates_supported(void) 6260 { 6261 struct cpuid_info *cpi; 6262 struct cpuid_regs regs; 6263 6264 ASSERT(cpuid_checkpass(CPU, 1)); 6265 6266 cpi = CPU->cpu_m.mcpu_cpi; 6267 6268 if (!is_x86_feature(x86_featureset, X86FSET_CPUID)) 6269 return (0); 6270 6271 switch (cpi->cpi_vendor) { 6272 case X86_VENDOR_Intel: 6273 if (cpi->cpi_xmaxeax < 0x80000007) 6274 return (0); 6275 6276 /* 6277 * TSC run at a constant rate in all ACPI C-states? 6278 */ 6279 regs.cp_eax = 0x80000007; 6280 (void) __cpuid_insn(®s); 6281 return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE); 6282 6283 default: 6284 return (0); 6285 } 6286 } 6287 6288 #endif /* !__xpv */ 6289 6290 void 6291 post_startup_cpu_fixups(void) 6292 { 6293 #ifndef __xpv 6294 /* 6295 * Some AMD processors support C1E state. Entering this state will 6296 * cause the local APIC timer to stop, which we can't deal with at 6297 * this time. 
6298 */ 6299 if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) { 6300 on_trap_data_t otd; 6301 uint64_t reg; 6302 6303 if (!on_trap(&otd, OT_DATA_ACCESS)) { 6304 reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT); 6305 /* Disable C1E state if it is enabled by BIOS */ 6306 if ((reg >> AMD_ACTONCMPHALT_SHIFT) & 6307 AMD_ACTONCMPHALT_MASK) { 6308 reg &= ~(AMD_ACTONCMPHALT_MASK << 6309 AMD_ACTONCMPHALT_SHIFT); 6310 wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg); 6311 } 6312 } 6313 no_trap(); 6314 } 6315 #endif /* !__xpv */ 6316 } 6317 6318 void 6319 enable_pcid(void) 6320 { 6321 if (x86_use_pcid == -1) 6322 x86_use_pcid = is_x86_feature(x86_featureset, X86FSET_PCID); 6323 6324 if (x86_use_invpcid == -1) { 6325 x86_use_invpcid = is_x86_feature(x86_featureset, 6326 X86FSET_INVPCID); 6327 } 6328 6329 if (!x86_use_pcid) 6330 return; 6331 6332 /* 6333 * Intel say that on setting PCIDE, it immediately starts using the PCID 6334 * bits; better make sure there's nothing there. 6335 */ 6336 ASSERT((getcr3() & MMU_PAGEOFFSET) == PCID_NONE); 6337 6338 setcr4(getcr4() | CR4_PCIDE); 6339 } 6340 6341 /* 6342 * Setup necessary registers to enable XSAVE feature on this processor. 6343 * This function needs to be called early enough, so that no xsave/xrstor 6344 * ops will execute on the processor before the MSRs are properly set up. 6345 * 6346 * Current implementation has the following assumption: 6347 * - cpuid_pass1() is done, so that X86 features are known. 6348 * - fpu_probe() is done, so that fp_save_mech is chosen. 6349 */ 6350 void 6351 xsave_setup_msr(cpu_t *cpu) 6352 { 6353 ASSERT(fp_save_mech == FP_XSAVE); 6354 ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE)); 6355 6356 /* Enable OSXSAVE in CR4. */ 6357 setcr4(getcr4() | CR4_OSXSAVE); 6358 /* 6359 * Update SW copy of ECX, so that /dev/cpu/self/cpuid will report 6360 * correct value. 6361 */ 6362 cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_ecx |= CPUID_INTC_ECX_OSXSAVE; 6363 setup_xfem(); 6364 } 6365 6366 /* 6367 * Starting with the Westmere processor the local 6368 * APIC timer will continue running in all C-states, 6369 * including the deepest C-states. 6370 */ 6371 int 6372 cpuid_arat_supported(void) 6373 { 6374 struct cpuid_info *cpi; 6375 struct cpuid_regs regs; 6376 6377 ASSERT(cpuid_checkpass(CPU, 1)); 6378 ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID)); 6379 6380 cpi = CPU->cpu_m.mcpu_cpi; 6381 6382 switch (cpi->cpi_vendor) { 6383 case X86_VENDOR_Intel: 6384 /* 6385 * Always-running Local APIC Timer is 6386 * indicated by CPUID.6.EAX[2]. 
6387 */ 6388 if (cpi->cpi_maxeax >= 6) { 6389 regs.cp_eax = 6;
6390 (void) cpuid_insn(NULL, &regs); 6391 return (regs.cp_eax & CPUID_CSTATE_ARAT);
6392 } else { 6393 return (0); 6394 } 6395 default: 6396 return (0); 6397 } 6398 } 6399
6400 /* 6401 * Check support for Intel ENERGY_PERF_BIAS feature 6402 */
6403 int 6404 cpuid_iepb_supported(struct cpu *cp) 6405 {
6406 struct cpuid_info *cpi = cp->cpu_m.mcpu_cpi; 6407 struct cpuid_regs regs; 6408
6409 ASSERT(cpuid_checkpass(cp, 1)); 6410
6411 if (!(is_x86_feature(x86_featureset, X86FSET_CPUID)) ||
6412 !(is_x86_feature(x86_featureset, X86FSET_MSR))) { 6413 return (0); 6414 } 6415
6416 /* 6417 * Intel ENERGY_PERF_BIAS MSR is indicated by
6418 * capability bit CPUID.6.ECX.3 6419 */
6420 if ((cpi->cpi_vendor != X86_VENDOR_Intel) || (cpi->cpi_maxeax < 6)) 6421 return (0); 6422
6423 regs.cp_eax = 0x6; 6424 (void) cpuid_insn(NULL, &regs);
6425 return (regs.cp_ecx & CPUID_EPB_SUPPORT); 6426 } 6427
6428 /* 6429 * Check support for TSC deadline timer 6430 *
6431 * TSC deadline timer provides a superior software programming
6432 * model over local APIC timer that eliminates "time drifts".
6433 * Instead of specifying a relative time, software specifies an
6434 * absolute time as the target at which the processor should
6435 * generate a timer event. 6436 */
6437 int 6438 cpuid_deadline_tsc_supported(void) 6439 {
6440 struct cpuid_info *cpi = CPU->cpu_m.mcpu_cpi; 6441 struct cpuid_regs regs; 6442
6443 ASSERT(cpuid_checkpass(CPU, 1));
6444 ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID)); 6445
6446 switch (cpi->cpi_vendor) { 6447 case X86_VENDOR_Intel:
6448 if (cpi->cpi_maxeax >= 1) { 6449 regs.cp_eax = 1;
6450 (void) cpuid_insn(NULL, &regs); 6451 return (regs.cp_ecx & CPUID_DEADLINE_TSC);
6452 } else { 6453 return (0); 6454 } 6455 default: 6456 return (0); 6457 } 6458 } 6459
6460 #if defined(__amd64) && !defined(__xpv)
6461 /* 6462 * Patch in versions of bcopy for high performance Intel Nhm processors
6463 * and later... 6464 */
6465 void 6466 patch_memops(uint_t vendor) 6467 { 6468 size_t cnt, i; 6469 caddr_t to, from; 6470
6471 if ((vendor == X86_VENDOR_Intel) &&
6472 is_x86_feature(x86_featureset, X86FSET_SSE4_2)) {
6473 cnt = &bcopy_patch_end - &bcopy_patch_start; 6474 to = &bcopy_ck_size;
6475 from = &bcopy_patch_start; 6476 for (i = 0; i < cnt; i++) { 6477 *to++ = *from++; 6478 } 6479 } 6480 }
6481 #endif /* __amd64 && !__xpv */ 6482
6483 /* 6484 * We're being asked to tell the system how many bits are required to represent
6485 * the various thread and strand IDs. While it's tempting to derive this based
6486 * on the values in cpi_ncore_per_chip and cpi_ncpu_per_chip, that isn't quite
6487 * correct. Instead, this needs to be based on the number of bits that the APIC
6488 * allows for these different configurations. We only update these to a larger
6489 * value if we find one. 6490 */
6491 void 6492 cpuid_get_ext_topo(cpu_t *cpu, uint_t *core_nbits, uint_t *strand_nbits) 6493 {
6494 struct cpuid_info *cpi; 6495 6496 VERIFY(cpuid_checkpass(CPU, 1));
6497 cpi = cpu->cpu_m.mcpu_cpi; 6498
6499 if (cpi->cpi_ncore_bits > *core_nbits) { 6500 *core_nbits = cpi->cpi_ncore_bits; 6501 } 6502
6503 if (cpi->cpi_nthread_bits > *strand_nbits) { 6504 *strand_nbits = cpi->cpi_nthread_bits; 6505 } 6506 } 6507
6508 void 6509 cpuid_pass_ucode(cpu_t *cpu, uchar_t *fset) 6510 {
6511 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 6512 struct cpuid_regs cp; 6513
6514 /* 6515 * Reread the CPUID portions that we need for various security 6516 * information.
6517 */ 6518 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 6519 /* 6520 * Check if we now have leaf 7 available to us. 6521 */ 6522 if (cpi->cpi_maxeax < 7) { 6523 bzero(&cp, sizeof (cp)); 6524 cp.cp_eax = 0; 6525 cpi->cpi_maxeax = __cpuid_insn(&cp); 6526 if (cpi->cpi_maxeax < 7) 6527 return; 6528 } 6529 6530 bzero(&cp, sizeof (cp)); 6531 cp.cp_eax = 7; 6532 cp.cp_ecx = 0; 6533 (void) __cpuid_insn(&cp); 6534 cpi->cpi_std[7] = cp; 6535 } else if (cpi->cpi_vendor == X86_VENDOR_AMD) { 6536 /* No xcpuid support */ 6537 if (cpi->cpi_family < 5 || 6538 (cpi->cpi_family == 5 && cpi->cpi_model < 1)) 6539 return; 6540 6541 if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8) { 6542 bzero(&cp, sizeof (cp)); 6543 cp.cp_eax = CPUID_LEAF_EXT_0; 6544 cpi->cpi_xmaxeax = __cpuid_insn(&cp); 6545 if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8) { 6546 return; 6547 } 6548 } 6549 6550 bzero(&cp, sizeof (cp)); 6551 cp.cp_eax = CPUID_LEAF_EXT_8; 6552 (void) __cpuid_insn(&cp); 6553 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8, &cp); 6554 cpi->cpi_extd[8] = cp; 6555 } else { 6556 /* 6557 * Nothing to do here. Return an empty set which has already 6558 * been zeroed for us. 6559 */ 6560 return; 6561 } 6562 cpuid_scan_security(cpu, fset); 6563 } 6564 6565 /* ARGSUSED */ 6566 static int 6567 cpuid_post_ucodeadm_xc(xc_arg_t arg0, xc_arg_t arg1, xc_arg_t arg2) 6568 { 6569 uchar_t *fset; 6570 6571 fset = (uchar_t *)(arg0 + sizeof (x86_featureset) * CPU->cpu_id); 6572 cpuid_pass_ucode(CPU, fset); 6573 6574 return (0); 6575 } 6576 6577 /* 6578 * After a microcode update where the version has changed, then we need to 6579 * rescan CPUID. To do this we check every CPU to make sure that they have the 6580 * same microcode. Then we perform a cross call to all such CPUs. It's the 6581 * caller's job to make sure that no one else can end up doing an update while 6582 * this is going on. 6583 * 6584 * We assume that the system is microcode capable if we're called. 6585 */ 6586 void 6587 cpuid_post_ucodeadm(void) 6588 { 6589 uint32_t rev; 6590 int i; 6591 struct cpu *cpu; 6592 cpuset_t cpuset; 6593 void *argdata; 6594 uchar_t *f0; 6595 6596 argdata = kmem_zalloc(sizeof (x86_featureset) * NCPU, KM_SLEEP); 6597 6598 mutex_enter(&cpu_lock); 6599 cpu = cpu_get(0); 6600 rev = cpu->cpu_m.mcpu_ucode_info->cui_rev; 6601 CPUSET_ONLY(cpuset, 0); 6602 for (i = 1; i < max_ncpus; i++) { 6603 if ((cpu = cpu_get(i)) == NULL) 6604 continue; 6605 6606 if (cpu->cpu_m.mcpu_ucode_info->cui_rev != rev) { 6607 panic("post microcode update CPU %d has differing " 6608 "microcode revision (%u) from CPU 0 (%u)", 6609 i, cpu->cpu_m.mcpu_ucode_info->cui_rev, rev); 6610 } 6611 CPUSET_ADD(cpuset, i); 6612 } 6613 6614 kpreempt_disable(); 6615 xc_sync((xc_arg_t)argdata, 0, 0, CPUSET2BV(cpuset), 6616 cpuid_post_ucodeadm_xc); 6617 kpreempt_enable(); 6618 6619 /* 6620 * OK, now look at each CPU and see if their feature sets are equal. 
6621 */ 6622 f0 = argdata; 6623 for (i = 1; i < max_ncpus; i++) { 6624 uchar_t *fset; 6625 if (!CPU_IN_SET(cpuset, i)) 6626 continue; 6627 6628 fset = (uchar_t *)((uintptr_t)argdata + 6629 sizeof (x86_featureset) * i); 6630 6631 if (!compare_x86_featureset(f0, fset)) { 6632 panic("Post microcode update CPU %d has " 6633 "differing security feature (%p) set from CPU 0 " 6634 "(%p), not appending to feature set", i, 6635 (void *)fset, (void *)f0); 6636 } 6637 } 6638 6639 mutex_exit(&cpu_lock); 6640 6641 for (i = 0; i < NUM_X86_FEATURES; i++) { 6642 cmn_err(CE_CONT, "?post-ucode x86_feature: %s\n", 6643 x86_feature_names[i]); 6644 if (is_x86_feature(f0, i)) { 6645 add_x86_feature(x86_featureset, i); 6646 } 6647 } 6648 kmem_free(argdata, sizeof (x86_featureset) * NCPU); 6649 }
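
/*
 * Editor's illustrative sketch, not part of the original file: the
 * silicon-revision table earlier in this file returns 1 when an erratum
 * applies to the current revision, 0 when it does not, and -1 for erratum
 * numbers it does not know about, while osvw_opteron_erratum() returns
 * 1/0/-1 with -1 meaning OSVW cannot answer.  A caller would typically
 * prefer the OSVW answer and fall back to the revision table.  The names
 * example_erratum_present() and opteron_erratum() (for the table-based
 * routine above) are assumed here for illustration only.
 */
static int
example_erratum_present(cpu_t *cpu, uint_t erratum)
{
	int rc = osvw_opteron_erratum(cpu, erratum);

	if (rc >= 0) {
		/* OSVW gave a definite answer */
		return (rc != 0);
	}

	/* Fall back to the family/model/stepping table */
	return (opteron_erratum(cpu, erratum) > 0);
}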
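
/*
 * Editor's illustrative sketch, not part of the original file: the Intel
 * walker above consumes descriptor bytes that an earlier cpuid pass stashes
 * in cpi->cpi_cacheinfo.  For leaf 2, each of %eax, %ebx, %ecx and %edx
 * carries four descriptor bytes; a register with bit 31 set carries no
 * valid descriptors, the low byte of %eax is the number of times the leaf
 * must be issued (0x01 on all modern parts, so a single invocation is shown
 * here), and 0x00 is a null descriptor.  The example_ names below are
 * hypothetical.
 */
static uint_t
example_harvest_leaf2(uint8_t *buf, uint_t bufsz)
{
	struct cpuid_regs regs;
	uint32_t vals[4];
	uint_t n = 0;
	int reg, byte;

	regs.cp_eax = 2;
	regs.cp_ebx = regs.cp_ecx = regs.cp_edx = 0;
	(void) __cpuid_insn(&regs);

	vals[0] = regs.cp_eax;
	vals[1] = regs.cp_ebx;
	vals[2] = regs.cp_ecx;
	vals[3] = regs.cp_edx;

	for (reg = 0; reg < 4; reg++) {
		if (vals[reg] & 0x80000000)
			continue;	/* register holds no descriptors */
		for (byte = 0; byte < 4; byte++) {
			uint8_t desc = (vals[reg] >> (byte * 8)) & 0xff;

			/* skip the iteration count in %al and null bytes */
			if ((reg == 0 && byte == 0) || desc == 0)
				continue;
			if (n < bufsz)
				buf[n++] = desc;
		}
	}
	return (n);
}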
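
/*
 * Editor's illustrative sketch, not part of the original file: the cache
 * walkers above publish devinfo properties named "<label>-<type>" on the
 * cpu node, e.g. "l2-cache-size", "l2-cache-line-size",
 * "l2-cache-associativity" or "itlb-4K-size".  A consumer could read one
 * back with the standard DDI property interfaces; example_get_l2_size()
 * is a hypothetical name.
 */
static int
example_get_l2_size(dev_info_t *cpu_devi)
{
	/* Returns -1 if the property was never created for this node */
	return (ddi_prop_get_int(DDI_DEV_T_ANY, cpu_devi, DDI_PROP_DONTPASS,
	    "l2-cache-size", -1));
}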