1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2011, 2016 by Delphix. All rights reserved. 24 * Copyright 2013 Nexenta Systems, Inc. All rights reserved. 25 * Copyright 2014 Josef "Jeff" Sipek <jeffpc@josefsipek.net> 26 */ 27 /* 28 * Copyright (c) 2010, Intel Corporation. 29 * All rights reserved. 30 */ 31 /* 32 * Portions Copyright 2009 Advanced Micro Devices, Inc. 33 */ 34 /* 35 * Copyright 2019 Joyent, Inc. 36 */ 37 38 /* 39 * CPU Identification logic 40 * 41 * The purpose of this file and its companion, cpuid_subr.c, is to help deal 42 * with the identification of CPUs, their features, and their topologies. More 43 * specifically, this file helps drive the following: 44 * 45 * 1. Enumeration of features of the processor which are used by the kernel to 46 * determine what features to enable or disable. These may be instruction set 47 * enhancements or features that we use. 48 * 49 * 2. Enumeration of instruction set architecture (ISA) additions that userland 50 * will be told about through the auxiliary vector. 51 * 52 * 3. Understanding the physical topology of the CPU such as the number of 53 * caches, how many cores it has, whether or not it supports symmetric 54 * multi-processing (SMT), etc. 55 * 56 * ------------------------ 57 * CPUID History and Basics 58 * ------------------------ 59 * 60 * The cpuid instruction was added by Intel roughly around the time that the 61 * original Pentium was introduced. The purpose of cpuid was to tell in a 62 * programmatic fashion information about the CPU that previously was guessed 63 * at. For example, an important part of cpuid is that we can know what 64 * extensions to the ISA exist. If you use an invalid opcode you would get a 65 * #UD, so this method allows a program (whether a user program or the kernel) 66 * to determine what exists without crashing or getting a SIGILL. Of course, 67 * this was also during the era of the clones and the AMD Am5x86. The vendor 68 * name shows up first in cpuid for a reason. 69 * 70 * cpuid information is broken down into ranges called a 'leaf'. Each leaf puts 71 * unique values into the registers %eax, %ebx, %ecx, and %edx and each leaf has 72 * its own meaning. The different leaves are broken down into different regions: 73 * 74 * [ 0, 7fffffff ] This region is called the 'basic' 75 * region. This region is generally defined 76 * by Intel, though some of the original 77 * portions have different meanings based 78 * on the manufacturer. These days, Intel 79 * adds most new features to this region. 80 * AMD adds non-Intel compatible 81 * information in the third, extended 82 * region. 
Intel uses this for everything 83 * including ISA extensions, CPU 84 * features, cache information, topology, 85 * and more. 86 * 87 * There is a hole carved out of this 88 * region which is reserved for 89 * hypervisors. 90 * 91 * [ 40000000, 4fffffff ] This region, which is found in the 92 * middle of the previous region, is 93 * explicitly promised to never be used by 94 * CPUs. Instead, it is used by hypervisors 95 * to communicate information about 96 * themselves to the operating system. The 97 * values and details are unique for each 98 * hypervisor. 99 * 100 * [ 80000000, ffffffff ] This region is called the 'extended' 101 * region. Some of the low leaves mirror 102 * parts of the basic leaves. This region 103 * has generally been used by AMD for 104 * various extensions. For example, AMD- 105 * specific information about caches, 106 * features, and topology are found in this 107 * region. 108 * 109 * To specify a range, you place the desired leaf into %eax, zero %ebx, %ecx, 110 * and %edx, and then issue the cpuid instruction. At the first leaf in each of 111 * the ranges, one of the primary things returned is the maximum valid leaf in 112 * that range. This allows for discovery of what range of CPUID is valid. 113 * 114 * The CPUs have potentially surprising behavior when using an invalid leaf or 115 * unimplemented leaf. If the requested leaf is within the valid basic or 116 * extended range, but is unimplemented, then %eax, %ebx, %ecx, and %edx will be 117 * set to zero. However, if you specify a leaf that is outside of a valid range, 118 * then instead it will be filled with the last valid _basic_ leaf. For example, 119 * if the maximum basic value is on leaf 0x3, then issuing a cpuid for leaf 4 or 120 * an invalid extended leaf will return the information for leaf 3. 121 * 122 * Some leaves are broken down into sub-leaves. This means that the value 123 * depends on both the leaf asked for in %eax and a secondary register. For 124 * example, Intel uses the value in %ecx on leaf 7 to indicate a sub-leaf to get 125 * additional information. Or when getting topology information in leaf 0xb, the 126 * initial value in %ecx changes which level of the topology that you are 127 * getting information about. 128 * 129 * cpuid values are always kept to 32 bits regardless of whether or not the 130 * program is in 64-bit mode. When executing in 64-bit mode, the upper 131 * 32 bits of the register are always set to zero so that way the values are the 132 * same regardless of execution mode. 133 * 134 * ---------------------- 135 * Identifying Processors 136 * ---------------------- 137 * 138 * We can identify a processor in two steps. The first step looks at cpuid leaf 139 * 0. Leaf 0 contains the processor's vendor information. This is done by 140 * putting a 12 character string in %ebx, %ecx, and %edx. On AMD, it is 141 * 'AuthenticAMD' and on Intel it is 'GenuineIntel'. 142 * 143 * From there, a processor is identified by a combination of three different 144 * values: 145 * 146 * 1. Family 147 * 2. Model 148 * 3. Stepping 149 * 150 * Each vendor uses the family and model to uniquely identify a processor. The 151 * way that family and model are changed depends on the vendor. For example, 152 * Intel has been using family 0x6 for almost all of their processor since the 153 * Pentium Pro/Pentium II era, often called the P6. The model is used to 154 * identify the exact processor. Different models are often used for the client 155 * (consumer) and server parts. 
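 *
 * To make the two identification steps above concrete, here is a minimal,
 * illustrative sketch (the local variables are hypothetical; it uses the
 * struct cpuid_regs / __cpuid_insn() pattern and the BITX() bit-extraction
 * macro that the rest of this file is built around). Note that the twelve
 * character vendor string is laid out across %ebx, %edx, and %ecx, in that
 * order, and that the family/model/stepping values read here are the raw,
 * pre-extension fields; the extended family and model are described just
 * below:
 *
 *	struct cpuid_regs cp = { 0 };
 *	char vendor[13];
 *	uint_t family, model, step;
 *
 *	cp.cp_eax = 0;
 *	(void) __cpuid_insn(&cp);
 *	bcopy(&cp.cp_ebx, &vendor[0], 4);
 *	bcopy(&cp.cp_edx, &vendor[4], 4);
 *	bcopy(&cp.cp_ecx, &vendor[8], 4);
 *	vendor[12] = '\0';
 *
 *	cp.cp_eax = 1;
 *	(void) __cpuid_insn(&cp);
 *	family = BITX(cp.cp_eax, 11, 8);
 *	model = BITX(cp.cp_eax, 7, 4);
 *	step = BITX(cp.cp_eax, 3, 0);
 *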
 * Even though different models within a family often have major architectural
 * differences, Intel still considers them the same family.
 *
 * On the other hand, each major AMD architecture generally has its own family.
 * For example, the K8 is family 0xf, Bulldozer is family 0x15, and Zen is
 * family 0x17. Within each family, the model number is used to help identify
 * specific processors.
 *
 * The stepping is used to refer to a revision of a specific microprocessor. The
 * term comes from equipment used to produce masks that are used to create
 * integrated circuits.
 *
 * The information is present in leaf 1, %eax. In technical documentation you
 * will see the terms extended model and extended family. The original family,
 * model, and stepping fields were each 4 bits wide. If the value in the family
 * or model field is 0xf, then one must consult the extended model and extended
 * family fields, which occupy previously reserved bits: the extended family is
 * added to the base value of 0xf, while the extended model supplies the upper
 * bits of the full model number.
 *
 * When we process this information, we store the full family, model, and
 * stepping in the struct cpuid_info members cpi_family, cpi_model, and
 * cpi_step, respectively. Whenever you are performing comparisons with the
 * family, model, and stepping, you should use these members and not the raw
 * values from cpuid. If you must use the raw values from cpuid directly, you
 * must make sure that you add the extended model and family to the base model
 * and family.
 *
 * In general, we do not use information about the family, model, and stepping
 * to determine whether or not a feature is present; that is generally driven by
 * specific leaves. However, when something we care about on the processor is
 * not considered 'architectural', meaning that it is specific to a set of
 * processors and not promised in the architecture model to be consistent from
 * generation to generation, then we will fall back on this information. The
 * most common cases where this comes up are when we have to work around errata
 * in the processor, are dealing with processor-specific features such as CPU
 * performance counters, or want to provide additional information for things
 * such as fault management.
 *
 * While processors also have a brand string, which is the name that people
 * are familiar with when buying the processor, it is not meant for
 * programmatic consumption. That is what the family, model, and stepping are
 * for.
 *
 * ------------
 * CPUID Passes
 * ------------
 *
 * As part of performing feature detection, we break this into several different
 * passes. The passes are as follows:
 *
 * Pass 0	This is a primordial pass done in locore.s to deal with
 *		Cyrix CPUs that don't support cpuid. The reality is that
 *		we likely don't run on them any more, but there is still
 *		logic for handling them.
 *
 * Pass 1	This is the primary pass and is responsible for doing a
 *		large number of different things:
 *
 *		1. Determining which vendor manufactured the CPU and the
 *		family, model, and stepping information.
 *
 *		2. Gathering a large number of feature flags to
 *		determine which features the CPU supports and which
 *		indicate things that we need to do other work in the OS
 *		to enable. Features detected this way are added to the
 *		x86_featureset which can be queried to
 *		determine what we should do.
This includes processing 222 * all of the basic and extended CPU features that we care 223 * about. 224 * 225 * 3. Determining the CPU's topology. This includes 226 * information about how many cores and threads are present 227 * in the package. It also is responsible for figuring out 228 * which logical CPUs are potentially part of the same core 229 * and what other resources they might share. For more 230 * information see the 'Topology' section. 231 * 232 * 4. Determining the set of CPU security-specific features 233 * that we need to worry about and determine the 234 * appropriate set of workarounds. 235 * 236 * Pass 1 on the boot CPU occurs before KMDB is started. 237 * 238 * Pass 2 The second pass is done after startup(). Here, we check 239 * other miscellaneous features. Most of this is gathering 240 * additional basic and extended features that we'll use in 241 * later passes or for debugging support. 242 * 243 * Pass 3 The third pass occurs after the kernel memory allocator 244 * has been fully initialized. This gathers information 245 * where we might need dynamic memory available for our 246 * uses. This includes several varying width leaves that 247 * have cache information and the processor's brand string. 248 * 249 * Pass 4 The fourth and final normal pass is performed after the 250 * kernel has brought most everything online. This is 251 * invoked from post_startup(). In this pass, we go through 252 * the set of features that we have enabled and turn that 253 * into the hardware auxiliary vector features that 254 * userland receives. This is used by userland, primarily 255 * by the run-time link-editor (RTLD), though userland 256 * software could also refer to it directly. 257 * 258 * Microcode After a microcode update, we do a selective rescan of 259 * the cpuid leaves to determine what features have 260 * changed. Microcode updates can provide more details 261 * about security related features to deal with issues like 262 * Spectre and L1TF. On occasion, vendors have violated 263 * their contract and removed bits. However, we don't try 264 * to detect that because that puts us in a situation that 265 * we really can't deal with. As such, the only thing we 266 * rescan are security related features today. See 267 * cpuid_pass_ucode(). 268 * 269 * All of the passes (except pass 0) are run on all CPUs. However, for the most 270 * part we only care about what the boot CPU says about this information and use 271 * the other CPUs as a rough guide to sanity check that we have the same feature 272 * set. 273 * 274 * We do not support running multiple logical CPUs with disjoint, let alone 275 * different, feature sets. 276 * 277 * ------------------ 278 * Processor Topology 279 * ------------------ 280 * 281 * One of the important things that we need to do is to understand the topology 282 * of the underlying processor. When we say topology in this case, we're trying 283 * to understand the relationship between the logical CPUs that the operating 284 * system sees and the underlying physical layout. Different logical CPUs may 285 * share different resources which can have important consequences for the 286 * performance of the system. For example, they may share caches, execution 287 * units, and more. 288 * 289 * The topology of the processor changes from generation to generation and 290 * vendor to vendor. Along with that, different vendors use different 291 * terminology, and the operating system itself uses occasionally overlapping 292 * terminology. 
 * It's important to understand what this topology looks like so one can
 * understand the different things that we try to calculate and determine.
 *
 * To get started, let's talk about a little bit of terminology that we've used
 * so far, is used throughout this file, and is fairly generic across multiple
 * vendors:
 *
 * CPU
 *	A central processing unit (CPU) refers to a logical and/or virtual
 *	entity that the operating system can execute instructions on. The
 *	underlying resources for this CPU may be shared between multiple
 *	entities; however, to the operating system it is a discrete unit.
 *
 * PROCESSOR and PACKAGE
 *
 *	Generally, when we use the term 'processor' on its own, we are referring
 *	to the physical entity that one buys and plugs into a board. However,
 *	because processor has been overloaded and one might see it used to mean
 *	multiple different levels, we will instead use the term 'package' for
 *	the rest of this file. The term package comes from the electrical
 *	engineering side and refers to the physical entity that encloses the
 *	electronics inside. Strictly speaking, the package can contain more than
 *	just the CPU; for example, on many processors it may also have what's
 *	called an 'integrated graphics processing unit (GPU)'. Because the
 *	package can encapsulate multiple units, it is the largest physical unit
 *	that we refer to.
 *
 * SOCKET
 *
 *	A socket refers to a unit on a system board (generally the motherboard)
 *	that can receive a package. A single package, or processor, is plugged
 *	into a single socket. A system may have multiple sockets. Oftentimes,
 *	the term socket is used interchangeably with package and refers to the
 *	electrical component that is plugged in, and not the receptacle itself.
 *
 * CORE
 *
 *	A core refers to the physical instantiation of a CPU, generally, with a
 *	full set of hardware resources available to it. A package may contain
 *	multiple cores inside of it or it may just have a single one. A
 *	processor with more than one core is often referred to as 'multi-core'.
 *	In illumos, we will use the feature X86FSET_CMP to refer to a system
 *	that has 'multi-core' processors.
 *
 *	A core may expose a single logical CPU to the operating system, or it
 *	may expose multiple CPUs, which we call threads, defined below.
 *
 *	Some resources may still be shared by cores in the same package. For
 *	example, many processors will share the level 3 cache between cores.
 *	Some AMD generations share hardware resources between cores. For more
 *	information on that see the section 'AMD Topology'.
 *
 * THREAD and STRAND
 *
 *	In this file, generally a thread refers to a hardware resource and not
 *	the operating system's logical abstraction. A thread is always exposed
 *	as an independent logical CPU to the operating system. A thread belongs
 *	to a specific core. A core may have more than one thread. When that is
 *	the case, the threads that are part of the same core are often referred
 *	to as 'siblings'.
 *
 *	When multiple threads exist, this is generally referred to as
 *	simultaneous multi-threading (SMT). When Intel introduced this in their
 *	processors they called it hyper-threading (HT). When multiple threads
 *	are active in a core, they split the resources of the core.
For example, 358 * two threads may share the same set of hardware execution units. 359 * 360 * The operating system often uses the term 'strand' to refer to a thread. 361 * This helps disambiguate it from the software concept. 362 * 363 * CHIP 364 * 365 * Unfortunately, the term 'chip' is dramatically overloaded. At its most 366 * base meaning, it is used to refer to a single integrated circuit, which 367 * may or may not be the only thing in the package. In illumos, when you 368 * see the term 'chip' it is almost always referring to the same thing as 369 * the 'package'. However, many vendors may use chip to refer to one of 370 * many integrated circuits that have been placed in the package. As an 371 * example, see the subsequent definition. 372 * 373 * To try and keep things consistent, we will only use chip when referring 374 * to the entire integrated circuit package, with the exception of the 375 * definition of multi-chip module (because it is in the name) and use the 376 * term 'die' when we want the more general, potential sub-component 377 * definition. 378 * 379 * DIE 380 * 381 * A die refers to an integrated circuit. Inside of the package there may 382 * be a single die or multiple dies. This is sometimes called a 'chip' in 383 * vendor's parlance, but in this file, we use the term die to refer to a 384 * subcomponent. 385 * 386 * MULTI-CHIP MODULE 387 * 388 * A multi-chip module (MCM) refers to putting multiple distinct chips that 389 * are connected together in the same package. When a multi-chip design is 390 * used, generally each chip is manufactured independently and then joined 391 * together in the package. For example, on AMD's Zen microarchitecture 392 * (family 0x17), the package contains several dies (the second meaning of 393 * chip from above) that are connected together. 394 * 395 * CACHE 396 * 397 * A cache is a part of the processor that maintains copies of recently 398 * accessed memory. Caches are split into levels and then into types. 399 * Commonly there are one to three levels, called level one, two, and 400 * three. The lower the level, the smaller it is, the closer it is to the 401 * execution units of the CPU, and the faster it is to access. The layout 402 * and design of the cache come in many different flavors, consult other 403 * resources for a discussion of those. 404 * 405 * Caches are generally split into two types, the instruction and data 406 * cache. The caches contain what their names suggest, the instruction 407 * cache has executable program text, while the data cache has all other 408 * memory that the processor accesses. As of this writing, data is kept 409 * coherent between all of the caches on x86, so if one modifies program 410 * text before it is executed, that will be in the data cache, and the 411 * instruction cache will be synchronized with that change when the 412 * processor actually executes those instructions. This coherency also 413 * covers the fact that data could show up in multiple caches. 414 * 415 * Generally, the lowest level caches are specific to a core. However, the 416 * last layer cache is shared between some number of cores. The number of 417 * CPUs sharing this last level cache is important. This has implications 418 * for the choices that the scheduler makes, as accessing memory that might 419 * be in a remote cache after thread migration can be quite expensive. 420 * 421 * Sometimes, the word cache is abbreviated with a '$', because in US 422 * English the word cache is pronounced the same as cash. 
 * So L1D$ refers to the L1 data cache, and L2$ would be the L2 cache. This
 * will not be used in the rest of this theory statement for clarity.
 *
 * MEMORY CONTROLLER
 *
 *	The memory controller is a component that provides access to DRAM. Each
 *	memory controller can access a set number of DRAM channels. Each channel
 *	can have a number of DIMMs (sticks of memory) associated with it. A
 *	given package may have more than one memory controller. The association
 *	of the memory controller to a group of cores is important as it is
 *	cheaper to access memory on the controller that you are associated with.
 *
 * NUMA
 *
 *	NUMA, or non-uniform memory access, describes a way that systems are
 *	built. On x86, any processor core can address all of the memory in the
 *	system. However, when using multiple sockets or possibly within a
 *	multi-chip module, some of that memory is physically closer and some of
 *	it is further. Memory that is further away is more expensive to access.
 *	Consider the following image of multiple sockets with memory:
 *
 *	+--------+                                            +--------+
 *	| DIMM A |       +----------+      +----------+       | DIMM D |
 *	+--------+-+     |          |      |          |     +-+--------+
 *	  | DIMM B |=====| Socket 0 |======| Socket 1 |=====| DIMM E |
 *	+--------+-+     |          |      |          |     +-+--------+
 *	| DIMM C |       +----------+      +----------+       | DIMM F |
 *	+--------+                                            +--------+
 *
 *	In this example, Socket 0 is closer to DIMMs A-C while Socket 1 is
 *	closer to DIMMs D-F. This means that it is cheaper for socket 0 to
 *	access DIMMs A-C and more expensive to access D-F as it has to go
 *	through Socket 1 to get there. The inverse is true for Socket 1: DIMMs
 *	D-F are cheaper to access than A-C. While the socket form is the most
 *	common, when using multi-chip modules, this can also sometimes occur.
 *	For another example of this that's more involved, see the AMD topology
 *	section.
 *
 *
 * Intel Topology
 * --------------
 *
 * Most Intel processors since Nehalem (as of this writing the current
 * generation is Skylake / Cannon Lake) follow a fairly similar pattern. The CPU
 * portion of the package is a single monolithic die. MCMs currently aren't
 * used. Most parts have three levels of caches, with the L3 cache being shared
 * between all of the cores on the package. The L1/L2 cache is generally
 * specific to an individual core. The following image shows at a simplified
 * level what this looks like. The memory controller is commonly part of
 * something called the 'Uncore', which used to be implemented as separate
 * physical chips outside of the package, but is now part of the same chip.
473 * 474 * +-----------------------------------------------------------------------+ 475 * | Package | 476 * | +-------------------+ +-------------------+ +-------------------+ | 477 * | | Core | | Core | | Core | | 478 * | | +--------+ +---+ | | +--------+ +---+ | | +--------+ +---+ | | 479 * | | | Thread | | L | | | | Thread | | L | | | | Thread | | L | | | 480 * | | +--------+ | 1 | | | +--------+ | 1 | | | +--------+ | 1 | | | 481 * | | +--------+ | | | | +--------+ | | | | +--------+ | | | | 482 * | | | Thread | | | | | | Thread | | | | | | Thread | | | | | 483 * | | +--------+ +---+ | | +--------+ +---+ | | +--------+ +---+ | | 484 * | | +--------------+ | | +--------------+ | | +--------------+ | | 485 * | | | L2 Cache | | | | L2 Cache | | | | L2 Cache | | | 486 * | | +--------------+ | | +--------------+ | | +--------------+ | | 487 * | +-------------------+ +-------------------+ +-------------------+ | 488 * | +-------------------------------------------------------------------+ | 489 * | | Shared L3 Cache | | 490 * | +-------------------------------------------------------------------+ | 491 * | +-------------------------------------------------------------------+ | 492 * | | Memory Controller | | 493 * | +-------------------------------------------------------------------+ | 494 * +-----------------------------------------------------------------------+ 495 * 496 * A side effect of this current architecture is that what we care about from a 497 * scheduling and topology perspective, is simplified. In general we care about 498 * understanding which logical CPUs are part of the same core and socket. 499 * 500 * To determine the relationship between threads and cores, Intel initially used 501 * the identifier in the advanced programmable interrupt controller (APIC). They 502 * also added cpuid leaf 4 to give additional information about the number of 503 * threads and CPUs in the processor. With the addition of x2apic (which 504 * increased the number of addressable logical CPUs from 8-bits to 32-bits), an 505 * additional cpuid topology leaf 0xB was added. 506 * 507 * AMD Topology 508 * ------------ 509 * 510 * When discussing AMD topology, we want to break this into three distinct 511 * generations of topology. There's the basic topology that has been used in 512 * family 0xf+ (Opteron, Athlon64), there's the topology that was introduced 513 * with family 0x15 (Bulldozer), and there's the topology that was introduced 514 * with family 0x17 (Zen). AMD also has some additional terminology that's worth 515 * talking about. 516 * 517 * Until the introduction of family 0x17 (Zen), AMD did not implement something 518 * that they considered SMT. Whether or not the AMD processors have SMT 519 * influences many things including scheduling and reliability, availability, 520 * and serviceability (RAS) features. 521 * 522 * NODE 523 * 524 * AMD uses the term node to refer to a die that contains a number of cores 525 * and I/O resources. Depending on the processor family and model, more 526 * than one node can be present in the package. When there is more than one 527 * node this indicates a multi-chip module. Usually each node has its own 528 * access to memory and I/O devices. This is important and generally 529 * different from the corresponding Intel Nehalem-Skylake+ processors. As a 530 * result, we track this relationship in the operating system. 
531 * 532 * In processors with an L3 cache, the L3 cache is generally shared across 533 * the entire node, though the way this is carved up varies from generation 534 * to generation. 535 * 536 * BULLDOZER 537 * 538 * Starting with the Bulldozer family (0x15) and continuing until the 539 * introduction of the Zen microarchitecture, AMD introduced the idea of a 540 * compute unit. In a compute unit, two traditional cores share a number of 541 * hardware resources. Critically, they share the FPU, L1 instruction 542 * cache, and the L2 cache. Several compute units were then combined inside 543 * of a single node. Because the integer execution units, L1 data cache, 544 * and some other resources were not shared between the cores, AMD never 545 * considered this to be SMT. 546 * 547 * ZEN 548 * 549 * The Zen family (0x17) uses a multi-chip module (MCM) design, the module 550 * is called Zeppelin. These modules are similar to the idea of nodes used 551 * previously. Each of these nodes has two DRAM channels which all of the 552 * cores in the node can access uniformly. These nodes are linked together 553 * in the package, creating a NUMA environment. 554 * 555 * The Zeppelin die itself contains two different 'core complexes'. Each 556 * core complex consists of four cores which each have two threads, for a 557 * total of 8 logical CPUs per complex. Unlike other generations, 558 * where all the logical CPUs in a given node share the L3 cache, here each 559 * core complex has its own shared L3 cache. 560 * 561 * A further thing that we need to consider is that in some configurations, 562 * particularly with the Threadripper line of processors, not every die 563 * actually has its memory controllers wired up to actual memory channels. 564 * This means that some cores have memory attached to them and others 565 * don't. 566 * 567 * To put Zen in perspective, consider the following images: 568 * 569 * +--------------------------------------------------------+ 570 * | Core Complex | 571 * | +-------------------+ +-------------------+ +---+ | 572 * | | Core +----+ | | Core +----+ | | | | 573 * | | +--------+ | L2 | | | +--------+ | L2 | | | | | 574 * | | | Thread | +----+ | | | Thread | +----+ | | | | 575 * | | +--------+-+ +--+ | | +--------+-+ +--+ | | L | | 576 * | | | Thread | |L1| | | | Thread | |L1| | | 3 | | 577 * | | +--------+ +--+ | | +--------+ +--+ | | | | 578 * | +-------------------+ +-------------------+ | C | | 579 * | +-------------------+ +-------------------+ | a | | 580 * | | Core +----+ | | Core +----+ | | c | | 581 * | | +--------+ | L2 | | | +--------+ | L2 | | | h | | 582 * | | | Thread | +----+ | | | Thread | +----+ | | e | | 583 * | | +--------+-+ +--+ | | +--------+-+ +--+ | | | | 584 * | | | Thread | |L1| | | | Thread | |L1| | | | | 585 * | | +--------+ +--+ | | +--------+ +--+ | | | | 586 * | +-------------------+ +-------------------+ +---+ | 587 * | | 588 * +--------------------------------------------------------+ 589 * 590 * This first image represents a single Zen core complex that consists of four 591 * cores. 592 * 593 * 594 * +--------------------------------------------------------+ 595 * | Zeppelin Die | 596 * | +--------------------------------------------------+ | 597 * | | I/O Units (PCIe, SATA, USB, etc.) 
| | 598 * | +--------------------------------------------------+ | 599 * | HH | 600 * | +-----------+ HH +-----------+ | 601 * | | | HH | | | 602 * | | Core |==========| Core | | 603 * | | Complex |==========| Complex | | 604 * | | | HH | | | 605 * | +-----------+ HH +-----------+ | 606 * | HH | 607 * | +--------------------------------------------------+ | 608 * | | Memory Controller | | 609 * | +--------------------------------------------------+ | 610 * | | 611 * +--------------------------------------------------------+ 612 * 613 * This image represents a single Zeppelin Die. Note how both cores are 614 * connected to the same memory controller and I/O units. While each core 615 * complex has its own L3 cache as seen in the first image, they both have 616 * uniform access to memory. 617 * 618 * 619 * PP PP 620 * PP PP 621 * +----------PP---------------------PP---------+ 622 * | PP PP | 623 * | +-----------+ +-----------+ | 624 * | | | | | | 625 * MMMMMMMMM| Zeppelin |==========| Zeppelin |MMMMMMMMM 626 * MMMMMMMMM| Die |==========| Die |MMMMMMMMM 627 * | | | | | | 628 * | +-----------+ooo ...+-----------+ | 629 * | HH ooo ... HH | 630 * | HH oo.. HH | 631 * | HH ..oo HH | 632 * | HH ... ooo HH | 633 * | +-----------+... ooo+-----------+ | 634 * | | | | | | 635 * MMMMMMMMM| Zeppelin |==========| Zeppelin |MMMMMMMMM 636 * MMMMMMMMM| Die |==========| Die |MMMMMMMMM 637 * | | | | | | 638 * | +-----------+ +-----------+ | 639 * | PP PP | 640 * +----------PP---------------------PP---------+ 641 * PP PP 642 * PP PP 643 * 644 * This image represents a single Zen package. In this example, it has four 645 * Zeppelin dies, though some configurations only have a single one. In this 646 * example, each die is directly connected to the next. Also, each die is 647 * represented as being connected to memory by the 'M' character and connected 648 * to PCIe devices and other I/O, by the 'P' character. Because each Zeppelin 649 * die is made up of two core complexes, we have multiple different NUMA 650 * domains that we care about for these systems. 651 * 652 * CPUID LEAVES 653 * 654 * There are a few different CPUID leaves that we can use to try and understand 655 * the actual state of the world. As part of the introduction of family 0xf, AMD 656 * added CPUID leaf 0x80000008. This leaf tells us the number of logical 657 * processors that are in the system. Because families before Zen didn't have 658 * SMT, this was always the number of cores that were in the system. However, it 659 * should always be thought of as the number of logical threads to be consistent 660 * between generations. In addition we also get the size of the APIC ID that is 661 * used to represent the number of logical processors. This is important for 662 * deriving topology information. 663 * 664 * In the Bulldozer family, AMD added leaf 0x8000001E. The information varies a 665 * bit between Bulldozer and later families, but it is quite useful in 666 * determining the topology information. Because this information has changed 667 * across family generations, it's worth calling out what these mean 668 * explicitly. The registers have the following meanings: 669 * 670 * %eax The APIC ID. The entire register is defined to have a 32-bit 671 * APIC ID, even though on systems without x2apic support, it will 672 * be limited to 8 bits. 673 * 674 * %ebx On Bulldozer-era systems this contains information about the 675 * number of cores that are in a compute unit (cores that share 676 * resources). 
 *		It also contains a per-package compute unit ID that
 *		identifies which compute unit the logical CPU is a part of.
 *
 *		On Zen-era systems this instead contains the number of threads
 *		per core and the ID of the core that the logical CPU is a part
 *		of. Note that this ID is unique only within the package; it is
 *		not globally unique across the entire system.
 *
 *	%ecx	This contains the number of nodes that exist in the package. It
 *		also contains an ID that identifies which node the logical CPU
 *		is a part of.
 *
 * Finally, we also use cpuid leaf 0x8000001D to gather information about the
 * cache layout and to determine which logical CPUs share which caches.
 *
 * illumos Topology
 * ----------------
 *
 * Based on the above we synthesize the information into several different
 * variables that we store in the 'struct cpuid_info'. We'll go into the details
 * of what each member is supposed to represent and their uniqueness. In
 * general, there are two levels of uniqueness that we care about. We care about
 * an ID that is globally unique. That means that it will be unique across all
 * entities in the system. For example, the default logical CPU ID is globally
 * unique. On the other hand, there is some information that we only care about
 * being unique within the context of a single package / socket. Here are the
 * variables that we keep track of and their meaning.
 *
 * Several of the values that represent an identifier, with the exception of
 * cpi_apicid, are allowed to be synthetic.
 *
 *
 * cpi_apicid
 *
 *	This is the value of the CPU's APIC id. This should be the full 32-bit
 *	ID if the CPU is using the x2apic. Otherwise, it should be the 8-bit
 *	APIC ID. This value is globally unique between all logical CPUs across
 *	all packages. This is usually required by the APIC.
 *
 * cpi_chipid
 *
 *	This value indicates the ID of the package that the logical CPU is a
 *	part of. This value is allowed to be synthetic. It is usually derived by
 *	taking the CPU's APIC ID and determining how many bits are used to
 *	represent CPU cores in the package. All logical CPUs that are part of
 *	the same package must have the same value.
 *
 * cpi_coreid
 *
 *	This represents the ID of a CPU core. Two logical CPUs should only have
 *	the same cpi_coreid value if they are part of the same core. These
 *	values may be synthetic. On systems that support SMT, this value is
 *	usually derived from the APIC ID; otherwise it is often synthetic and
 *	just set to the value of the cpu_id in the cpu_t.
 *
 * cpi_pkgcoreid
 *
 *	This is similar to the cpi_coreid in that logical CPUs that are part of
 *	the same core should have the same ID. The main difference is that these
 *	values are only required to be unique to a given socket.
 *
 * cpi_clogid
 *
 *	This represents the logical ID of a logical CPU. This value should be
 *	unique within a given socket for each logical CPU. This is allowed to be
 *	synthetic, though it is usually based off of the CPU's APIC ID. The
 *	broader system expects that logical CPUs that are part of the same core
 *	have contiguous numbers. For example, if there are two threads per core,
 *	then the two IDs divided by two should be the same, and modulo two the
 *	first should be zero and the second should be one, as sketched below.
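 *
 *	As a minimal, illustrative sketch of that property (assuming exactly
 *	two threads per core; the variable names here are hypothetical and are
 *	not members of struct cpuid_info), a per-socket logical ID can be
 *	decomposed into a core number and a thread number like so:
 *
 *		uint_t core_num = clogid / 2;
 *		uint_t thread_num = clogid % 2;
 *
 *	Sibling threads share core_num and differ only in thread_num.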
 *
 *	Under this scheme, IDs 4 and 5 indicate two logical CPUs that are part
 *	of the same core, while IDs 5 and 6 represent two logical CPUs that are
 *	part of different cores.
 *
 *	While it is common for the cpi_coreid and the cpi_clogid to be derived
 *	from the same source, strictly speaking, they don't have to be and the
 *	two values should be considered logically independent. One should not
 *	try to compare a logical CPU's cpi_coreid and cpi_clogid to determine
 *	some kind of relationship. While this is tempting, we've seen cases on
 *	AMD family 0xf where the system's cpu id is not related to its APIC ID.
 *
 * cpi_ncpu_per_chip
 *
 *	This value indicates the total number of logical CPUs that exist in the
 *	physical package. Critically, this is not the number of logical CPUs
 *	that exist for just the single core.
 *
 *	This value should be the same for all logical CPUs in the same package.
 *
 * cpi_ncore_per_chip
 *
 *	This value indicates the total number of physical CPU cores that exist
 *	in the package. The system compares this value with cpi_ncpu_per_chip to
 *	determine if simultaneous multi-threading (SMT) is enabled. When
 *	cpi_ncpu_per_chip equals cpi_ncore_per_chip, then there is no SMT and
 *	the X86FSET_HTT feature is not set. If this value is greater than one,
 *	then we consider the processor to have the feature X86FSET_CMP, to
 *	indicate that there is support for more than one core.
 *
 *	This value should be the same for all logical CPUs in the same package.
 *
 * cpi_procnodes_per_pkg
 *
 *	This value indicates the number of 'nodes' that exist in the package.
 *	When processors are actually a multi-chip module, this represents the
 *	number of such modules that exist in the package. Currently, on Intel
 *	based systems this member is always set to 1.
 *
 *	This value should be the same for all logical CPUs in the same package.
 *
 * cpi_procnodeid
 *
 *	This value indicates the ID of the node that the logical CPU is a part
 *	of. All logical CPUs that are in the same node must have the same value
 *	here. This value must be unique across all of the packages in the
 *	system. On Intel based systems, this is currently set to the value in
 *	cpi_chipid because there is only one node.
 *
 * cpi_cores_per_compunit
 *
 *	This value indicates the number of cores that are part of a compute
 *	unit. See the AMD topology section for more on compute units. This
 *	member only has real meaning currently for AMD Bulldozer family
 *	processors. For all other processors, this should currently be set to 1.
 *
 * cpi_compunitid
 *
 *	This indicates the compute unit that the logical CPU belongs to. For
 *	processors without AMD Bulldozer-style compute units this should be set
 *	to the value of cpi_coreid.
 *
 * cpi_ncpu_shr_last_cache
 *
 *	This indicates the number of logical CPUs that are sharing the same last
 *	level cache. This value should be the same for all CPUs that are sharing
 *	that cache. The last cache refers to the cache that is closest to memory
 *	and furthest away from the CPU.
 *
 * cpi_last_lvl_cacheid
 *
 *	This indicates the ID of the last cache that the logical CPU uses. This
 *	cache is often shared between multiple logical CPUs and is the cache
 *	that is closest to memory and furthest away from the CPU. This value
 *	should be the same for a group of logical CPUs only if they actually
 *	share the same last level cache. IDs should not overlap between
 *	packages.
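 *
 *	As a rough, illustrative sketch of where these last two values come
 *	from (the real derivation is performed later in this file and differs
 *	slightly between Intel leaf 4 and AMD leaf 0x8000001D; the local
 *	variable names here are hypothetical), the cache leaf reports one less
 *	than the number of logical CPUs sharing the cache in %eax bits 25:14,
 *	and the shared-cache ID can then be formed by dropping the low-order
 *	APIC ID bits that distinguish those sharing CPUs (highbit() is the
 *	existing kernel helper that returns the position of the highest set
 *	bit):
 *
 *		ncpu_shr = BITX(cp->cp_eax, 25, 14) + 1;
 *		cacheid = cpi_apicid >> highbit(ncpu_shr - 1);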
 *
 * cpi_ncore_bits
 *
 *	This indicates the number of bits that are required to represent all of
 *	the cores in the system. As cores are derived based on their APIC IDs,
 *	we aren't guaranteed a run of APIC IDs starting from zero. It's OK for
 *	this value to be larger than the actual number of IDs that are present
 *	in the system. This is used to size tables by the CMI framework. It is
 *	only filled in for Intel and AMD CPUs.
 *
 * cpi_nthread_bits
 *
 *	This indicates the number of bits required to represent all of the IDs
 *	that cover the logical CPUs that exist on a given core. It's OK for this
 *	value to be larger than the actual number of IDs that are present in the
 *	system. This is used to size tables by the CMI framework. It is
 *	only filled in for Intel and AMD CPUs.
 *
 * -----------
 * Hypervisors
 * -----------
 *
 * If trying to manage the differences between vendors wasn't bad enough, it can
 * get worse thanks to our friend hardware virtualization. Hypervisors are given
 * the ability to interpose on all cpuid instructions and change them to suit
 * their purposes. In general, this is necessary because the hypervisor wants to
 * present a more uniform set of features, or wants to withhold knowledge of
 * some features from the guest operating system kernel so that the guest can be
 * migrated between systems more easily.
 *
 * When it comes to trying to determine topology information, this can be a
 * double edged sword. When a hypervisor doesn't actually implement a cpuid
 * leaf, it'll often return all zeros. Because of that, you'll often see various
 * checks scattered about that verify fields are non-zero before we assume we
 * can use them.
 *
 * When it comes to topology information, the hypervisor is often incentivized
 * to lie to you about topology. This is because it doesn't always actually
 * guarantee that topology at all. The topology path we take in the system
 * depends on how the CPU advertises itself. If it advertises itself as an Intel
 * or AMD CPU, then we basically do our normal path. However, when the
 * hypervisor doesn't advertise an actual vendor, that usually turns into
 * multiple one-core CPUs that we enumerate, often on different sockets. The
 * actual behavior depends greatly on what the hypervisor actually exposes to
 * us.
 *
 * --------------------
 * Exposing Information
 * --------------------
 *
 * We expose CPUID information in three different forms in the system.
 *
 * The first is through the x86_featureset variable. This is used in conjunction
 * with the is_x86_feature() function. This is queried by x86-specific functions
 * to determine which features are or aren't present in the system and to make
 * decisions based upon them. For example, users of this include everything from
 * parts of the system dedicated to reliability, availability, and
 * serviceability (RAS), to making decisions about how to handle security
 * mitigations, to various x86-specific drivers. General purpose or
 * architecture independent drivers should never be calling this function; a
 * brief illustrative sketch of this style of check follows.
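 *
 * The sketch below is hypothetical surrounding code rather than something
 * taken from this file; it simply shows the intended usage pattern
 * (X86FSET_HTT is one of the feature identifiers defined in x86_archext.h):
 *
 *	boolean_t smt_capable = B_FALSE;
 *
 *	if (is_x86_feature(x86_featureset, X86FSET_HTT))
 *		smt_capable = B_TRUE;
 *
 * The second means is through the auxiliary vector.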
The auxiliary vector is a 882 * series of tagged data that the kernel passes down to a user program when it 883 * begins executing. This information is used to indicate to programs what 884 * instruction set extensions are present. For example, information about the 885 * CPU supporting the machine check architecture (MCA) wouldn't be passed down 886 * since user programs cannot make use of it. However, things like the AVX 887 * instruction sets are. Programs use this information to make run-time 888 * decisions about what features they should use. As an example, the run-time 889 * link-editor (rtld) can relocate different functions depending on the hardware 890 * support available. 891 * 892 * The final form is through a series of accessor functions that all have the 893 * form cpuid_get*. This is used by a number of different subsystems in the 894 * kernel to determine more detailed information about what we're running on, 895 * topology information, etc. Some of these subsystems include processor groups 896 * (uts/common/os/pg.c.), CPU Module Interface (uts/i86pc/os/cmi.c), ACPI, 897 * microcode, and performance monitoring. These functions all ASSERT that the 898 * CPU they're being called on has reached a certain cpuid pass. If the passes 899 * are rearranged, then this needs to be adjusted. 900 */ 901 902 #include <sys/types.h> 903 #include <sys/archsystm.h> 904 #include <sys/x86_archext.h> 905 #include <sys/kmem.h> 906 #include <sys/systm.h> 907 #include <sys/cmn_err.h> 908 #include <sys/sunddi.h> 909 #include <sys/sunndi.h> 910 #include <sys/cpuvar.h> 911 #include <sys/processor.h> 912 #include <sys/sysmacros.h> 913 #include <sys/pg.h> 914 #include <sys/fp.h> 915 #include <sys/controlregs.h> 916 #include <sys/bitmap.h> 917 #include <sys/auxv_386.h> 918 #include <sys/memnode.h> 919 #include <sys/pci_cfgspace.h> 920 #include <sys/comm_page.h> 921 #include <sys/mach_mmu.h> 922 #include <sys/ucode.h> 923 #include <sys/tsc.h> 924 925 #ifdef __xpv 926 #include <sys/hypervisor.h> 927 #else 928 #include <sys/ontrap.h> 929 #endif 930 931 uint_t x86_vendor = X86_VENDOR_IntelClone; 932 uint_t x86_type = X86_TYPE_OTHER; 933 uint_t x86_clflush_size = 0; 934 935 #if defined(__xpv) 936 int x86_use_pcid = 0; 937 int x86_use_invpcid = 0; 938 #else 939 int x86_use_pcid = -1; 940 int x86_use_invpcid = -1; 941 #endif 942 943 uint_t pentiumpro_bug4046376; 944 945 uchar_t x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)]; 946 947 static char *x86_feature_names[NUM_X86_FEATURES] = { 948 "lgpg", 949 "tsc", 950 "msr", 951 "mtrr", 952 "pge", 953 "de", 954 "cmov", 955 "mmx", 956 "mca", 957 "pae", 958 "cv8", 959 "pat", 960 "sep", 961 "sse", 962 "sse2", 963 "htt", 964 "asysc", 965 "nx", 966 "sse3", 967 "cx16", 968 "cmp", 969 "tscp", 970 "mwait", 971 "sse4a", 972 "cpuid", 973 "ssse3", 974 "sse4_1", 975 "sse4_2", 976 "1gpg", 977 "clfsh", 978 "64", 979 "aes", 980 "pclmulqdq", 981 "xsave", 982 "avx", 983 "vmx", 984 "svm", 985 "topoext", 986 "f16c", 987 "rdrand", 988 "x2apic", 989 "avx2", 990 "bmi1", 991 "bmi2", 992 "fma", 993 "smep", 994 "smap", 995 "adx", 996 "rdseed", 997 "mpx", 998 "avx512f", 999 "avx512dq", 1000 "avx512pf", 1001 "avx512er", 1002 "avx512cd", 1003 "avx512bw", 1004 "avx512vl", 1005 "avx512fma", 1006 "avx512vbmi", 1007 "avx512_vpopcntdq", 1008 "avx512_4vnniw", 1009 "avx512_4fmaps", 1010 "xsaveopt", 1011 "xsavec", 1012 "xsaves", 1013 "sha", 1014 "umip", 1015 "pku", 1016 "ospke", 1017 "pcid", 1018 "invpcid", 1019 "ibrs", 1020 "ibpb", 1021 "stibp", 1022 "ssbd", 1023 "ssbd_virt", 1024 "rdcl_no", 1025 
"ibrs_all", 1026 "rsba", 1027 "ssb_no", 1028 "stibp_all", 1029 "flush_cmd", 1030 "l1d_vmentry_no", 1031 "fsgsbase", 1032 "clflushopt", 1033 "clwb", 1034 "monitorx", 1035 "clzero", 1036 "xop", 1037 "fma4", 1038 "tbm", 1039 "avx512_vnni", 1040 "amd_pcec", 1041 "mb_clear", 1042 "mds_no", 1043 "core_thermal", 1044 "pkg_thermal" 1045 }; 1046 1047 boolean_t 1048 is_x86_feature(void *featureset, uint_t feature) 1049 { 1050 ASSERT(feature < NUM_X86_FEATURES); 1051 return (BT_TEST((ulong_t *)featureset, feature)); 1052 } 1053 1054 void 1055 add_x86_feature(void *featureset, uint_t feature) 1056 { 1057 ASSERT(feature < NUM_X86_FEATURES); 1058 BT_SET((ulong_t *)featureset, feature); 1059 } 1060 1061 void 1062 remove_x86_feature(void *featureset, uint_t feature) 1063 { 1064 ASSERT(feature < NUM_X86_FEATURES); 1065 BT_CLEAR((ulong_t *)featureset, feature); 1066 } 1067 1068 boolean_t 1069 compare_x86_featureset(void *setA, void *setB) 1070 { 1071 /* 1072 * We assume that the unused bits of the bitmap are always zero. 1073 */ 1074 if (memcmp(setA, setB, BT_SIZEOFMAP(NUM_X86_FEATURES)) == 0) { 1075 return (B_TRUE); 1076 } else { 1077 return (B_FALSE); 1078 } 1079 } 1080 1081 void 1082 print_x86_featureset(void *featureset) 1083 { 1084 uint_t i; 1085 1086 for (i = 0; i < NUM_X86_FEATURES; i++) { 1087 if (is_x86_feature(featureset, i)) { 1088 cmn_err(CE_CONT, "?x86_feature: %s\n", 1089 x86_feature_names[i]); 1090 } 1091 } 1092 } 1093 1094 /* Note: This is the maximum size for the CPU, not the size of the structure. */ 1095 static size_t xsave_state_size = 0; 1096 uint64_t xsave_bv_all = (XFEATURE_LEGACY_FP | XFEATURE_SSE); 1097 boolean_t xsave_force_disable = B_FALSE; 1098 extern int disable_smap; 1099 1100 /* 1101 * This is set to platform type we are running on. 1102 */ 1103 static int platform_type = -1; 1104 1105 #if !defined(__xpv) 1106 /* 1107 * Variable to patch if hypervisor platform detection needs to be 1108 * disabled (e.g. platform_type will always be HW_NATIVE if this is 0). 1109 */ 1110 int enable_platform_detection = 1; 1111 #endif 1112 1113 /* 1114 * monitor/mwait info. 1115 * 1116 * size_actual and buf_actual are the real address and size allocated to get 1117 * proper mwait_buf alignement. buf_actual and size_actual should be passed 1118 * to kmem_free(). Currently kmem_alloc() and mwait happen to both use 1119 * processor cache-line alignment, but this is not guarantied in the furture. 1120 */ 1121 struct mwait_info { 1122 size_t mon_min; /* min size to avoid missed wakeups */ 1123 size_t mon_max; /* size to avoid false wakeups */ 1124 size_t size_actual; /* size actually allocated */ 1125 void *buf_actual; /* memory actually allocated */ 1126 uint32_t support; /* processor support of monitor/mwait */ 1127 }; 1128 1129 /* 1130 * xsave/xrestor info. 1131 * 1132 * This structure contains HW feature bits and the size of the xsave save area. 1133 * Note: the kernel declares a fixed size (AVX_XSAVE_SIZE) structure 1134 * (xsave_state) to describe the xsave layout. However, at runtime the 1135 * per-lwp xsave area is dynamically allocated based on xsav_max_size. The 1136 * xsave_state structure simply represents the legacy layout of the beginning 1137 * of the xsave area. 
1138 */ 1139 struct xsave_info { 1140 uint32_t xsav_hw_features_low; /* Supported HW features */ 1141 uint32_t xsav_hw_features_high; /* Supported HW features */ 1142 size_t xsav_max_size; /* max size save area for HW features */ 1143 size_t ymm_size; /* AVX: size of ymm save area */ 1144 size_t ymm_offset; /* AVX: offset for ymm save area */ 1145 size_t bndregs_size; /* MPX: size of bndregs save area */ 1146 size_t bndregs_offset; /* MPX: offset for bndregs save area */ 1147 size_t bndcsr_size; /* MPX: size of bndcsr save area */ 1148 size_t bndcsr_offset; /* MPX: offset for bndcsr save area */ 1149 size_t opmask_size; /* AVX512: size of opmask save */ 1150 size_t opmask_offset; /* AVX512: offset for opmask save */ 1151 size_t zmmlo_size; /* AVX512: size of zmm 256 save */ 1152 size_t zmmlo_offset; /* AVX512: offset for zmm 256 save */ 1153 size_t zmmhi_size; /* AVX512: size of zmm hi reg save */ 1154 size_t zmmhi_offset; /* AVX512: offset for zmm hi reg save */ 1155 }; 1156 1157 1158 /* 1159 * These constants determine how many of the elements of the 1160 * cpuid we cache in the cpuid_info data structure; the 1161 * remaining elements are accessible via the cpuid instruction. 1162 */ 1163 1164 #define NMAX_CPI_STD 8 /* eax = 0 .. 7 */ 1165 #define NMAX_CPI_EXTD 0x1f /* eax = 0x80000000 .. 0x8000001e */ 1166 1167 /* 1168 * See the big theory statement for a more detailed explanation of what some of 1169 * these members mean. 1170 */ 1171 struct cpuid_info { 1172 uint_t cpi_pass; /* last pass completed */ 1173 /* 1174 * standard function information 1175 */ 1176 uint_t cpi_maxeax; /* fn 0: %eax */ 1177 char cpi_vendorstr[13]; /* fn 0: %ebx:%ecx:%edx */ 1178 uint_t cpi_vendor; /* enum of cpi_vendorstr */ 1179 1180 uint_t cpi_family; /* fn 1: extended family */ 1181 uint_t cpi_model; /* fn 1: extended model */ 1182 uint_t cpi_step; /* fn 1: stepping */ 1183 chipid_t cpi_chipid; /* fn 1: %ebx: Intel: chip # */ 1184 /* AMD: package/socket # */ 1185 uint_t cpi_brandid; /* fn 1: %ebx: brand ID */ 1186 int cpi_clogid; /* fn 1: %ebx: thread # */ 1187 uint_t cpi_ncpu_per_chip; /* fn 1: %ebx: logical cpu count */ 1188 uint8_t cpi_cacheinfo[16]; /* fn 2: intel-style cache desc */ 1189 uint_t cpi_ncache; /* fn 2: number of elements */ 1190 uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */ 1191 id_t cpi_last_lvl_cacheid; /* fn 4: %eax: derived cache id */ 1192 uint_t cpi_cache_leaf_size; /* Number of cache elements */ 1193 /* Intel fn: 4, AMD fn: 8000001d */ 1194 struct cpuid_regs **cpi_cache_leaves; /* Acual leaves from above */ 1195 struct cpuid_regs cpi_std[NMAX_CPI_STD]; /* 0 .. 7 */ 1196 /* 1197 * extended function information 1198 */ 1199 uint_t cpi_xmaxeax; /* fn 0x80000000: %eax */ 1200 char cpi_brandstr[49]; /* fn 0x8000000[234] */ 1201 uint8_t cpi_pabits; /* fn 0x80000006: %eax */ 1202 uint8_t cpi_vabits; /* fn 0x80000006: %eax */ 1203 uint8_t cpi_fp_amd_save; /* AMD: FP error pointer save rqd. */ 1204 struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x800000XX */ 1205 1206 id_t cpi_coreid; /* same coreid => strands share core */ 1207 int cpi_pkgcoreid; /* core number within single package */ 1208 uint_t cpi_ncore_per_chip; /* AMD: fn 0x80000008: %ecx[7-0] */ 1209 /* Intel: fn 4: %eax[31-26] */ 1210 1211 /* 1212 * These values represent the number of bits that are required to store 1213 * information about the number of cores and threads. 
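	 * For example (with purely illustrative values), on a package with up
	 * to 8 cores of 2 threads each, cpi_nthread_bits would be 1 and
	 * cpi_ncore_bits would be 3, so an APIC-derived ID can be viewed
	 * roughly as:
	 *
	 *	[ package id | core id (3 bits) | thread id (1 bit) ]
	 *
	 * The exact decomposition varies by vendor and cpuid leaf; see the
	 * topology discussion in the big theory statement above.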
1214 */ 1215 uint_t cpi_ncore_bits; 1216 uint_t cpi_nthread_bits; 1217 /* 1218 * supported feature information 1219 */ 1220 uint32_t cpi_support[6]; 1221 #define STD_EDX_FEATURES 0 1222 #define AMD_EDX_FEATURES 1 1223 #define TM_EDX_FEATURES 2 1224 #define STD_ECX_FEATURES 3 1225 #define AMD_ECX_FEATURES 4 1226 #define STD_EBX_FEATURES 5 1227 /* 1228 * Synthesized information, where known. 1229 */ 1230 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */ 1231 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */ 1232 uint32_t cpi_socket; /* Chip package/socket type */ 1233 1234 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */ 1235 uint32_t cpi_apicid; 1236 uint_t cpi_procnodeid; /* AMD: nodeID on HT, Intel: chipid */ 1237 uint_t cpi_procnodes_per_pkg; /* AMD: # of nodes in the package */ 1238 /* Intel: 1 */ 1239 uint_t cpi_compunitid; /* AMD: ComputeUnit ID, Intel: coreid */ 1240 uint_t cpi_cores_per_compunit; /* AMD: # of cores in the ComputeUnit */ 1241 1242 struct xsave_info cpi_xsave; /* fn D: xsave/xrestor info */ 1243 }; 1244 1245 1246 static struct cpuid_info cpuid_info0; 1247 1248 /* 1249 * These bit fields are defined by the Intel Application Note AP-485 1250 * "Intel Processor Identification and the CPUID Instruction" 1251 */ 1252 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20) 1253 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16) 1254 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12) 1255 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8) 1256 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0) 1257 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4) 1258 1259 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx) 1260 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx) 1261 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx) 1262 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx) 1263 #define CPI_FEATURES_7_0_EBX(cpi) ((cpi)->cpi_std[7].cp_ebx) 1264 #define CPI_FEATURES_7_0_ECX(cpi) ((cpi)->cpi_std[7].cp_ecx) 1265 #define CPI_FEATURES_7_0_EDX(cpi) ((cpi)->cpi_std[7].cp_edx) 1266 1267 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0) 1268 #define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 7) 1269 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16) 1270 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24) 1271 1272 #define CPI_MAXEAX_MAX 0x100 /* sanity control */ 1273 #define CPI_XMAXEAX_MAX 0x80000100 1274 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */ 1275 #define CPI_FNB_ECX_MAX 0x20 /* sanity: max fn B levels */ 1276 1277 /* 1278 * Function 4 (Deterministic Cache Parameters) macros 1279 * Defined by Intel Application Note AP-485 1280 */ 1281 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26) 1282 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14) 1283 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9) 1284 #define CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8) 1285 #define CPI_CACHE_LVL(regs) BITX((regs)->cp_eax, 7, 5) 1286 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0) 1287 #define CPI_CPU_LEVEL_TYPE(regs) BITX((regs)->cp_ecx, 15, 8) 1288 1289 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22) 1290 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12) 1291 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0) 1292 1293 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0) 1294 1295 #define CPI_PREFCH_STRIDE(regs) 
BITX((regs)->cp_edx, 9, 0) 1296 1297 1298 /* 1299 * A couple of shorthand macros to identify "later" P6-family chips 1300 * like the Pentium M and Core. First, the "older" P6-based stuff 1301 * (loosely defined as "pre-Pentium-4"): 1302 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon 1303 */ 1304 #define IS_LEGACY_P6(cpi) ( \ 1305 cpi->cpi_family == 6 && \ 1306 (cpi->cpi_model == 1 || \ 1307 cpi->cpi_model == 3 || \ 1308 cpi->cpi_model == 5 || \ 1309 cpi->cpi_model == 6 || \ 1310 cpi->cpi_model == 7 || \ 1311 cpi->cpi_model == 8 || \ 1312 cpi->cpi_model == 0xA || \ 1313 cpi->cpi_model == 0xB) \ 1314 ) 1315 1316 /* A "new F6" is everything with family 6 that's not the above */ 1317 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi)) 1318 1319 /* Extended family/model support */ 1320 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \ 1321 cpi->cpi_family >= 0xf) 1322 1323 /* 1324 * Info for monitor/mwait idle loop. 1325 * 1326 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's 1327 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November 1328 * 2006. 1329 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual 1330 * Documentation Updates" #33633, Rev 2.05, December 2006. 1331 */ 1332 #define MWAIT_SUPPORT (0x00000001) /* mwait supported */ 1333 #define MWAIT_EXTENSIONS (0x00000002) /* extension supported */ 1334 #define MWAIT_ECX_INT_ENABLE (0x00000004) /* ecx 1 extension supported */ 1335 #define MWAIT_SUPPORTED(cpi) ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON) 1336 #define MWAIT_INT_ENABLE(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x2) 1337 #define MWAIT_EXTENSION(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x1) 1338 #define MWAIT_SIZE_MIN(cpi) BITX((cpi)->cpi_std[5].cp_eax, 15, 0) 1339 #define MWAIT_SIZE_MAX(cpi) BITX((cpi)->cpi_std[5].cp_ebx, 15, 0) 1340 /* 1341 * Number of sub-cstates for a given c-state. 1342 */ 1343 #define MWAIT_NUM_SUBC_STATES(cpi, c_state) \ 1344 BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state) 1345 1346 /* 1347 * XSAVE leaf 0xD enumeration 1348 */ 1349 #define CPUID_LEAFD_2_YMM_OFFSET 576 1350 #define CPUID_LEAFD_2_YMM_SIZE 256 1351 1352 /* 1353 * Common extended leaf names to cut down on typos. 1354 */ 1355 #define CPUID_LEAF_EXT_0 0x80000000 1356 #define CPUID_LEAF_EXT_8 0x80000008 1357 #define CPUID_LEAF_EXT_1d 0x8000001d 1358 #define CPUID_LEAF_EXT_1e 0x8000001e 1359 1360 /* 1361 * Functions we consume from cpuid_subr.c; don't publish these in a header 1362 * file to try and keep people using the expected cpuid_* interfaces. 1363 */ 1364 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t); 1365 extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t); 1366 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t); 1367 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t); 1368 extern uint_t _cpuid_vendorstr_to_vendorcode(char *); 1369 1370 /* 1371 * Apply various platform-dependent restrictions where the 1372 * underlying platform restrictions mean the CPU can be marked 1373 * as less capable than its cpuid instruction would imply. 1374 */ 1375 #if defined(__xpv) 1376 static void 1377 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp) 1378 { 1379 switch (eax) { 1380 case 1: { 1381 uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ?
1382 0 : CPUID_INTC_EDX_MCA; 1383 cp->cp_edx &= 1384 ~(mcamask | 1385 CPUID_INTC_EDX_PSE | 1386 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 1387 CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR | 1388 CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT | 1389 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 1390 CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT); 1391 break; 1392 } 1393 1394 case 0x80000001: 1395 cp->cp_edx &= 1396 ~(CPUID_AMD_EDX_PSE | 1397 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE | 1398 CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE | 1399 CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 | 1400 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP | 1401 CPUID_AMD_EDX_TSCP); 1402 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY; 1403 break; 1404 default: 1405 break; 1406 } 1407 1408 switch (vendor) { 1409 case X86_VENDOR_Intel: 1410 switch (eax) { 1411 case 4: 1412 /* 1413 * Zero out the (ncores-per-chip - 1) field 1414 */ 1415 cp->cp_eax &= 0x03fffffff; 1416 break; 1417 default: 1418 break; 1419 } 1420 break; 1421 case X86_VENDOR_AMD: 1422 switch (eax) { 1423 1424 case 0x80000001: 1425 cp->cp_ecx &= ~CPUID_AMD_ECX_CR8D; 1426 break; 1427 1428 case CPUID_LEAF_EXT_8: 1429 /* 1430 * Zero out the (ncores-per-chip - 1) field 1431 */ 1432 cp->cp_ecx &= 0xffffff00; 1433 break; 1434 default: 1435 break; 1436 } 1437 break; 1438 default: 1439 break; 1440 } 1441 } 1442 #else 1443 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */ 1444 #endif 1445 1446 /* 1447 * Some undocumented ways of patching the results of the cpuid 1448 * instruction to permit running Solaris 10 on future cpus that 1449 * we don't currently support. Could be set to non-zero values 1450 * via settings in eeprom. 1451 */ 1452 1453 uint32_t cpuid_feature_ecx_include; 1454 uint32_t cpuid_feature_ecx_exclude; 1455 uint32_t cpuid_feature_edx_include; 1456 uint32_t cpuid_feature_edx_exclude; 1457 1458 /* 1459 * Allocate space for mcpu_cpi in the machcpu structure for all non-boot CPUs. 1460 */ 1461 void 1462 cpuid_alloc_space(cpu_t *cpu) 1463 { 1464 /* 1465 * By convention, cpu0 is the boot cpu, which is set up 1466 * before memory allocation is available. All other cpus get 1467 * their cpuid_info struct allocated here. 1468 */ 1469 ASSERT(cpu->cpu_id != 0); 1470 ASSERT(cpu->cpu_m.mcpu_cpi == NULL); 1471 cpu->cpu_m.mcpu_cpi = 1472 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP); 1473 } 1474 1475 void 1476 cpuid_free_space(cpu_t *cpu) 1477 { 1478 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1479 int i; 1480 1481 ASSERT(cpi != NULL); 1482 ASSERT(cpi != &cpuid_info0); 1483 1484 /* 1485 * Free up any cache leaf related dynamic storage. The first entry was 1486 * cached from the standard cpuid storage, so we should not free it. 1487 */ 1488 for (i = 1; i < cpi->cpi_cache_leaf_size; i++) 1489 kmem_free(cpi->cpi_cache_leaves[i], sizeof (struct cpuid_regs)); 1490 if (cpi->cpi_cache_leaf_size > 0) 1491 kmem_free(cpi->cpi_cache_leaves, 1492 cpi->cpi_cache_leaf_size * sizeof (struct cpuid_regs *)); 1493 1494 kmem_free(cpi, sizeof (*cpi)); 1495 cpu->cpu_m.mcpu_cpi = NULL; 1496 } 1497 1498 #if !defined(__xpv) 1499 /* 1500 * Determine the type of the underlying platform. This is used to customize 1501 * initialization of various subsystems (e.g. TSC). determine_platform() must 1502 * only ever be called once to prevent two processors from seeing different 1503 * values of platform_type. Must be called before cpuid_pass1(), the earliest 1504 * consumer to execute (uses _cpuid_chiprev --> synth_amd_info --> get_hwenv). 
1505 */ 1506 void 1507 determine_platform(void) 1508 { 1509 struct cpuid_regs cp; 1510 uint32_t base; 1511 uint32_t regs[4]; 1512 char *hvstr = (char *)regs; 1513 1514 ASSERT(platform_type == -1); 1515 1516 platform_type = HW_NATIVE; 1517 1518 if (!enable_platform_detection) 1519 return; 1520 1521 /* 1522 * If Hypervisor CPUID bit is set, try to determine hypervisor 1523 * vendor signature, and set platform type accordingly. 1524 * 1525 * References: 1526 * http://lkml.org/lkml/2008/10/1/246 1527 * http://kb.vmware.com/kb/1009458 1528 */ 1529 cp.cp_eax = 0x1; 1530 (void) __cpuid_insn(&cp); 1531 if ((cp.cp_ecx & CPUID_INTC_ECX_HV) != 0) { 1532 cp.cp_eax = 0x40000000; 1533 (void) __cpuid_insn(&cp); 1534 regs[0] = cp.cp_ebx; 1535 regs[1] = cp.cp_ecx; 1536 regs[2] = cp.cp_edx; 1537 regs[3] = 0; 1538 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0) { 1539 platform_type = HW_XEN_HVM; 1540 return; 1541 } 1542 if (strcmp(hvstr, HVSIG_VMWARE) == 0) { 1543 platform_type = HW_VMWARE; 1544 return; 1545 } 1546 if (strcmp(hvstr, HVSIG_KVM) == 0) { 1547 platform_type = HW_KVM; 1548 return; 1549 } 1550 if (strcmp(hvstr, HVSIG_BHYVE) == 0) { 1551 platform_type = HW_BHYVE; 1552 return; 1553 } 1554 if (strcmp(hvstr, HVSIG_MICROSOFT) == 0) 1555 platform_type = HW_MICROSOFT; 1556 } else { 1557 /* 1558 * Check older VMware hardware versions. VMware hypervisor is 1559 * detected by performing an IN operation to VMware hypervisor 1560 * port and checking that value returned in %ebx is VMware 1561 * hypervisor magic value. 1562 * 1563 * References: http://kb.vmware.com/kb/1009458 1564 */ 1565 vmware_port(VMWARE_HVCMD_GETVERSION, regs); 1566 if (regs[1] == VMWARE_HVMAGIC) { 1567 platform_type = HW_VMWARE; 1568 return; 1569 } 1570 } 1571 1572 /* 1573 * Check Xen hypervisor. In a fully virtualized domain, 1574 * Xen's pseudo-cpuid function returns a string representing the 1575 * Xen signature in %ebx, %ecx, and %edx. %eax contains the maximum 1576 * supported cpuid function. We need at least a (base + 2) leaf value 1577 * to do what we want to do. Try different base values, since the 1578 * hypervisor might use a different one depending on whether Hyper-V 1579 * emulation is switched on by default or not. 1580 */ 1581 for (base = 0x40000000; base < 0x40010000; base += 0x100) { 1582 cp.cp_eax = base; 1583 (void) __cpuid_insn(&cp); 1584 regs[0] = cp.cp_ebx; 1585 regs[1] = cp.cp_ecx; 1586 regs[2] = cp.cp_edx; 1587 regs[3] = 0; 1588 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0 && 1589 cp.cp_eax >= (base + 2)) { 1590 platform_type &= ~HW_NATIVE; 1591 platform_type |= HW_XEN_HVM; 1592 return; 1593 } 1594 } 1595 } 1596 1597 int 1598 get_hwenv(void) 1599 { 1600 ASSERT(platform_type != -1); 1601 return (platform_type); 1602 } 1603 1604 int 1605 is_controldom(void) 1606 { 1607 return (0); 1608 } 1609 1610 #else 1611 1612 int 1613 get_hwenv(void) 1614 { 1615 return (HW_XEN_PV); 1616 } 1617 1618 int 1619 is_controldom(void) 1620 { 1621 return (DOMAIN_IS_INITDOMAIN(xen_info)); 1622 } 1623 1624 #endif /* __xpv */ 1625 1626 /* 1627 * Make sure that we have gathered all of the CPUID leaves that we might need to 1628 * determine topology. We assume that the standard leaf 1 has already been done 1629 * and that xmaxeax has already been calculated. 
1630 */ 1631 static void 1632 cpuid_gather_amd_topology_leaves(cpu_t *cpu) 1633 { 1634 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1635 1636 if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) { 1637 struct cpuid_regs *cp; 1638 1639 cp = &cpi->cpi_extd[8]; 1640 cp->cp_eax = CPUID_LEAF_EXT_8; 1641 (void) __cpuid_insn(cp); 1642 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8, cp); 1643 } 1644 1645 if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) && 1646 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) { 1647 struct cpuid_regs *cp; 1648 1649 cp = &cpi->cpi_extd[0x1e]; 1650 cp->cp_eax = CPUID_LEAF_EXT_1e; 1651 (void) __cpuid_insn(cp); 1652 } 1653 } 1654 1655 /* 1656 * Get the APIC ID for this processor. If Leaf B is present and valid, we prefer 1657 * it to everything else. If not, and we're on an AMD system where 8000001e is 1658 * valid, then we use that. Otherwise, we fall back to the default value for the 1659 * APIC ID in leaf 1. 1660 */ 1661 static uint32_t 1662 cpuid_gather_apicid(struct cpuid_info *cpi) 1663 { 1664 /* 1665 * Leaf B changes based on the arguments to it. Because we don't cache 1666 * it, we need to gather it again. 1667 */ 1668 if (cpi->cpi_maxeax >= 0xB) { 1669 struct cpuid_regs regs; 1670 struct cpuid_regs *cp; 1671 1672 cp = &regs; 1673 cp->cp_eax = 0xB; 1674 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0; 1675 (void) __cpuid_insn(cp); 1676 1677 if (cp->cp_ebx != 0) { 1678 return (cp->cp_edx); 1679 } 1680 } 1681 1682 if (cpi->cpi_vendor == X86_VENDOR_AMD && 1683 is_x86_feature(x86_featureset, X86FSET_TOPOEXT) && 1684 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) { 1685 return (cpi->cpi_extd[0x1e].cp_eax); 1686 } 1687 1688 return (CPI_APIC_ID(cpi)); 1689 } 1690 1691 /* 1692 * For AMD processors, attempt to calculate the number of chips and cores that 1693 * exist. The way that we do this varies based on the generation, because the 1694 * generations themselves have changed dramatically. 1695 * 1696 * If cpuid leaf 0x80000008 exists, that generally tells us the number of cores. 1697 * However, with the advent of family 17h (Zen) it actually tells us the number 1698 * of threads, so we need to look at leaf 0x8000001e if available to determine 1699 * its value. Otherwise, for all prior families, the number of enabled cores is 1700 * the same as threads. 1701 * 1702 * If we do not have leaf 0x80000008, then we assume that this processor does 1703 * not have anything. AMD's older CPUID specification says there's no reason to 1704 * fall back to leaf 1. 1705 * 1706 * In some virtualization cases we will not have leaf 8000001e or it will be 1707 * zero. When that happens we assume the number of threads is one. 1708 */ 1709 static void 1710 cpuid_amd_ncores(struct cpuid_info *cpi, uint_t *ncpus, uint_t *ncores) 1711 { 1712 uint_t nthreads, nthread_per_core; 1713 1714 nthreads = nthread_per_core = 1; 1715 1716 if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) { 1717 nthreads = BITX(cpi->cpi_extd[8].cp_ecx, 7, 0) + 1; 1718 } else if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) { 1719 nthreads = CPI_CPU_COUNT(cpi); 1720 } 1721 1722 /* 1723 * For us to have threads, and know about it, we have to be at least at 1724 * family 17h and have the cpuid bit that says we have extended 1725 * topology.
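 * As a hedged, illustrative example (hypothetical values, not read from any * specific part): an 8-core, 16-thread Zen package would report %ecx[7:0] = 15 * in leaf 0x80000008 (so nthreads = 16 below) and %ebx[15:8] = 1 in leaf * 0x8000001e (so nthread_per_core = 2), giving *ncores = 16 / 2 = 8.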
1726 */ 1727 if (cpi->cpi_family >= 0x17 && 1728 is_x86_feature(x86_featureset, X86FSET_TOPOEXT) && 1729 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) { 1730 nthread_per_core = BITX(cpi->cpi_extd[0x1e].cp_ebx, 15, 8) + 1; 1731 } 1732 1733 *ncpus = nthreads; 1734 *ncores = nthreads / nthread_per_core; 1735 } 1736 1737 /* 1738 * Seed the initial values for the cores and threads for an Intel based 1739 * processor. These values will be overwritten if we detect that the processor 1740 * supports CPUID leaf 0xb. 1741 */ 1742 static void 1743 cpuid_intel_ncores(struct cpuid_info *cpi, uint_t *ncpus, uint_t *ncores) 1744 { 1745 /* 1746 * Only seed the number of physical cores from the first level leaf 4 1747 * information. The number of threads there indicates how many share the 1748 * L1 cache, which may or may not have anything to do with the number of 1749 * logical CPUs per core. 1750 */ 1751 if (cpi->cpi_maxeax >= 4) { 1752 *ncores = BITX(cpi->cpi_std[4].cp_eax, 31, 26) + 1; 1753 } else { 1754 *ncores = 1; 1755 } 1756 1757 if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) { 1758 *ncpus = CPI_CPU_COUNT(cpi); 1759 } else { 1760 *ncpus = *ncores; 1761 } 1762 } 1763 1764 static boolean_t 1765 cpuid_leafB_getids(cpu_t *cpu) 1766 { 1767 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1768 struct cpuid_regs regs; 1769 struct cpuid_regs *cp; 1770 1771 if (cpi->cpi_maxeax < 0xB) 1772 return (B_FALSE); 1773 1774 cp = &regs; 1775 cp->cp_eax = 0xB; 1776 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0; 1777 1778 (void) __cpuid_insn(cp); 1779 1780 /* 1781 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which 1782 * indicates that the extended topology enumeration leaf is 1783 * available. 1784 */ 1785 if (cp->cp_ebx != 0) { 1786 uint32_t x2apic_id = 0; 1787 uint_t coreid_shift = 0; 1788 uint_t ncpu_per_core = 1; 1789 uint_t chipid_shift = 0; 1790 uint_t ncpu_per_chip = 1; 1791 uint_t i; 1792 uint_t level; 1793 1794 for (i = 0; i < CPI_FNB_ECX_MAX; i++) { 1795 cp->cp_eax = 0xB; 1796 cp->cp_ecx = i; 1797 1798 (void) __cpuid_insn(cp); 1799 level = CPI_CPU_LEVEL_TYPE(cp); 1800 1801 if (level == 1) { 1802 x2apic_id = cp->cp_edx; 1803 coreid_shift = BITX(cp->cp_eax, 4, 0); 1804 ncpu_per_core = BITX(cp->cp_ebx, 15, 0); 1805 } else if (level == 2) { 1806 x2apic_id = cp->cp_edx; 1807 chipid_shift = BITX(cp->cp_eax, 4, 0); 1808 ncpu_per_chip = BITX(cp->cp_ebx, 15, 0); 1809 } 1810 } 1811 1812 /* 1813 * cpi_apicid is taken care of in cpuid_gather_apicid. 1814 */ 1815 cpi->cpi_ncpu_per_chip = ncpu_per_chip; 1816 cpi->cpi_ncore_per_chip = ncpu_per_chip / 1817 ncpu_per_core; 1818 cpi->cpi_chipid = x2apic_id >> chipid_shift; 1819 cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1); 1820 cpi->cpi_coreid = x2apic_id >> coreid_shift; 1821 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift; 1822 cpi->cpi_procnodeid = cpi->cpi_chipid; 1823 cpi->cpi_compunitid = cpi->cpi_coreid; 1824 1825 if (coreid_shift > 0 && chipid_shift > coreid_shift) { 1826 cpi->cpi_nthread_bits = coreid_shift; 1827 cpi->cpi_ncore_bits = chipid_shift - coreid_shift; 1828 } 1829 1830 return (B_TRUE); 1831 } else { 1832 return (B_FALSE); 1833 } 1834 } 1835 1836 static void 1837 cpuid_intel_getids(cpu_t *cpu, void *feature) 1838 { 1839 uint_t i; 1840 uint_t chipid_shift = 0; 1841 uint_t coreid_shift = 0; 1842 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1843 1844 /* 1845 * There are no compute units or processor nodes currently on Intel. 1846 * Always set these to one.
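 * As an illustrative sketch of the non-leaf-B fallback path further below * (hypothetical values): with cpi_ncpu_per_chip = 8 the shift loop computes * chipid_shift = 3, so an APIC ID of 0x1d decomposes into cpi_chipid = * 0x1d >> 3 = 3 and cpi_clogid = 0x1d & 0x7 = 5.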
1847 */ 1848 cpi->cpi_procnodes_per_pkg = 1; 1849 cpi->cpi_cores_per_compunit = 1; 1850 1851 /* 1852 * If cpuid Leaf B is present, use that to try and get this information. 1853 * It will be the most accurate for Intel CPUs. 1854 */ 1855 if (cpuid_leafB_getids(cpu)) 1856 return; 1857 1858 /* 1859 * In this case, we have the leaf 1 and leaf 4 values for ncpu_per_chip 1860 * and ncore_per_chip. These represent the largest power of two values 1861 * that we need to cover all of the IDs in the system. Therefore, we use 1862 * those values to seed the number of bits needed to cover information 1863 * in the case when leaf B is not available. These values will probably 1864 * be larger than required, but that's OK. 1865 */ 1866 cpi->cpi_nthread_bits = ddi_fls(cpi->cpi_ncpu_per_chip); 1867 cpi->cpi_ncore_bits = ddi_fls(cpi->cpi_ncore_per_chip); 1868 1869 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1) 1870 chipid_shift++; 1871 1872 cpi->cpi_chipid = cpi->cpi_apicid >> chipid_shift; 1873 cpi->cpi_clogid = cpi->cpi_apicid & ((1 << chipid_shift) - 1); 1874 1875 if (is_x86_feature(feature, X86FSET_CMP)) { 1876 /* 1877 * Multi-core (and possibly multi-threaded) 1878 * processors. 1879 */ 1880 uint_t ncpu_per_core; 1881 if (cpi->cpi_ncore_per_chip == 1) 1882 ncpu_per_core = cpi->cpi_ncpu_per_chip; 1883 else if (cpi->cpi_ncore_per_chip > 1) 1884 ncpu_per_core = cpi->cpi_ncpu_per_chip / 1885 cpi->cpi_ncore_per_chip; 1886 /* 1887 * 8bit APIC IDs on dual core Pentiums 1888 * look like this: 1889 * 1890 * +-----------------------+------+------+ 1891 * | Physical Package ID | MC | HT | 1892 * +-----------------------+------+------+ 1893 * <------- chipid --------> 1894 * <------- coreid ---------------> 1895 * <--- clogid --> 1896 * <------> 1897 * pkgcoreid 1898 * 1899 * Where the number of bits necessary to 1900 * represent MC and HT fields together equals 1901 * to the minimum number of bits necessary to 1902 * store the value of cpi->cpi_ncpu_per_chip. 1903 * Of those bits, the MC part uses the number 1904 * of bits necessary to store the value of 1905 * cpi->cpi_ncore_per_chip. 1906 */ 1907 for (i = 1; i < ncpu_per_core; i <<= 1) 1908 coreid_shift++; 1909 cpi->cpi_coreid = cpi->cpi_apicid >> coreid_shift; 1910 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift; 1911 } else if (is_x86_feature(feature, X86FSET_HTT)) { 1912 /* 1913 * Single-core multi-threaded processors. 1914 */ 1915 cpi->cpi_coreid = cpi->cpi_chipid; 1916 cpi->cpi_pkgcoreid = 0; 1917 } else { 1918 /* 1919 * Single-core single-thread processors. 1920 */ 1921 cpi->cpi_coreid = cpu->cpu_id; 1922 cpi->cpi_pkgcoreid = 0; 1923 } 1924 cpi->cpi_procnodeid = cpi->cpi_chipid; 1925 cpi->cpi_compunitid = cpi->cpi_coreid; 1926 } 1927 1928 /* 1929 * Historically, AMD has had CMP chips with only a single thread per core. 1930 * However, starting in family 17h (Zen), this has changed and they now have 1931 * multiple threads. Our internal core id needs to be a unique value. 1932 * 1933 * To determine the core id of an AMD system, if we're from a family before 17h, 1934 * then we just use the cpu id, as that gives us a good value that will be 1935 * unique for each core. If instead, we're on family 17h or later, then we need 1936 * to do something more complicated. CPUID leaf 0x8000001e can tell us 1937 * how many threads are in the system. Based on that, we'll shift the APIC ID. 1938 * We can't use the normal core id in that leaf as it's only unique within the 1939 * socket, which is perfect for cpi_pkgcoreid, but not us. 
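 * As a hedged example (hypothetical values): a family 17h part with SMT * enabled reports two threads per core in leaf 0x8000001e, so an APIC ID of * 0x7 yields a core id of 0x7 >> 1 = 3; when that leaf is unavailable or * reports a single thread, the function below simply returns cpu->cpu_id.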
1940 */ 1941 static id_t 1942 cpuid_amd_get_coreid(cpu_t *cpu) 1943 { 1944 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1945 1946 if (cpi->cpi_family >= 0x17 && 1947 is_x86_feature(x86_featureset, X86FSET_TOPOEXT) && 1948 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) { 1949 uint_t nthreads = BITX(cpi->cpi_extd[0x1e].cp_ebx, 15, 8) + 1; 1950 if (nthreads > 1) { 1951 VERIFY3U(nthreads, ==, 2); 1952 return (cpi->cpi_apicid >> 1); 1953 } 1954 } 1955 1956 return (cpu->cpu_id); 1957 } 1958 1959 /* 1960 * Constructing IDs on AMD is a more challenging task. This is notable because of the 1961 * following two facts: 1962 * 1963 * 1. Before family 0x17 (Zen), there was no support for SMT and there was 1964 * also no way to get an actual unique core id from the system. As such, we 1965 * synthesize this case by using cpu->cpu_id. This scheme does not, 1966 * however, guarantee that sibling cores of a chip will have sequential 1967 * coreids starting at a multiple of the number of cores per chip - that is 1968 * usually the case, but if the ACPI MADT table is presented in a different 1969 * order then we need to perform a few more gymnastics for the pkgcoreid. 1970 * 1971 * 2. In families 0x15 and 0x16 (Bulldozer and co.) the cores came in groups 1972 * called compute units. These compute units share the L1I cache, L2 cache, 1973 * and the FPU. To deal with this, a new topology leaf was added in 1974 * 0x8000001e. However, parts of this leaf have different meanings 1975 * once we get to family 0x17. 1976 */ 1977 1978 static void 1979 cpuid_amd_getids(cpu_t *cpu, uchar_t *features) 1980 { 1981 int i, first_half, coreidsz; 1982 uint32_t nb_caps_reg; 1983 uint_t node2_1; 1984 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 1985 struct cpuid_regs *cp; 1986 1987 /* 1988 * Calculate the core id (this comes from hardware in family 0x17 if it 1989 * hasn't been stripped by virtualization). We always set the compute 1990 * unit id to the same value. Also, initialize the default number of 1991 * cores per compute unit and nodes per package. This will be 1992 * overwritten when we know information about a particular family. 1993 */ 1994 cpi->cpi_coreid = cpuid_amd_get_coreid(cpu); 1995 cpi->cpi_compunitid = cpi->cpi_coreid; 1996 cpi->cpi_cores_per_compunit = 1; 1997 cpi->cpi_procnodes_per_pkg = 1; 1998 1999 /* 2000 * To construct the logical ID, we need to determine how many APIC IDs 2001 * are dedicated to the cores and threads. This is provided for us in 2002 * 0x80000008. However, if it's not present (say due to virtualization), 2003 * then we assume it's one. This should be present on all 64-bit AMD 2004 * processors. It was added in family 0xf (Hammer). 2005 */ 2006 if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) { 2007 coreidsz = BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12); 2008 2009 /* 2010 * In AMD parlance chip is really a node while illumos 2011 * uses chip as equivalent to socket/package. 2012 */ 2013 if (coreidsz == 0) { 2014 /* Use legacy method */ 2015 for (i = 1; i < cpi->cpi_ncore_per_chip; i <<= 1) 2016 coreidsz++; 2017 if (coreidsz == 0) 2018 coreidsz = 1; 2019 } 2020 } else { 2021 /* Assume single-core part */ 2022 coreidsz = 1; 2023 } 2024 cpi->cpi_clogid = cpi->cpi_apicid & ((1 << coreidsz) - 1); 2025 2026 /* 2027 * The package core ID varies depending on the family. While it may be 2028 * tempting to use the CPUID_LEAF_EXT_1e %ebx core id, unfortunately, 2029 * this value is the core id in the given node.
For non-virtualized 2030 * family 17h, we need to take the logical core id and shift off the 2031 * threads like we do when getting the core id. Otherwise, we can use 2032 * the clogid as is. When family 17h is virtualized, the clogid should 2033 * be sufficient as if we don't have valid data in the leaf, then we 2034 * won't think we have SMT, in which case the cpi_clogid should be 2035 * sufficient. 2036 */ 2037 if (cpi->cpi_family >= 0x17 && 2038 is_x86_feature(x86_featureset, X86FSET_TOPOEXT) && 2039 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e && 2040 cpi->cpi_extd[0x1e].cp_ebx != 0) { 2041 uint_t nthreads = BITX(cpi->cpi_extd[0x1e].cp_ebx, 15, 8) + 1; 2042 if (nthreads > 1) { 2043 VERIFY3U(nthreads, ==, 2); 2044 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> 1; 2045 } else { 2046 cpi->cpi_pkgcoreid = cpi->cpi_clogid; 2047 } 2048 } else { 2049 cpi->cpi_pkgcoreid = cpi->cpi_clogid; 2050 } 2051 2052 /* 2053 * Obtain the node ID and compute unit IDs. If we're on family 0x15 2054 * (bulldozer) or newer, then we can derive all of this from leaf 2055 * CPUID_LEAF_EXT_1e. Otherwise, the method varies by family. 2056 */ 2057 if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) && 2058 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) { 2059 cp = &cpi->cpi_extd[0x1e]; 2060 2061 cpi->cpi_procnodes_per_pkg = BITX(cp->cp_ecx, 10, 8) + 1; 2062 cpi->cpi_procnodeid = BITX(cp->cp_ecx, 7, 0); 2063 2064 /* 2065 * For Bulldozer-era CPUs, recalculate the compute unit 2066 * information. 2067 */ 2068 if (cpi->cpi_family >= 0x15 && cpi->cpi_family < 0x17) { 2069 cpi->cpi_cores_per_compunit = 2070 BITX(cp->cp_ebx, 15, 8) + 1; 2071 cpi->cpi_compunitid = BITX(cp->cp_ebx, 7, 0) + 2072 (cpi->cpi_ncore_per_chip / 2073 cpi->cpi_cores_per_compunit) * 2074 (cpi->cpi_procnodeid / 2075 cpi->cpi_procnodes_per_pkg); 2076 } 2077 } else if (cpi->cpi_family == 0xf || cpi->cpi_family >= 0x11) { 2078 cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7; 2079 } else if (cpi->cpi_family == 0x10) { 2080 /* 2081 * See if we are a multi-node processor. 2082 * All processors in the system have the same number of nodes 2083 */ 2084 nb_caps_reg = pci_getl_func(0, 24, 3, 0xe8); 2085 if ((cpi->cpi_model < 8) || BITX(nb_caps_reg, 29, 29) == 0) { 2086 /* Single-node */ 2087 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5, 2088 coreidsz); 2089 } else { 2090 2091 /* 2092 * Multi-node revision D (2 nodes per package 2093 * are supported) 2094 */ 2095 cpi->cpi_procnodes_per_pkg = 2; 2096 2097 first_half = (cpi->cpi_pkgcoreid <= 2098 (cpi->cpi_ncore_per_chip/2 - 1)); 2099 2100 if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) { 2101 /* We are BSP */ 2102 cpi->cpi_procnodeid = (first_half ? 
0 : 1); 2103 } else { 2104 2105 /* We are AP */ 2106 /* NodeId[2:1] bits to use for reading F3xe8 */ 2107 node2_1 = BITX(cpi->cpi_apicid, 5, 4) << 1; 2108 2109 nb_caps_reg = 2110 pci_getl_func(0, 24 + node2_1, 3, 0xe8); 2111 2112 /* 2113 * Check IntNodeNum bit (31:30, but bit 31 is 2114 * always 0 on dual-node processors) 2115 */ 2116 if (BITX(nb_caps_reg, 30, 30) == 0) 2117 cpi->cpi_procnodeid = node2_1 + 2118 !first_half; 2119 else 2120 cpi->cpi_procnodeid = node2_1 + 2121 first_half; 2122 } 2123 } 2124 } else { 2125 cpi->cpi_procnodeid = 0; 2126 } 2127 2128 cpi->cpi_chipid = 2129 cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg; 2130 2131 cpi->cpi_ncore_bits = coreidsz; 2132 cpi->cpi_nthread_bits = ddi_fls(cpi->cpi_ncpu_per_chip / 2133 cpi->cpi_ncore_per_chip); 2134 } 2135 2136 static void 2137 spec_uarch_flush_noop(void) 2138 { 2139 } 2140 2141 /* 2142 * When microcode is present that mitigates MDS, this wrmsr will also flush the 2143 * MDS-related micro-architectural state that would normally happen by calling 2144 * x86_md_clear(). 2145 */ 2146 static void 2147 spec_uarch_flush_msr(void) 2148 { 2149 wrmsr(MSR_IA32_FLUSH_CMD, IA32_FLUSH_CMD_L1D); 2150 } 2151 2152 /* 2153 * This function points to a function that will flush certain 2154 * micro-architectural state on the processor. This flush is used to mitigate 2155 * two different classes of Intel CPU vulnerabilities: L1TF and MDS. This 2156 * function can point to one of three functions: 2157 * 2158 * - A noop which is done because we either are vulnerable, but do not have 2159 * microcode available to help deal with a fix, or because we aren't 2160 * vulnerable. 2161 * 2162 * - spec_uarch_flush_msr which will issue an L1D flush and if microcode to 2163 * mitigate MDS is present, also perform the equivalent of the MDS flush; 2164 * however, it only flushes the MDS related micro-architectural state on the 2165 * current hyperthread, it does not do anything for the twin. 2166 * 2167 * - x86_md_clear which will flush the MDS related state. This is done when we 2168 * have a processor that is vulnerable to MDS, but is not vulnerable to L1TF 2169 * (RDCL_NO is set). 2170 */ 2171 void (*spec_uarch_flush)(void) = spec_uarch_flush_noop; 2172 2173 void (*x86_md_clear)(void) = x86_md_clear_noop; 2174 2175 static void 2176 cpuid_update_md_clear(cpu_t *cpu, uchar_t *featureset) 2177 { 2178 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2179 2180 /* 2181 * While RDCL_NO indicates that one of the MDS vulnerabilities (MSBDS) 2182 * has been fixed in hardware, it doesn't cover everything related to 2183 * MDS. Therefore we can only rely on MDS_NO to determine that we don't 2184 * need to mitigate this. 2185 */ 2186 if (cpi->cpi_vendor != X86_VENDOR_Intel || 2187 is_x86_feature(featureset, X86FSET_MDS_NO)) { 2188 x86_md_clear = x86_md_clear_noop; 2189 membar_producer(); 2190 return; 2191 } 2192 2193 if (is_x86_feature(featureset, X86FSET_MD_CLEAR)) { 2194 x86_md_clear = x86_md_clear_verw; 2195 } 2196 2197 membar_producer(); 2198 } 2199 2200 static void 2201 cpuid_update_l1d_flush(cpu_t *cpu, uchar_t *featureset) 2202 { 2203 boolean_t need_l1d, need_mds; 2204 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2205 2206 /* 2207 * If we're not on Intel or we've mitigated both RDCL and MDS in 2208 * hardware, then there's nothing left for us to do for enabling the 2209 * flush. We can also go ahead and say that SMT exclusion is 2210 * unnecessary. 
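 * As a rough, illustrative summary of the selection below (the L1D_VM_NO * check is omitted here for brevity): with neither RDCL_NO nor MDS_NO and * FLUSH_CMD present, spec_uarch_flush becomes spec_uarch_flush_msr; with * RDCL_NO but not MDS_NO and MD_CLEAR present, it becomes x86_md_clear; with * both RDCL_NO and MDS_NO, or on non-Intel CPUs, it stays * spec_uarch_flush_noop.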
2211 */ 2212 if (cpi->cpi_vendor != X86_VENDOR_Intel || 2213 (is_x86_feature(featureset, X86FSET_RDCL_NO) && 2214 is_x86_feature(featureset, X86FSET_MDS_NO))) { 2215 extern int smt_exclusion; 2216 smt_exclusion = 0; 2217 spec_uarch_flush = spec_uarch_flush_noop; 2218 membar_producer(); 2219 return; 2220 } 2221 2222 /* 2223 * The locations where we need to perform an L1D flush are required both 2224 * for mitigating L1TF and MDS. When verw support is present in 2225 * microcode, then the L1D flush will take care of doing that as well. 2226 * However, if we have a system where RDCL_NO is present, but we don't 2227 * have MDS_NO, then we need to do a verw (x86_md_clear) and not a full 2228 * L1D flush. 2229 */ 2230 if (!is_x86_feature(featureset, X86FSET_RDCL_NO) && 2231 is_x86_feature(featureset, X86FSET_FLUSH_CMD) && 2232 !is_x86_feature(featureset, X86FSET_L1D_VM_NO)) { 2233 need_l1d = B_TRUE; 2234 } else { 2235 need_l1d = B_FALSE; 2236 } 2237 2238 if (!is_x86_feature(featureset, X86FSET_MDS_NO) && 2239 is_x86_feature(featureset, X86FSET_MD_CLEAR)) { 2240 need_mds = B_TRUE; 2241 } else { 2242 need_mds = B_FALSE; 2243 } 2244 2245 if (need_l1d) { 2246 spec_uarch_flush = spec_uarch_flush_msr; 2247 } else if (need_mds) { 2248 spec_uarch_flush = x86_md_clear; 2249 } else { 2250 /* 2251 * We have no hardware mitigations available to us. 2252 */ 2253 spec_uarch_flush = spec_uarch_flush_noop; 2254 } 2255 membar_producer(); 2256 } 2257 2258 static void 2259 cpuid_scan_security(cpu_t *cpu, uchar_t *featureset) 2260 { 2261 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2262 2263 if (cpi->cpi_vendor == X86_VENDOR_AMD && 2264 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) { 2265 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBPB) 2266 add_x86_feature(featureset, X86FSET_IBPB); 2267 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBRS) 2268 add_x86_feature(featureset, X86FSET_IBRS); 2269 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_STIBP) 2270 add_x86_feature(featureset, X86FSET_STIBP); 2271 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBRS_ALL) 2272 add_x86_feature(featureset, X86FSET_IBRS_ALL); 2273 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_STIBP_ALL) 2274 add_x86_feature(featureset, X86FSET_STIBP_ALL); 2275 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_PREFER_IBRS) 2276 add_x86_feature(featureset, X86FSET_RSBA); 2277 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_SSBD) 2278 add_x86_feature(featureset, X86FSET_SSBD); 2279 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_VIRT_SSBD) 2280 add_x86_feature(featureset, X86FSET_SSBD_VIRT); 2281 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_SSB_NO) 2282 add_x86_feature(featureset, X86FSET_SSB_NO); 2283 } else if (cpi->cpi_vendor == X86_VENDOR_Intel && 2284 cpi->cpi_maxeax >= 7) { 2285 struct cpuid_regs *ecp; 2286 ecp = &cpi->cpi_std[7]; 2287 2288 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_MD_CLEAR) { 2289 add_x86_feature(featureset, X86FSET_MD_CLEAR); 2290 } 2291 2292 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_SPEC_CTRL) { 2293 add_x86_feature(featureset, X86FSET_IBRS); 2294 add_x86_feature(featureset, X86FSET_IBPB); 2295 } 2296 2297 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_STIBP) { 2298 add_x86_feature(featureset, X86FSET_STIBP); 2299 } 2300 2301 /* 2302 * Don't read the arch caps MSR on xpv where we lack the 2303 * on_trap(). 2304 */ 2305 #ifndef __xpv 2306 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_ARCH_CAPS) { 2307 on_trap_data_t otd; 2308 2309 /* 2310 * Be paranoid and assume we'll get a #GP. 
2311 */ 2312 if (!on_trap(&otd, OT_DATA_ACCESS)) { 2313 uint64_t reg; 2314 2315 reg = rdmsr(MSR_IA32_ARCH_CAPABILITIES); 2316 if (reg & IA32_ARCH_CAP_RDCL_NO) { 2317 add_x86_feature(featureset, 2318 X86FSET_RDCL_NO); 2319 } 2320 if (reg & IA32_ARCH_CAP_IBRS_ALL) { 2321 add_x86_feature(featureset, 2322 X86FSET_IBRS_ALL); 2323 } 2324 if (reg & IA32_ARCH_CAP_RSBA) { 2325 add_x86_feature(featureset, 2326 X86FSET_RSBA); 2327 } 2328 if (reg & IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY) { 2329 add_x86_feature(featureset, 2330 X86FSET_L1D_VM_NO); 2331 } 2332 if (reg & IA32_ARCH_CAP_SSB_NO) { 2333 add_x86_feature(featureset, 2334 X86FSET_SSB_NO); 2335 } 2336 if (reg & IA32_ARCH_CAP_MDS_NO) { 2337 add_x86_feature(featureset, 2338 X86FSET_MDS_NO); 2339 } 2340 } 2341 no_trap(); 2342 } 2343 #endif /* !__xpv */ 2344 2345 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_SSBD) 2346 add_x86_feature(featureset, X86FSET_SSBD); 2347 2348 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_FLUSH_CMD) 2349 add_x86_feature(featureset, X86FSET_FLUSH_CMD); 2350 } 2351 2352 if (cpu->cpu_id != 0) 2353 return; 2354 2355 /* 2356 * We need to determine what changes are required for mitigating L1TF 2357 * and MDS. If the CPU suffers from either of them, then SMT exclusion 2358 * is required. 2359 * 2360 * If any of these are present, then we need to flush u-arch state at 2361 * various points. For MDS, we need to do so whenever we change to a 2362 * lesser privilege level or we are halting the CPU. For L1TF we need to 2363 * flush the L1D cache at VM entry. When we have microcode that handles 2364 * MDS, the L1D flush also clears the other u-arch state that the 2365 * md_clear does. 2366 */ 2367 2368 /* 2369 * Update whether or not we need to be taking explicit action against 2370 * MDS. 2371 */ 2372 cpuid_update_md_clear(cpu, featureset); 2373 2374 /* 2375 * Determine whether SMT exclusion is required and whether or not we 2376 * need to perform an l1d flush. 2377 */ 2378 cpuid_update_l1d_flush(cpu, featureset); 2379 } 2380 2381 /* 2382 * Setup XFeature_Enabled_Mask register. Required by xsave feature. 2383 */ 2384 void 2385 setup_xfem(void) 2386 { 2387 uint64_t flags = XFEATURE_LEGACY_FP; 2388 2389 ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE)); 2390 2391 if (is_x86_feature(x86_featureset, X86FSET_SSE)) 2392 flags |= XFEATURE_SSE; 2393 2394 if (is_x86_feature(x86_featureset, X86FSET_AVX)) 2395 flags |= XFEATURE_AVX; 2396 2397 if (is_x86_feature(x86_featureset, X86FSET_AVX512F)) 2398 flags |= XFEATURE_AVX512; 2399 2400 set_xcr(XFEATURE_ENABLED_MASK, flags); 2401 2402 xsave_bv_all = flags; 2403 } 2404 2405 static void 2406 cpuid_pass1_topology(cpu_t *cpu, uchar_t *featureset) 2407 { 2408 struct cpuid_info *cpi; 2409 2410 cpi = cpu->cpu_m.mcpu_cpi; 2411 2412 if (cpi->cpi_vendor == X86_VENDOR_AMD) { 2413 cpuid_gather_amd_topology_leaves(cpu); 2414 } 2415 2416 cpi->cpi_apicid = cpuid_gather_apicid(cpi); 2417 2418 /* 2419 * Before we can calculate the IDs that we should assign to this 2420 * processor, we need to understand how many cores and threads it has. 2421 */ 2422 switch (cpi->cpi_vendor) { 2423 case X86_VENDOR_Intel: 2424 cpuid_intel_ncores(cpi, &cpi->cpi_ncpu_per_chip, 2425 &cpi->cpi_ncore_per_chip); 2426 break; 2427 case X86_VENDOR_AMD: 2428 cpuid_amd_ncores(cpi, &cpi->cpi_ncpu_per_chip, 2429 &cpi->cpi_ncore_per_chip); 2430 break; 2431 default: 2432 /* 2433 * If we have some other x86 compatible chip, it's not clear how 2434 * they would behave. The most common case is virtualization 2435 * today, though there are also 64-bit VIA chips. 
Assume that 2436 * all we can get is the basic Leaf 1 HTT information. 2437 */ 2438 if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) { 2439 cpi->cpi_ncore_per_chip = 1; 2440 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi); 2441 } 2442 break; 2443 } 2444 2445 /* 2446 * Based on the calculated number of threads and cores, potentially 2447 * assign the HTT and CMT features. 2448 */ 2449 if (cpi->cpi_ncore_per_chip > 1) { 2450 add_x86_feature(featureset, X86FSET_CMP); 2451 } 2452 2453 if (cpi->cpi_ncpu_per_chip > 1 && 2454 cpi->cpi_ncpu_per_chip != cpi->cpi_ncore_per_chip) { 2455 add_x86_feature(featureset, X86FSET_HTT); 2456 } 2457 2458 /* 2459 * Now that has been set up, we need to go through and calculate all of 2460 * the rest of the parameters that exist. If we think the CPU doesn't 2461 * have either SMT (HTT) or CMP, then we basically go through and fake 2462 * up information in some way. The most likely case for this is 2463 * virtualization where we have a lot of partial topology information. 2464 */ 2465 if (!is_x86_feature(featureset, X86FSET_HTT) && 2466 !is_x86_feature(featureset, X86FSET_CMP)) { 2467 /* 2468 * This is a single core, single-threaded processor. 2469 */ 2470 cpi->cpi_procnodes_per_pkg = 1; 2471 cpi->cpi_cores_per_compunit = 1; 2472 cpi->cpi_compunitid = 0; 2473 cpi->cpi_chipid = -1; 2474 cpi->cpi_clogid = 0; 2475 cpi->cpi_coreid = cpu->cpu_id; 2476 cpi->cpi_pkgcoreid = 0; 2477 if (cpi->cpi_vendor == X86_VENDOR_AMD) { 2478 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 3, 0); 2479 } else { 2480 cpi->cpi_procnodeid = cpi->cpi_chipid; 2481 } 2482 } else { 2483 switch (cpi->cpi_vendor) { 2484 case X86_VENDOR_Intel: 2485 cpuid_intel_getids(cpu, featureset); 2486 break; 2487 case X86_VENDOR_AMD: 2488 cpuid_amd_getids(cpu, featureset); 2489 break; 2490 default: 2491 /* 2492 * In this case, it's hard to say what we should do. 2493 * We're going to model them to the OS as single core 2494 * threads. We don't have a good identifier for them, so 2495 * we're just going to use the cpu id all on a single 2496 * chip. 2497 * 2498 * This case has historically been different from the 2499 * case above where we don't have HTT or CMP. While they 2500 * could be combined, we've opted to keep it separate to 2501 * minimize the risk of topology changes in weird cases. 2502 */ 2503 cpi->cpi_procnodes_per_pkg = 1; 2504 cpi->cpi_cores_per_compunit = 1; 2505 cpi->cpi_chipid = 0; 2506 cpi->cpi_coreid = cpu->cpu_id; 2507 cpi->cpi_clogid = cpu->cpu_id; 2508 cpi->cpi_pkgcoreid = cpu->cpu_id; 2509 cpi->cpi_procnodeid = cpi->cpi_chipid; 2510 cpi->cpi_compunitid = cpi->cpi_coreid; 2511 break; 2512 } 2513 } 2514 } 2515 2516 /* 2517 * Gather relevant CPU features from leaf 6 which covers thermal information. We 2518 * always gather leaf 6 if it's supported; however, we only look for features on 2519 * Intel systems as AMD does not currently define any of the features we look 2520 * for below. 
2521 */ 2522 static void 2523 cpuid_pass1_thermal(cpu_t *cpu, uchar_t *featureset) 2524 { 2525 struct cpuid_regs *cp; 2526 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 2527 2528 if (cpi->cpi_maxeax < 6) { 2529 return; 2530 } 2531 2532 cp = &cpi->cpi_std[6]; 2533 cp->cp_eax = 6; 2534 cp->cp_ebx = cp->cp_ecx = cp->cp_edx = 0; 2535 (void) __cpuid_insn(cp); 2536 platform_cpuid_mangle(cpi->cpi_vendor, 6, cp); 2537 2538 if (cpi->cpi_vendor != X86_VENDOR_Intel) { 2539 return; 2540 } 2541 2542 if ((cp->cp_eax & CPUID_INTC_EAX_DTS) != 0) { 2543 add_x86_feature(featureset, X86FSET_CORE_THERMAL); 2544 } 2545 2546 if ((cp->cp_eax & CPUID_INTC_EAX_PTM) != 0) { 2547 add_x86_feature(featureset, X86FSET_PKG_THERMAL); 2548 } 2549 } 2550 2551 void 2552 cpuid_pass1(cpu_t *cpu, uchar_t *featureset) 2553 { 2554 uint32_t mask_ecx, mask_edx; 2555 struct cpuid_info *cpi; 2556 struct cpuid_regs *cp; 2557 int xcpuid; 2558 #if !defined(__xpv) 2559 extern int idle_cpu_prefer_mwait; 2560 #endif 2561 2562 /* 2563 * Space statically allocated for BSP, ensure pointer is set 2564 */ 2565 if (cpu->cpu_id == 0) { 2566 if (cpu->cpu_m.mcpu_cpi == NULL) 2567 cpu->cpu_m.mcpu_cpi = &cpuid_info0; 2568 } 2569 2570 add_x86_feature(featureset, X86FSET_CPUID); 2571 2572 cpi = cpu->cpu_m.mcpu_cpi; 2573 ASSERT(cpi != NULL); 2574 cp = &cpi->cpi_std[0]; 2575 cp->cp_eax = 0; 2576 cpi->cpi_maxeax = __cpuid_insn(cp); 2577 { 2578 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr; 2579 *iptr++ = cp->cp_ebx; 2580 *iptr++ = cp->cp_edx; 2581 *iptr++ = cp->cp_ecx; 2582 *(char *)&cpi->cpi_vendorstr[12] = '\0'; 2583 } 2584 2585 cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr); 2586 x86_vendor = cpi->cpi_vendor; /* for compatibility */ 2587 2588 /* 2589 * Limit the range in case of weird hardware 2590 */ 2591 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX) 2592 cpi->cpi_maxeax = CPI_MAXEAX_MAX; 2593 if (cpi->cpi_maxeax < 1) 2594 goto pass1_done; 2595 2596 cp = &cpi->cpi_std[1]; 2597 cp->cp_eax = 1; 2598 (void) __cpuid_insn(cp); 2599 2600 /* 2601 * Extract identifying constants for easy access. 2602 */ 2603 cpi->cpi_model = CPI_MODEL(cpi); 2604 cpi->cpi_family = CPI_FAMILY(cpi); 2605 2606 if (cpi->cpi_family == 0xf) 2607 cpi->cpi_family += CPI_FAMILY_XTD(cpi); 2608 2609 /* 2610 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf. 2611 * Intel, and presumably everyone else, uses model == 0xf, as 2612 * one would expect (max value means possible overflow). Sigh. 
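 * As an illustrative example (hypothetical part): an AMD CPU reporting a base * family of 0xf and an extended family of 0x8 ends up with cpi_family = * 0xf + 0x8 = 0x17 above; since its base family is 0xf, the extended model is * folded in below as well, so a base model of 0x1 with an extended model of * 0x7 becomes cpi_model = 0x71.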
2613 */ 2614 2615 switch (cpi->cpi_vendor) { 2616 case X86_VENDOR_Intel: 2617 if (IS_EXTENDED_MODEL_INTEL(cpi)) 2618 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 2619 break; 2620 case X86_VENDOR_AMD: 2621 if (CPI_FAMILY(cpi) == 0xf) 2622 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 2623 break; 2624 default: 2625 if (cpi->cpi_model == 0xf) 2626 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4; 2627 break; 2628 } 2629 2630 cpi->cpi_step = CPI_STEP(cpi); 2631 cpi->cpi_brandid = CPI_BRANDID(cpi); 2632 2633 /* 2634 * *default* assumptions: 2635 * - believe %edx feature word 2636 * - ignore %ecx feature word 2637 * - 32-bit virtual and physical addressing 2638 */ 2639 mask_edx = 0xffffffff; 2640 mask_ecx = 0; 2641 2642 cpi->cpi_pabits = cpi->cpi_vabits = 32; 2643 2644 switch (cpi->cpi_vendor) { 2645 case X86_VENDOR_Intel: 2646 if (cpi->cpi_family == 5) 2647 x86_type = X86_TYPE_P5; 2648 else if (IS_LEGACY_P6(cpi)) { 2649 x86_type = X86_TYPE_P6; 2650 pentiumpro_bug4046376 = 1; 2651 /* 2652 * Clear the SEP bit when it was set erroneously 2653 */ 2654 if (cpi->cpi_model < 3 && cpi->cpi_step < 3) 2655 cp->cp_edx &= ~CPUID_INTC_EDX_SEP; 2656 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) { 2657 x86_type = X86_TYPE_P4; 2658 /* 2659 * We don't currently depend on any of the %ecx 2660 * features until Prescott, so we'll only check 2661 * this from P4 onwards. We might want to revisit 2662 * that idea later. 2663 */ 2664 mask_ecx = 0xffffffff; 2665 } else if (cpi->cpi_family > 0xf) 2666 mask_ecx = 0xffffffff; 2667 /* 2668 * We don't support MONITOR/MWAIT if leaf 5 is not available 2669 * to obtain the monitor linesize. 2670 */ 2671 if (cpi->cpi_maxeax < 5) 2672 mask_ecx &= ~CPUID_INTC_ECX_MON; 2673 break; 2674 case X86_VENDOR_IntelClone: 2675 default: 2676 break; 2677 case X86_VENDOR_AMD: 2678 #if defined(OPTERON_ERRATUM_108) 2679 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) { 2680 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0; 2681 cpi->cpi_model = 0xc; 2682 } else 2683 #endif 2684 if (cpi->cpi_family == 5) { 2685 /* 2686 * AMD K5 and K6 2687 * 2688 * These CPUs have an incomplete implementation 2689 * of MCA/MCE which we mask away. 2690 */ 2691 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA); 2692 2693 /* 2694 * Model 0 uses the wrong (APIC) bit 2695 * to indicate PGE. Fix it here. 2696 */ 2697 if (cpi->cpi_model == 0) { 2698 if (cp->cp_edx & 0x200) { 2699 cp->cp_edx &= ~0x200; 2700 cp->cp_edx |= CPUID_INTC_EDX_PGE; 2701 } 2702 } 2703 2704 /* 2705 * Early models had problems w/ MMX; disable. 2706 */ 2707 if (cpi->cpi_model < 6) 2708 mask_edx &= ~CPUID_INTC_EDX_MMX; 2709 } 2710 2711 /* 2712 * For newer families, SSE3 and CX16, at least, are valid; 2713 * enable all 2714 */ 2715 if (cpi->cpi_family >= 0xf) 2716 mask_ecx = 0xffffffff; 2717 /* 2718 * We don't support MONITOR/MWAIT if leaf 5 is not available 2719 * to obtain the monitor linesize. 2720 */ 2721 if (cpi->cpi_maxeax < 5) 2722 mask_ecx &= ~CPUID_INTC_ECX_MON; 2723 2724 #if !defined(__xpv) 2725 /* 2726 * AMD has not historically used MWAIT in the CPU's idle loop. 2727 * Pre-family-10h Opterons do not have the MWAIT instruction. We 2728 * know for certain that in at least family 17h, per AMD, mwait 2729 * is preferred. Families in-between are less certain. 
2730 */ 2731 if (cpi->cpi_family < 0x17) { 2732 idle_cpu_prefer_mwait = 0; 2733 } 2734 #endif 2735 2736 break; 2737 case X86_VENDOR_TM: 2738 /* 2739 * workaround the NT workaround in CMS 4.1 2740 */ 2741 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 && 2742 (cpi->cpi_step == 2 || cpi->cpi_step == 3)) 2743 cp->cp_edx |= CPUID_INTC_EDX_CX8; 2744 break; 2745 case X86_VENDOR_Centaur: 2746 /* 2747 * workaround the NT workarounds again 2748 */ 2749 if (cpi->cpi_family == 6) 2750 cp->cp_edx |= CPUID_INTC_EDX_CX8; 2751 break; 2752 case X86_VENDOR_Cyrix: 2753 /* 2754 * We rely heavily on the probing in locore 2755 * to actually figure out what parts, if any, 2756 * of the Cyrix cpuid instruction to believe. 2757 */ 2758 switch (x86_type) { 2759 case X86_TYPE_CYRIX_486: 2760 mask_edx = 0; 2761 break; 2762 case X86_TYPE_CYRIX_6x86: 2763 mask_edx = 0; 2764 break; 2765 case X86_TYPE_CYRIX_6x86L: 2766 mask_edx = 2767 CPUID_INTC_EDX_DE | 2768 CPUID_INTC_EDX_CX8; 2769 break; 2770 case X86_TYPE_CYRIX_6x86MX: 2771 mask_edx = 2772 CPUID_INTC_EDX_DE | 2773 CPUID_INTC_EDX_MSR | 2774 CPUID_INTC_EDX_CX8 | 2775 CPUID_INTC_EDX_PGE | 2776 CPUID_INTC_EDX_CMOV | 2777 CPUID_INTC_EDX_MMX; 2778 break; 2779 case X86_TYPE_CYRIX_GXm: 2780 mask_edx = 2781 CPUID_INTC_EDX_MSR | 2782 CPUID_INTC_EDX_CX8 | 2783 CPUID_INTC_EDX_CMOV | 2784 CPUID_INTC_EDX_MMX; 2785 break; 2786 case X86_TYPE_CYRIX_MediaGX: 2787 break; 2788 case X86_TYPE_CYRIX_MII: 2789 case X86_TYPE_VIA_CYRIX_III: 2790 mask_edx = 2791 CPUID_INTC_EDX_DE | 2792 CPUID_INTC_EDX_TSC | 2793 CPUID_INTC_EDX_MSR | 2794 CPUID_INTC_EDX_CX8 | 2795 CPUID_INTC_EDX_PGE | 2796 CPUID_INTC_EDX_CMOV | 2797 CPUID_INTC_EDX_MMX; 2798 break; 2799 default: 2800 break; 2801 } 2802 break; 2803 } 2804 2805 #if defined(__xpv) 2806 /* 2807 * Do not support MONITOR/MWAIT under a hypervisor 2808 */ 2809 mask_ecx &= ~CPUID_INTC_ECX_MON; 2810 /* 2811 * Do not support XSAVE under a hypervisor for now 2812 */ 2813 xsave_force_disable = B_TRUE; 2814 2815 #endif /* __xpv */ 2816 2817 if (xsave_force_disable) { 2818 mask_ecx &= ~CPUID_INTC_ECX_XSAVE; 2819 mask_ecx &= ~CPUID_INTC_ECX_AVX; 2820 mask_ecx &= ~CPUID_INTC_ECX_F16C; 2821 mask_ecx &= ~CPUID_INTC_ECX_FMA; 2822 } 2823 2824 /* 2825 * Now we've figured out the masks that determine 2826 * which bits we choose to believe, apply the masks 2827 * to the feature words, then map the kernel's view 2828 * of these feature words into its feature word. 2829 */ 2830 cp->cp_edx &= mask_edx; 2831 cp->cp_ecx &= mask_ecx; 2832 2833 /* 2834 * apply any platform restrictions (we don't call this 2835 * immediately after __cpuid_insn here, because we need the 2836 * workarounds applied above first) 2837 */ 2838 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp); 2839 2840 /* 2841 * In addition to ecx and edx, Intel and AMD are storing a bunch of 2842 * instruction set extensions in leaf 7's ebx, ecx, and edx. 2843 */ 2844 if (cpi->cpi_maxeax >= 7) { 2845 struct cpuid_regs *ecp; 2846 ecp = &cpi->cpi_std[7]; 2847 ecp->cp_eax = 7; 2848 ecp->cp_ecx = 0; 2849 (void) __cpuid_insn(ecp); 2850 2851 /* 2852 * If XSAVE has been disabled, just ignore all of the 2853 * extended-save-area dependent flags here. 
2854 */ 2855 if (xsave_force_disable) { 2856 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_BMI1; 2857 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_BMI2; 2858 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_AVX2; 2859 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_MPX; 2860 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_ALL_AVX512; 2861 ecp->cp_ecx &= ~CPUID_INTC_ECX_7_0_ALL_AVX512; 2862 ecp->cp_edx &= ~CPUID_INTC_EDX_7_0_ALL_AVX512; 2863 } 2864 2865 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_SMEP) 2866 add_x86_feature(featureset, X86FSET_SMEP); 2867 2868 /* 2869 * We check disable_smap here in addition to in startup_smap() 2870 * to ensure CPUs that aren't the boot CPU don't accidentally 2871 * include it in the feature set and thus generate a mismatched 2872 * x86 feature set across CPUs. 2873 */ 2874 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_SMAP && 2875 disable_smap == 0) 2876 add_x86_feature(featureset, X86FSET_SMAP); 2877 2878 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_RDSEED) 2879 add_x86_feature(featureset, X86FSET_RDSEED); 2880 2881 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_ADX) 2882 add_x86_feature(featureset, X86FSET_ADX); 2883 2884 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_FSGSBASE) 2885 add_x86_feature(featureset, X86FSET_FSGSBASE); 2886 2887 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_CLFLUSHOPT) 2888 add_x86_feature(featureset, X86FSET_CLFLUSHOPT); 2889 2890 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 2891 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_INVPCID) 2892 add_x86_feature(featureset, X86FSET_INVPCID); 2893 2894 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_MPX) 2895 add_x86_feature(featureset, X86FSET_MPX); 2896 2897 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_CLWB) 2898 add_x86_feature(featureset, X86FSET_CLWB); 2899 } 2900 } 2901 2902 /* 2903 * fold in overrides from the "eeprom" mechanism 2904 */ 2905 cp->cp_edx |= cpuid_feature_edx_include; 2906 cp->cp_edx &= ~cpuid_feature_edx_exclude; 2907 2908 cp->cp_ecx |= cpuid_feature_ecx_include; 2909 cp->cp_ecx &= ~cpuid_feature_ecx_exclude; 2910 2911 if (cp->cp_edx & CPUID_INTC_EDX_PSE) { 2912 add_x86_feature(featureset, X86FSET_LARGEPAGE); 2913 } 2914 if (cp->cp_edx & CPUID_INTC_EDX_TSC) { 2915 add_x86_feature(featureset, X86FSET_TSC); 2916 } 2917 if (cp->cp_edx & CPUID_INTC_EDX_MSR) { 2918 add_x86_feature(featureset, X86FSET_MSR); 2919 } 2920 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) { 2921 add_x86_feature(featureset, X86FSET_MTRR); 2922 } 2923 if (cp->cp_edx & CPUID_INTC_EDX_PGE) { 2924 add_x86_feature(featureset, X86FSET_PGE); 2925 } 2926 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) { 2927 add_x86_feature(featureset, X86FSET_CMOV); 2928 } 2929 if (cp->cp_edx & CPUID_INTC_EDX_MMX) { 2930 add_x86_feature(featureset, X86FSET_MMX); 2931 } 2932 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 && 2933 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) { 2934 add_x86_feature(featureset, X86FSET_MCA); 2935 } 2936 if (cp->cp_edx & CPUID_INTC_EDX_PAE) { 2937 add_x86_feature(featureset, X86FSET_PAE); 2938 } 2939 if (cp->cp_edx & CPUID_INTC_EDX_CX8) { 2940 add_x86_feature(featureset, X86FSET_CX8); 2941 } 2942 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) { 2943 add_x86_feature(featureset, X86FSET_CX16); 2944 } 2945 if (cp->cp_edx & CPUID_INTC_EDX_PAT) { 2946 add_x86_feature(featureset, X86FSET_PAT); 2947 } 2948 if (cp->cp_edx & CPUID_INTC_EDX_SEP) { 2949 add_x86_feature(featureset, X86FSET_SEP); 2950 } 2951 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) { 2952 /* 2953 * In our implementation, fxsave/fxrstor 2954 * are prerequisites before we'll even 2955 * try and do SSE things. 
2956 */ 2957 if (cp->cp_edx & CPUID_INTC_EDX_SSE) { 2958 add_x86_feature(featureset, X86FSET_SSE); 2959 } 2960 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) { 2961 add_x86_feature(featureset, X86FSET_SSE2); 2962 } 2963 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) { 2964 add_x86_feature(featureset, X86FSET_SSE3); 2965 } 2966 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) { 2967 add_x86_feature(featureset, X86FSET_SSSE3); 2968 } 2969 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) { 2970 add_x86_feature(featureset, X86FSET_SSE4_1); 2971 } 2972 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) { 2973 add_x86_feature(featureset, X86FSET_SSE4_2); 2974 } 2975 if (cp->cp_ecx & CPUID_INTC_ECX_AES) { 2976 add_x86_feature(featureset, X86FSET_AES); 2977 } 2978 if (cp->cp_ecx & CPUID_INTC_ECX_PCLMULQDQ) { 2979 add_x86_feature(featureset, X86FSET_PCLMULQDQ); 2980 } 2981 2982 if (cpi->cpi_std[7].cp_ebx & CPUID_INTC_EBX_7_0_SHA) 2983 add_x86_feature(featureset, X86FSET_SHA); 2984 2985 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_UMIP) 2986 add_x86_feature(featureset, X86FSET_UMIP); 2987 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_PKU) 2988 add_x86_feature(featureset, X86FSET_PKU); 2989 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_OSPKE) 2990 add_x86_feature(featureset, X86FSET_OSPKE); 2991 2992 if (cp->cp_ecx & CPUID_INTC_ECX_XSAVE) { 2993 add_x86_feature(featureset, X86FSET_XSAVE); 2994 2995 /* We only test AVX & AVX512 when there is XSAVE */ 2996 2997 if (cp->cp_ecx & CPUID_INTC_ECX_AVX) { 2998 add_x86_feature(featureset, 2999 X86FSET_AVX); 3000 3001 /* 3002 * Intel says we can't check these without also 3003 * checking AVX. 3004 */ 3005 if (cp->cp_ecx & CPUID_INTC_ECX_F16C) 3006 add_x86_feature(featureset, 3007 X86FSET_F16C); 3008 3009 if (cp->cp_ecx & CPUID_INTC_ECX_FMA) 3010 add_x86_feature(featureset, 3011 X86FSET_FMA); 3012 3013 if (cpi->cpi_std[7].cp_ebx & 3014 CPUID_INTC_EBX_7_0_BMI1) 3015 add_x86_feature(featureset, 3016 X86FSET_BMI1); 3017 3018 if (cpi->cpi_std[7].cp_ebx & 3019 CPUID_INTC_EBX_7_0_BMI2) 3020 add_x86_feature(featureset, 3021 X86FSET_BMI2); 3022 3023 if (cpi->cpi_std[7].cp_ebx & 3024 CPUID_INTC_EBX_7_0_AVX2) 3025 add_x86_feature(featureset, 3026 X86FSET_AVX2); 3027 } 3028 3029 if (cpi->cpi_vendor == X86_VENDOR_Intel && 3030 (cpi->cpi_std[7].cp_ebx & 3031 CPUID_INTC_EBX_7_0_AVX512F) != 0) { 3032 add_x86_feature(featureset, X86FSET_AVX512F); 3033 3034 if (cpi->cpi_std[7].cp_ebx & 3035 CPUID_INTC_EBX_7_0_AVX512DQ) 3036 add_x86_feature(featureset, 3037 X86FSET_AVX512DQ); 3038 if (cpi->cpi_std[7].cp_ebx & 3039 CPUID_INTC_EBX_7_0_AVX512IFMA) 3040 add_x86_feature(featureset, 3041 X86FSET_AVX512FMA); 3042 if (cpi->cpi_std[7].cp_ebx & 3043 CPUID_INTC_EBX_7_0_AVX512PF) 3044 add_x86_feature(featureset, 3045 X86FSET_AVX512PF); 3046 if (cpi->cpi_std[7].cp_ebx & 3047 CPUID_INTC_EBX_7_0_AVX512ER) 3048 add_x86_feature(featureset, 3049 X86FSET_AVX512ER); 3050 if (cpi->cpi_std[7].cp_ebx & 3051 CPUID_INTC_EBX_7_0_AVX512CD) 3052 add_x86_feature(featureset, 3053 X86FSET_AVX512CD); 3054 if (cpi->cpi_std[7].cp_ebx & 3055 CPUID_INTC_EBX_7_0_AVX512BW) 3056 add_x86_feature(featureset, 3057 X86FSET_AVX512BW); 3058 if (cpi->cpi_std[7].cp_ebx & 3059 CPUID_INTC_EBX_7_0_AVX512VL) 3060 add_x86_feature(featureset, 3061 X86FSET_AVX512VL); 3062 3063 if (cpi->cpi_std[7].cp_ecx & 3064 CPUID_INTC_ECX_7_0_AVX512VBMI) 3065 add_x86_feature(featureset, 3066 X86FSET_AVX512VBMI); 3067 if (cpi->cpi_std[7].cp_ecx & 3068 CPUID_INTC_ECX_7_0_AVX512VNNI) 3069 add_x86_feature(featureset, 3070 X86FSET_AVX512VNNI); 3071 if (cpi->cpi_std[7].cp_ecx & 3072 
CPUID_INTC_ECX_7_0_AVX512VPOPCDQ) 3073 add_x86_feature(featureset, 3074 X86FSET_AVX512VPOPCDQ); 3075 3076 if (cpi->cpi_std[7].cp_edx & 3077 CPUID_INTC_EDX_7_0_AVX5124NNIW) 3078 add_x86_feature(featureset, 3079 X86FSET_AVX512NNIW); 3080 if (cpi->cpi_std[7].cp_edx & 3081 CPUID_INTC_EDX_7_0_AVX5124FMAPS) 3082 add_x86_feature(featureset, 3083 X86FSET_AVX512FMAPS); 3084 } 3085 } 3086 } 3087 3088 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 3089 if (cp->cp_ecx & CPUID_INTC_ECX_PCID) { 3090 add_x86_feature(featureset, X86FSET_PCID); 3091 } 3092 } 3093 3094 if (cp->cp_ecx & CPUID_INTC_ECX_X2APIC) { 3095 add_x86_feature(featureset, X86FSET_X2APIC); 3096 } 3097 if (cp->cp_edx & CPUID_INTC_EDX_DE) { 3098 add_x86_feature(featureset, X86FSET_DE); 3099 } 3100 #if !defined(__xpv) 3101 if (cp->cp_ecx & CPUID_INTC_ECX_MON) { 3102 3103 /* 3104 * We require the CLFLUSH instruction for erratum workaround 3105 * to use MONITOR/MWAIT. 3106 */ 3107 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) { 3108 cpi->cpi_mwait.support |= MWAIT_SUPPORT; 3109 add_x86_feature(featureset, X86FSET_MWAIT); 3110 } else { 3111 extern int idle_cpu_assert_cflush_monitor; 3112 3113 /* 3114 * All processors we are aware of which have 3115 * MONITOR/MWAIT also have CLFLUSH. 3116 */ 3117 if (idle_cpu_assert_cflush_monitor) { 3118 ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) && 3119 (cp->cp_edx & CPUID_INTC_EDX_CLFSH)); 3120 } 3121 } 3122 } 3123 #endif /* __xpv */ 3124 3125 if (cp->cp_ecx & CPUID_INTC_ECX_VMX) { 3126 add_x86_feature(featureset, X86FSET_VMX); 3127 } 3128 3129 if (cp->cp_ecx & CPUID_INTC_ECX_RDRAND) 3130 add_x86_feature(featureset, X86FSET_RDRAND); 3131 3132 /* 3133 * Only need it first time, rest of the cpus would follow suit. 3134 * we only capture this for the bootcpu. 3135 */ 3136 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) { 3137 add_x86_feature(featureset, X86FSET_CLFSH); 3138 x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8); 3139 } 3140 if (is_x86_feature(featureset, X86FSET_PAE)) 3141 cpi->cpi_pabits = 36; 3142 3143 if (cpi->cpi_maxeax >= 0xD && !xsave_force_disable) { 3144 struct cpuid_regs r, *ecp; 3145 3146 ecp = &r; 3147 ecp->cp_eax = 0xD; 3148 ecp->cp_ecx = 1; 3149 ecp->cp_edx = ecp->cp_ebx = 0; 3150 (void) __cpuid_insn(ecp); 3151 3152 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVEOPT) 3153 add_x86_feature(featureset, X86FSET_XSAVEOPT); 3154 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVEC) 3155 add_x86_feature(featureset, X86FSET_XSAVEC); 3156 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVES) 3157 add_x86_feature(featureset, X86FSET_XSAVES); 3158 } 3159 3160 /* 3161 * Work on the "extended" feature information, doing 3162 * some basic initialization for cpuid_pass2() 3163 */ 3164 xcpuid = 0; 3165 switch (cpi->cpi_vendor) { 3166 case X86_VENDOR_Intel: 3167 /* 3168 * On KVM we know we will have proper support for extended 3169 * cpuid. 3170 */ 3171 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf || 3172 (get_hwenv() == HW_KVM && cpi->cpi_family == 6 && 3173 (cpi->cpi_model == 6 || cpi->cpi_model == 2))) 3174 xcpuid++; 3175 break; 3176 case X86_VENDOR_AMD: 3177 if (cpi->cpi_family > 5 || 3178 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 3179 xcpuid++; 3180 break; 3181 case X86_VENDOR_Cyrix: 3182 /* 3183 * Only these Cyrix CPUs are -known- to support 3184 * extended cpuid operations. 
3185 */ 3186 if (x86_type == X86_TYPE_VIA_CYRIX_III || 3187 x86_type == X86_TYPE_CYRIX_GXm) 3188 xcpuid++; 3189 break; 3190 case X86_VENDOR_Centaur: 3191 case X86_VENDOR_TM: 3192 default: 3193 xcpuid++; 3194 break; 3195 } 3196 3197 if (xcpuid) { 3198 cp = &cpi->cpi_extd[0]; 3199 cp->cp_eax = CPUID_LEAF_EXT_0; 3200 cpi->cpi_xmaxeax = __cpuid_insn(cp); 3201 } 3202 3203 if (cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) { 3204 3205 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX) 3206 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX; 3207 3208 switch (cpi->cpi_vendor) { 3209 case X86_VENDOR_Intel: 3210 case X86_VENDOR_AMD: 3211 if (cpi->cpi_xmaxeax < 0x80000001) 3212 break; 3213 cp = &cpi->cpi_extd[1]; 3214 cp->cp_eax = 0x80000001; 3215 (void) __cpuid_insn(cp); 3216 3217 if (cpi->cpi_vendor == X86_VENDOR_AMD && 3218 cpi->cpi_family == 5 && 3219 cpi->cpi_model == 6 && 3220 cpi->cpi_step == 6) { 3221 /* 3222 * K6 model 6 uses bit 10 to indicate SYSC 3223 * Later models use bit 11. Fix it here. 3224 */ 3225 if (cp->cp_edx & 0x400) { 3226 cp->cp_edx &= ~0x400; 3227 cp->cp_edx |= CPUID_AMD_EDX_SYSC; 3228 } 3229 } 3230 3231 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp); 3232 3233 /* 3234 * Compute the additions to the kernel's feature word. 3235 */ 3236 if (cp->cp_edx & CPUID_AMD_EDX_NX) { 3237 add_x86_feature(featureset, X86FSET_NX); 3238 } 3239 3240 /* 3241 * Regardless whether or not we boot 64-bit, 3242 * we should have a way to identify whether 3243 * the CPU is capable of running 64-bit. 3244 */ 3245 if (cp->cp_edx & CPUID_AMD_EDX_LM) { 3246 add_x86_feature(featureset, X86FSET_64); 3247 } 3248 3249 /* 1 GB large page - enable only for 64 bit kernel */ 3250 if (cp->cp_edx & CPUID_AMD_EDX_1GPG) { 3251 add_x86_feature(featureset, X86FSET_1GPG); 3252 } 3253 3254 if ((cpi->cpi_vendor == X86_VENDOR_AMD) && 3255 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) && 3256 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) { 3257 add_x86_feature(featureset, X86FSET_SSE4A); 3258 } 3259 3260 /* 3261 * It's really tricky to support syscall/sysret in 3262 * the i386 kernel; we rely on sysenter/sysexit 3263 * instead. In the amd64 kernel, things are -way- 3264 * better. 3265 */ 3266 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) { 3267 add_x86_feature(featureset, X86FSET_ASYSC); 3268 } 3269 3270 /* 3271 * While we're thinking about system calls, note 3272 * that AMD processors don't support sysenter 3273 * in long mode at all, so don't try to program them. 3274 */ 3275 if (x86_vendor == X86_VENDOR_AMD) { 3276 remove_x86_feature(featureset, X86FSET_SEP); 3277 } 3278 3279 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) { 3280 add_x86_feature(featureset, X86FSET_TSCP); 3281 } 3282 3283 if (cp->cp_ecx & CPUID_AMD_ECX_SVM) { 3284 add_x86_feature(featureset, X86FSET_SVM); 3285 } 3286 3287 if (cp->cp_ecx & CPUID_AMD_ECX_TOPOEXT) { 3288 add_x86_feature(featureset, X86FSET_TOPOEXT); 3289 } 3290 3291 if (cp->cp_ecx & CPUID_AMD_ECX_PCEC) { 3292 add_x86_feature(featureset, X86FSET_AMD_PCEC); 3293 } 3294 3295 if (cp->cp_ecx & CPUID_AMD_ECX_XOP) { 3296 add_x86_feature(featureset, X86FSET_XOP); 3297 } 3298 3299 if (cp->cp_ecx & CPUID_AMD_ECX_FMA4) { 3300 add_x86_feature(featureset, X86FSET_FMA4); 3301 } 3302 3303 if (cp->cp_ecx & CPUID_AMD_ECX_TBM) { 3304 add_x86_feature(featureset, X86FSET_TBM); 3305 } 3306 3307 if (cp->cp_ecx & CPUID_AMD_ECX_MONITORX) { 3308 add_x86_feature(featureset, X86FSET_MONITORX); 3309 } 3310 break; 3311 default: 3312 break; 3313 } 3314 3315 /* 3316 * Get CPUID data about processor cores and hyperthreads. 
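 *
 * Intel supplies this through basic leaf 4 (issued below with %ecx == 0)
 * and, for both vendors, extended leaf 0x80000008 is read as well.
 * Leaf 0x80000008 also reports the physical and virtual address widths
 * in %eax, which the code below uses to override the earlier guesses.
 * A minimal sketch of that probe (illustrative only; pabits and vabits
 * stand in for the cpi_pabits and cpi_vabits members actually updated
 * below):
 *
 *	struct cpuid_regs r = { 0 };
 *
 *	r.cp_eax = CPUID_LEAF_EXT_8;
 *	(void) __cpuid_insn(&r);
 *	pabits = BITX(r.cp_eax, 7, 0);
 *	vabits = BITX(r.cp_eax, 15, 8);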
3317 */ 3318 switch (cpi->cpi_vendor) { 3319 case X86_VENDOR_Intel: 3320 if (cpi->cpi_maxeax >= 4) { 3321 cp = &cpi->cpi_std[4]; 3322 cp->cp_eax = 4; 3323 cp->cp_ecx = 0; 3324 (void) __cpuid_insn(cp); 3325 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp); 3326 } 3327 /*FALLTHROUGH*/ 3328 case X86_VENDOR_AMD: 3329 if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8) 3330 break; 3331 cp = &cpi->cpi_extd[8]; 3332 cp->cp_eax = CPUID_LEAF_EXT_8; 3333 (void) __cpuid_insn(cp); 3334 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8, 3335 cp); 3336 3337 /* 3338 * AMD uses ebx for some extended functions. 3339 */ 3340 if (cpi->cpi_vendor == X86_VENDOR_AMD) { 3341 /* 3342 * While we're here, check for the AMD "Error 3343 * Pointer Zero/Restore" feature. This can be 3344 * used to setup the FP save handlers 3345 * appropriately. 3346 */ 3347 if (cp->cp_ebx & CPUID_AMD_EBX_ERR_PTR_ZERO) { 3348 cpi->cpi_fp_amd_save = 0; 3349 } else { 3350 cpi->cpi_fp_amd_save = 1; 3351 } 3352 3353 if (cp->cp_ebx & CPUID_AMD_EBX_CLZERO) { 3354 add_x86_feature(featureset, 3355 X86FSET_CLZERO); 3356 } 3357 } 3358 3359 /* 3360 * Virtual and physical address limits from 3361 * cpuid override previously guessed values. 3362 */ 3363 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0); 3364 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8); 3365 break; 3366 default: 3367 break; 3368 } 3369 3370 /* 3371 * Get CPUID data about TSC Invariance in Deep C-State. 3372 */ 3373 switch (cpi->cpi_vendor) { 3374 case X86_VENDOR_Intel: 3375 case X86_VENDOR_AMD: 3376 if (cpi->cpi_maxeax >= 7) { 3377 cp = &cpi->cpi_extd[7]; 3378 cp->cp_eax = 0x80000007; 3379 cp->cp_ecx = 0; 3380 (void) __cpuid_insn(cp); 3381 } 3382 break; 3383 default: 3384 break; 3385 } 3386 } 3387 3388 cpuid_pass1_topology(cpu, featureset); 3389 cpuid_pass1_thermal(cpu, featureset); 3390 3391 /* 3392 * Synthesize chip "revision" and socket type 3393 */ 3394 cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family, 3395 cpi->cpi_model, cpi->cpi_step); 3396 cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor, 3397 cpi->cpi_family, cpi->cpi_model, cpi->cpi_step); 3398 cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family, 3399 cpi->cpi_model, cpi->cpi_step); 3400 3401 if (cpi->cpi_vendor == X86_VENDOR_AMD) { 3402 if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8 && 3403 cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_ERR_PTR_ZERO) { 3404 /* Special handling for AMD FP not necessary. */ 3405 cpi->cpi_fp_amd_save = 0; 3406 } else { 3407 cpi->cpi_fp_amd_save = 1; 3408 } 3409 } 3410 3411 /* 3412 * Check the processor leaves that are used for security features. 3413 */ 3414 cpuid_scan_security(cpu, featureset); 3415 3416 pass1_done: 3417 cpi->cpi_pass = 1; 3418 } 3419 3420 /* 3421 * Make copies of the cpuid table entries we depend on, in 3422 * part for ease of parsing now, in part so that we have only 3423 * one place to correct any of it, in part for ease of 3424 * later export to userland, and in part so we can look at 3425 * this stuff in a crash dump. 
3426 */ 3427 3428 /*ARGSUSED*/ 3429 void 3430 cpuid_pass2(cpu_t *cpu) 3431 { 3432 uint_t n, nmax; 3433 int i; 3434 struct cpuid_regs *cp; 3435 uint8_t *dp; 3436 uint32_t *iptr; 3437 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 3438 3439 ASSERT(cpi->cpi_pass == 1); 3440 3441 if (cpi->cpi_maxeax < 1) 3442 goto pass2_done; 3443 3444 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD) 3445 nmax = NMAX_CPI_STD; 3446 /* 3447 * (We already handled n == 0 and n == 1 in pass 1) 3448 */ 3449 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) { 3450 /* 3451 * leaves 6 and 7 were handled in pass 1 3452 */ 3453 if (n == 6 || n == 7) 3454 continue; 3455 3456 cp->cp_eax = n; 3457 3458 /* 3459 * CPUID function 4 expects %ecx to be initialized 3460 * with an index which indicates which cache to return 3461 * information about. The OS is expected to call function 4 3462 * with %ecx set to 0, 1, 2, ... until it returns with 3463 * EAX[4:0] set to 0, which indicates there are no more 3464 * caches. 3465 * 3466 * Here, populate cpi_std[4] with the information returned by 3467 * function 4 when %ecx == 0, and do the rest in cpuid_pass3() 3468 * when dynamic memory allocation becomes available. 3469 * 3470 * Note: we need to explicitly initialize %ecx here, since 3471 * function 4 may have been previously invoked. 3472 */ 3473 if (n == 4) 3474 cp->cp_ecx = 0; 3475 3476 (void) __cpuid_insn(cp); 3477 platform_cpuid_mangle(cpi->cpi_vendor, n, cp); 3478 switch (n) { 3479 case 2: 3480 /* 3481 * "the lower 8 bits of the %eax register 3482 * contain a value that identifies the number 3483 * of times the cpuid [instruction] has to be 3484 * executed to obtain a complete image of the 3485 * processor's caching systems." 3486 * 3487 * How *do* they make this stuff up? 3488 */ 3489 cpi->cpi_ncache = sizeof (*cp) * 3490 BITX(cp->cp_eax, 7, 0); 3491 if (cpi->cpi_ncache == 0) 3492 break; 3493 cpi->cpi_ncache--; /* skip count byte */ 3494 3495 /* 3496 * Well, for now, rather than attempt to implement 3497 * this slightly dubious algorithm, we just look 3498 * at the first 15 .. 3499 */ 3500 if (cpi->cpi_ncache > (sizeof (*cp) - 1)) 3501 cpi->cpi_ncache = sizeof (*cp) - 1; 3502 3503 dp = cpi->cpi_cacheinfo; 3504 if (BITX(cp->cp_eax, 31, 31) == 0) { 3505 uint8_t *p = (void *)&cp->cp_eax; 3506 for (i = 1; i < 4; i++) 3507 if (p[i] != 0) 3508 *dp++ = p[i]; 3509 } 3510 if (BITX(cp->cp_ebx, 31, 31) == 0) { 3511 uint8_t *p = (void *)&cp->cp_ebx; 3512 for (i = 0; i < 4; i++) 3513 if (p[i] != 0) 3514 *dp++ = p[i]; 3515 } 3516 if (BITX(cp->cp_ecx, 31, 31) == 0) { 3517 uint8_t *p = (void *)&cp->cp_ecx; 3518 for (i = 0; i < 4; i++) 3519 if (p[i] != 0) 3520 *dp++ = p[i]; 3521 } 3522 if (BITX(cp->cp_edx, 31, 31) == 0) { 3523 uint8_t *p = (void *)&cp->cp_edx; 3524 for (i = 0; i < 4; i++) 3525 if (p[i] != 0) 3526 *dp++ = p[i]; 3527 } 3528 break; 3529 3530 case 3: /* Processor serial number, if PSN supported */ 3531 break; 3532 3533 case 4: /* Deterministic cache parameters */ 3534 break; 3535 3536 case 5: /* Monitor/Mwait parameters */ 3537 { 3538 size_t mwait_size; 3539 3540 /* 3541 * check cpi_mwait.support which was set in cpuid_pass1 3542 */ 3543 if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT)) 3544 break; 3545 3546 /* 3547 * Protect ourself from insane mwait line size. 3548 * Workaround for incomplete hardware emulator(s). 
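 *
 * The test below rejects a reported monitor-line size that is smaller
 * than a uint32_t or not a power of two; for example, a size of 64 is
 * accepted, while 0 or a non-power-of-two value such as 24 is rejected.
 * ISP2() is simply the usual power-of-two idiom, conceptually (a sketch
 * of the macro; the real definition lives in the system headers):
 *
 *	#define	ISP2(x)	(((x) & ((x) - 1)) == 0)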
3549 */ 3550 mwait_size = (size_t)MWAIT_SIZE_MAX(cpi); 3551 if (mwait_size < sizeof (uint32_t) || 3552 !ISP2(mwait_size)) { 3553 #if DEBUG 3554 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait " 3555 "size %ld", cpu->cpu_id, (long)mwait_size); 3556 #endif 3557 break; 3558 } 3559 3560 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi); 3561 cpi->cpi_mwait.mon_max = mwait_size; 3562 if (MWAIT_EXTENSION(cpi)) { 3563 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS; 3564 if (MWAIT_INT_ENABLE(cpi)) 3565 cpi->cpi_mwait.support |= 3566 MWAIT_ECX_INT_ENABLE; 3567 } 3568 break; 3569 } 3570 default: 3571 break; 3572 } 3573 } 3574 3575 /* 3576 * XSAVE enumeration 3577 */ 3578 if (cpi->cpi_maxeax >= 0xD) { 3579 struct cpuid_regs regs; 3580 boolean_t cpuid_d_valid = B_TRUE; 3581 3582 cp = &regs; 3583 cp->cp_eax = 0xD; 3584 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0; 3585 3586 (void) __cpuid_insn(cp); 3587 3588 /* 3589 * Sanity checks for debug 3590 */ 3591 if ((cp->cp_eax & XFEATURE_LEGACY_FP) == 0 || 3592 (cp->cp_eax & XFEATURE_SSE) == 0) { 3593 cpuid_d_valid = B_FALSE; 3594 } 3595 3596 cpi->cpi_xsave.xsav_hw_features_low = cp->cp_eax; 3597 cpi->cpi_xsave.xsav_hw_features_high = cp->cp_edx; 3598 cpi->cpi_xsave.xsav_max_size = cp->cp_ecx; 3599 3600 /* 3601 * If the hw supports AVX, get the size and offset in the save 3602 * area for the ymm state. 3603 */ 3604 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX) { 3605 cp->cp_eax = 0xD; 3606 cp->cp_ecx = 2; 3607 cp->cp_edx = cp->cp_ebx = 0; 3608 3609 (void) __cpuid_insn(cp); 3610 3611 if (cp->cp_ebx != CPUID_LEAFD_2_YMM_OFFSET || 3612 cp->cp_eax != CPUID_LEAFD_2_YMM_SIZE) { 3613 cpuid_d_valid = B_FALSE; 3614 } 3615 3616 cpi->cpi_xsave.ymm_size = cp->cp_eax; 3617 cpi->cpi_xsave.ymm_offset = cp->cp_ebx; 3618 } 3619 3620 /* 3621 * If the hw supports MPX, get the size and offset in the 3622 * save area for BNDREGS and BNDCSR. 3623 */ 3624 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_MPX) { 3625 cp->cp_eax = 0xD; 3626 cp->cp_ecx = 3; 3627 cp->cp_edx = cp->cp_ebx = 0; 3628 3629 (void) __cpuid_insn(cp); 3630 3631 cpi->cpi_xsave.bndregs_size = cp->cp_eax; 3632 cpi->cpi_xsave.bndregs_offset = cp->cp_ebx; 3633 3634 cp->cp_eax = 0xD; 3635 cp->cp_ecx = 4; 3636 cp->cp_edx = cp->cp_ebx = 0; 3637 3638 (void) __cpuid_insn(cp); 3639 3640 cpi->cpi_xsave.bndcsr_size = cp->cp_eax; 3641 cpi->cpi_xsave.bndcsr_offset = cp->cp_ebx; 3642 } 3643 3644 /* 3645 * If the hw supports AVX512, get the size and offset in the 3646 * save area for the opmask registers and zmm state.
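 *
 * Each enabled state component has its own sub-leaf of leaf 0xD: %eax
 * returns the component's size in bytes and %ebx its byte offset within
 * the (non-compacted) XSAVE area. The AVX-512 components are sub-leaf 5
 * (the k0-k7 opmask registers), sub-leaf 6 (ZMM_Hi256, the upper halves
 * of zmm0-15) and sub-leaf 7 (Hi16_ZMM, zmm16-31), which is what the
 * block below queries. As a worked example, many parts report the
 * opmask state as 64 bytes at offset 1088, ZMM_Hi256 as 512 bytes at
 * offset 1152 and Hi16_ZMM as 1024 bytes at offset 1664; these values
 * are typical of the standard layout, not architecturally fixed, which
 * is exactly why they are read from cpuid rather than hard-coded.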
3647 */ 3648 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX512) { 3649 cp->cp_eax = 0xD; 3650 cp->cp_ecx = 5; 3651 cp->cp_edx = cp->cp_ebx = 0; 3652 3653 (void) __cpuid_insn(cp); 3654 3655 cpi->cpi_xsave.opmask_size = cp->cp_eax; 3656 cpi->cpi_xsave.opmask_offset = cp->cp_ebx; 3657 3658 cp->cp_eax = 0xD; 3659 cp->cp_ecx = 6; 3660 cp->cp_edx = cp->cp_ebx = 0; 3661 3662 (void) __cpuid_insn(cp); 3663 3664 cpi->cpi_xsave.zmmlo_size = cp->cp_eax; 3665 cpi->cpi_xsave.zmmlo_offset = cp->cp_ebx; 3666 3667 cp->cp_eax = 0xD; 3668 cp->cp_ecx = 7; 3669 cp->cp_edx = cp->cp_ebx = 0; 3670 3671 (void) __cpuid_insn(cp); 3672 3673 cpi->cpi_xsave.zmmhi_size = cp->cp_eax; 3674 cpi->cpi_xsave.zmmhi_offset = cp->cp_ebx; 3675 } 3676 3677 if (is_x86_feature(x86_featureset, X86FSET_XSAVE)) { 3678 xsave_state_size = 0; 3679 } else if (cpuid_d_valid) { 3680 xsave_state_size = cpi->cpi_xsave.xsav_max_size; 3681 } else { 3682 /* Broken CPUID 0xD, probably in HVM */ 3683 cmn_err(CE_WARN, "cpu%d: CPUID.0xD returns invalid " 3684 "value: hw_low = %d, hw_high = %d, xsave_size = %d" 3685 ", ymm_size = %d, ymm_offset = %d\n", 3686 cpu->cpu_id, cpi->cpi_xsave.xsav_hw_features_low, 3687 cpi->cpi_xsave.xsav_hw_features_high, 3688 (int)cpi->cpi_xsave.xsav_max_size, 3689 (int)cpi->cpi_xsave.ymm_size, 3690 (int)cpi->cpi_xsave.ymm_offset); 3691 3692 if (xsave_state_size != 0) { 3693 /* 3694 * This must be a non-boot CPU. We cannot 3695 * continue, because boot cpu has already 3696 * enabled XSAVE. 3697 */ 3698 ASSERT(cpu->cpu_id != 0); 3699 cmn_err(CE_PANIC, "cpu%d: we have already " 3700 "enabled XSAVE on boot cpu, cannot " 3701 "continue.", cpu->cpu_id); 3702 } else { 3703 /* 3704 * If we reached here on the boot CPU, it's also 3705 * almost certain that we'll reach here on the 3706 * non-boot CPUs. When we're here on a boot CPU 3707 * we should disable the feature, on a non-boot 3708 * CPU we need to confirm that we have. 
3709 */ 3710 if (cpu->cpu_id == 0) { 3711 remove_x86_feature(x86_featureset, 3712 X86FSET_XSAVE); 3713 remove_x86_feature(x86_featureset, 3714 X86FSET_AVX); 3715 remove_x86_feature(x86_featureset, 3716 X86FSET_F16C); 3717 remove_x86_feature(x86_featureset, 3718 X86FSET_BMI1); 3719 remove_x86_feature(x86_featureset, 3720 X86FSET_BMI2); 3721 remove_x86_feature(x86_featureset, 3722 X86FSET_FMA); 3723 remove_x86_feature(x86_featureset, 3724 X86FSET_AVX2); 3725 remove_x86_feature(x86_featureset, 3726 X86FSET_MPX); 3727 remove_x86_feature(x86_featureset, 3728 X86FSET_AVX512F); 3729 remove_x86_feature(x86_featureset, 3730 X86FSET_AVX512DQ); 3731 remove_x86_feature(x86_featureset, 3732 X86FSET_AVX512PF); 3733 remove_x86_feature(x86_featureset, 3734 X86FSET_AVX512ER); 3735 remove_x86_feature(x86_featureset, 3736 X86FSET_AVX512CD); 3737 remove_x86_feature(x86_featureset, 3738 X86FSET_AVX512BW); 3739 remove_x86_feature(x86_featureset, 3740 X86FSET_AVX512VL); 3741 remove_x86_feature(x86_featureset, 3742 X86FSET_AVX512FMA); 3743 remove_x86_feature(x86_featureset, 3744 X86FSET_AVX512VBMI); 3745 remove_x86_feature(x86_featureset, 3746 X86FSET_AVX512VNNI); 3747 remove_x86_feature(x86_featureset, 3748 X86FSET_AVX512VPOPCDQ); 3749 remove_x86_feature(x86_featureset, 3750 X86FSET_AVX512NNIW); 3751 remove_x86_feature(x86_featureset, 3752 X86FSET_AVX512FMAPS); 3753 3754 CPI_FEATURES_ECX(cpi) &= 3755 ~CPUID_INTC_ECX_XSAVE; 3756 CPI_FEATURES_ECX(cpi) &= 3757 ~CPUID_INTC_ECX_AVX; 3758 CPI_FEATURES_ECX(cpi) &= 3759 ~CPUID_INTC_ECX_F16C; 3760 CPI_FEATURES_ECX(cpi) &= 3761 ~CPUID_INTC_ECX_FMA; 3762 CPI_FEATURES_7_0_EBX(cpi) &= 3763 ~CPUID_INTC_EBX_7_0_BMI1; 3764 CPI_FEATURES_7_0_EBX(cpi) &= 3765 ~CPUID_INTC_EBX_7_0_BMI2; 3766 CPI_FEATURES_7_0_EBX(cpi) &= 3767 ~CPUID_INTC_EBX_7_0_AVX2; 3768 CPI_FEATURES_7_0_EBX(cpi) &= 3769 ~CPUID_INTC_EBX_7_0_MPX; 3770 CPI_FEATURES_7_0_EBX(cpi) &= 3771 ~CPUID_INTC_EBX_7_0_ALL_AVX512; 3772 3773 CPI_FEATURES_7_0_ECX(cpi) &= 3774 ~CPUID_INTC_ECX_7_0_ALL_AVX512; 3775 3776 CPI_FEATURES_7_0_EDX(cpi) &= 3777 ~CPUID_INTC_EDX_7_0_ALL_AVX512; 3778 3779 xsave_force_disable = B_TRUE; 3780 } else { 3781 VERIFY(is_x86_feature(x86_featureset, 3782 X86FSET_XSAVE) == B_FALSE); 3783 } 3784 } 3785 } 3786 } 3787 3788 3789 if ((cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) == 0) 3790 goto pass2_done; 3791 3792 if ((nmax = cpi->cpi_xmaxeax - CPUID_LEAF_EXT_0 + 1) > NMAX_CPI_EXTD) 3793 nmax = NMAX_CPI_EXTD; 3794 /* 3795 * Copy the extended properties, fixing them as we go. 3796 * (We already handled n == 0 and n == 1 in pass 1) 3797 */ 3798 iptr = (void *)cpi->cpi_brandstr; 3799 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) { 3800 cp->cp_eax = CPUID_LEAF_EXT_0 + n; 3801 (void) __cpuid_insn(cp); 3802 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_0 + n, 3803 cp); 3804 switch (n) { 3805 case 2: 3806 case 3: 3807 case 4: 3808 /* 3809 * Extract the brand string 3810 */ 3811 *iptr++ = cp->cp_eax; 3812 *iptr++ = cp->cp_ebx; 3813 *iptr++ = cp->cp_ecx; 3814 *iptr++ = cp->cp_edx; 3815 break; 3816 case 5: 3817 switch (cpi->cpi_vendor) { 3818 case X86_VENDOR_AMD: 3819 /* 3820 * The Athlon and Duron were the first 3821 * parts to report the sizes of the 3822 * TLB for large pages. Before then, 3823 * we don't trust the data. 
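 *
 * (For reference, this is extended leaf 0x80000005 %eax, which packs
 * the 2M/4M-page L1 TLB information; the data-TLB entry count sits in
 * bits 23:16, which is why cpuid_get_dtlb_nent() later recovers it
 * with BITX(cp->cp_eax, 23, 16). Zeroing %eax below simply discards
 * the report for parts predating the Athlon/Duron.)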
3824 */ 3825 if (cpi->cpi_family < 6 || 3826 (cpi->cpi_family == 6 && 3827 cpi->cpi_model < 1)) 3828 cp->cp_eax = 0; 3829 break; 3830 default: 3831 break; 3832 } 3833 break; 3834 case 6: 3835 switch (cpi->cpi_vendor) { 3836 case X86_VENDOR_AMD: 3837 /* 3838 * The Athlon and Duron were the first 3839 * AMD parts with L2 TLB's. 3840 * Before then, don't trust the data. 3841 */ 3842 if (cpi->cpi_family < 6 || 3843 cpi->cpi_family == 6 && 3844 cpi->cpi_model < 1) 3845 cp->cp_eax = cp->cp_ebx = 0; 3846 /* 3847 * AMD Duron rev A0 reports L2 3848 * cache size incorrectly as 1K 3849 * when it is really 64K 3850 */ 3851 if (cpi->cpi_family == 6 && 3852 cpi->cpi_model == 3 && 3853 cpi->cpi_step == 0) { 3854 cp->cp_ecx &= 0xffff; 3855 cp->cp_ecx |= 0x400000; 3856 } 3857 break; 3858 case X86_VENDOR_Cyrix: /* VIA C3 */ 3859 /* 3860 * VIA C3 processors are a bit messed 3861 * up w.r.t. encoding cache sizes in %ecx 3862 */ 3863 if (cpi->cpi_family != 6) 3864 break; 3865 /* 3866 * model 7 and 8 were incorrectly encoded 3867 * 3868 * xxx is model 8 really broken? 3869 */ 3870 if (cpi->cpi_model == 7 || 3871 cpi->cpi_model == 8) 3872 cp->cp_ecx = 3873 BITX(cp->cp_ecx, 31, 24) << 16 | 3874 BITX(cp->cp_ecx, 23, 16) << 12 | 3875 BITX(cp->cp_ecx, 15, 8) << 8 | 3876 BITX(cp->cp_ecx, 7, 0); 3877 /* 3878 * model 9 stepping 1 has wrong associativity 3879 */ 3880 if (cpi->cpi_model == 9 && cpi->cpi_step == 1) 3881 cp->cp_ecx |= 8 << 12; 3882 break; 3883 case X86_VENDOR_Intel: 3884 /* 3885 * Extended L2 Cache features function. 3886 * First appeared on Prescott. 3887 */ 3888 default: 3889 break; 3890 } 3891 break; 3892 default: 3893 break; 3894 } 3895 } 3896 3897 pass2_done: 3898 cpi->cpi_pass = 2; 3899 } 3900 3901 static const char * 3902 intel_cpubrand(const struct cpuid_info *cpi) 3903 { 3904 int i; 3905 3906 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) || 3907 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 3908 return ("i486"); 3909 3910 switch (cpi->cpi_family) { 3911 case 5: 3912 return ("Intel Pentium(r)"); 3913 case 6: 3914 switch (cpi->cpi_model) { 3915 uint_t celeron, xeon; 3916 const struct cpuid_regs *cp; 3917 case 0: 3918 case 1: 3919 case 2: 3920 return ("Intel Pentium(r) Pro"); 3921 case 3: 3922 case 4: 3923 return ("Intel Pentium(r) II"); 3924 case 6: 3925 return ("Intel Celeron(r)"); 3926 case 5: 3927 case 7: 3928 celeron = xeon = 0; 3929 cp = &cpi->cpi_std[2]; /* cache info */ 3930 3931 for (i = 1; i < 4; i++) { 3932 uint_t tmp; 3933 3934 tmp = (cp->cp_eax >> (8 * i)) & 0xff; 3935 if (tmp == 0x40) 3936 celeron++; 3937 if (tmp >= 0x44 && tmp <= 0x45) 3938 xeon++; 3939 } 3940 3941 for (i = 0; i < 2; i++) { 3942 uint_t tmp; 3943 3944 tmp = (cp->cp_ebx >> (8 * i)) & 0xff; 3945 if (tmp == 0x40) 3946 celeron++; 3947 else if (tmp >= 0x44 && tmp <= 0x45) 3948 xeon++; 3949 } 3950 3951 for (i = 0; i < 4; i++) { 3952 uint_t tmp; 3953 3954 tmp = (cp->cp_ecx >> (8 * i)) & 0xff; 3955 if (tmp == 0x40) 3956 celeron++; 3957 else if (tmp >= 0x44 && tmp <= 0x45) 3958 xeon++; 3959 } 3960 3961 for (i = 0; i < 4; i++) { 3962 uint_t tmp; 3963 3964 tmp = (cp->cp_edx >> (8 * i)) & 0xff; 3965 if (tmp == 0x40) 3966 celeron++; 3967 else if (tmp >= 0x44 && tmp <= 0x45) 3968 xeon++; 3969 } 3970 3971 if (celeron) 3972 return ("Intel Celeron(r)"); 3973 if (xeon) 3974 return (cpi->cpi_model == 5 ? 3975 "Intel Pentium(r) II Xeon(tm)" : 3976 "Intel Pentium(r) III Xeon(tm)"); 3977 return (cpi->cpi_model == 5 ? 
3978 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" : 3979 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)"); 3980 default: 3981 break; 3982 } 3983 default: 3984 break; 3985 } 3986 3987 /* BrandID is present if the field is nonzero */ 3988 if (cpi->cpi_brandid != 0) { 3989 static const struct { 3990 uint_t bt_bid; 3991 const char *bt_str; 3992 } brand_tbl[] = { 3993 { 0x1, "Intel(r) Celeron(r)" }, 3994 { 0x2, "Intel(r) Pentium(r) III" }, 3995 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" }, 3996 { 0x4, "Intel(r) Pentium(r) III" }, 3997 { 0x6, "Mobile Intel(r) Pentium(r) III" }, 3998 { 0x7, "Mobile Intel(r) Celeron(r)" }, 3999 { 0x8, "Intel(r) Pentium(r) 4" }, 4000 { 0x9, "Intel(r) Pentium(r) 4" }, 4001 { 0xa, "Intel(r) Celeron(r)" }, 4002 { 0xb, "Intel(r) Xeon(tm)" }, 4003 { 0xc, "Intel(r) Xeon(tm) MP" }, 4004 { 0xe, "Mobile Intel(r) Pentium(r) 4" }, 4005 { 0xf, "Mobile Intel(r) Celeron(r)" }, 4006 { 0x11, "Mobile Genuine Intel(r)" }, 4007 { 0x12, "Intel(r) Celeron(r) M" }, 4008 { 0x13, "Mobile Intel(r) Celeron(r)" }, 4009 { 0x14, "Intel(r) Celeron(r)" }, 4010 { 0x15, "Mobile Genuine Intel(r)" }, 4011 { 0x16, "Intel(r) Pentium(r) M" }, 4012 { 0x17, "Mobile Intel(r) Celeron(r)" } 4013 }; 4014 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]); 4015 uint_t sgn; 4016 4017 sgn = (cpi->cpi_family << 8) | 4018 (cpi->cpi_model << 4) | cpi->cpi_step; 4019 4020 for (i = 0; i < btblmax; i++) 4021 if (brand_tbl[i].bt_bid == cpi->cpi_brandid) 4022 break; 4023 if (i < btblmax) { 4024 if (sgn == 0x6b1 && cpi->cpi_brandid == 3) 4025 return ("Intel(r) Celeron(r)"); 4026 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb) 4027 return ("Intel(r) Xeon(tm) MP"); 4028 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe) 4029 return ("Intel(r) Xeon(tm)"); 4030 return (brand_tbl[i].bt_str); 4031 } 4032 } 4033 4034 return (NULL); 4035 } 4036 4037 static const char * 4038 amd_cpubrand(const struct cpuid_info *cpi) 4039 { 4040 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) || 4041 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5) 4042 return ("i486 compatible"); 4043 4044 switch (cpi->cpi_family) { 4045 case 5: 4046 switch (cpi->cpi_model) { 4047 case 0: 4048 case 1: 4049 case 2: 4050 case 3: 4051 case 4: 4052 case 5: 4053 return ("AMD-K5(r)"); 4054 case 6: 4055 case 7: 4056 return ("AMD-K6(r)"); 4057 case 8: 4058 return ("AMD-K6(r)-2"); 4059 case 9: 4060 return ("AMD-K6(r)-III"); 4061 default: 4062 return ("AMD (family 5)"); 4063 } 4064 case 6: 4065 switch (cpi->cpi_model) { 4066 case 1: 4067 return ("AMD-K7(tm)"); 4068 case 0: 4069 case 2: 4070 case 4: 4071 return ("AMD Athlon(tm)"); 4072 case 3: 4073 case 7: 4074 return ("AMD Duron(tm)"); 4075 case 6: 4076 case 8: 4077 case 10: 4078 /* 4079 * Use the L2 cache size to distinguish 4080 */ 4081 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ? 
4082 "AMD Athlon(tm)" : "AMD Duron(tm)"); 4083 default: 4084 return ("AMD (family 6)"); 4085 } 4086 default: 4087 break; 4088 } 4089 4090 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 && 4091 cpi->cpi_brandid != 0) { 4092 switch (BITX(cpi->cpi_brandid, 7, 5)) { 4093 case 3: 4094 return ("AMD Opteron(tm) UP 1xx"); 4095 case 4: 4096 return ("AMD Opteron(tm) DP 2xx"); 4097 case 5: 4098 return ("AMD Opteron(tm) MP 8xx"); 4099 default: 4100 return ("AMD Opteron(tm)"); 4101 } 4102 } 4103 4104 return (NULL); 4105 } 4106 4107 static const char * 4108 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type) 4109 { 4110 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) || 4111 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 || 4112 type == X86_TYPE_CYRIX_486) 4113 return ("i486 compatible"); 4114 4115 switch (type) { 4116 case X86_TYPE_CYRIX_6x86: 4117 return ("Cyrix 6x86"); 4118 case X86_TYPE_CYRIX_6x86L: 4119 return ("Cyrix 6x86L"); 4120 case X86_TYPE_CYRIX_6x86MX: 4121 return ("Cyrix 6x86MX"); 4122 case X86_TYPE_CYRIX_GXm: 4123 return ("Cyrix GXm"); 4124 case X86_TYPE_CYRIX_MediaGX: 4125 return ("Cyrix MediaGX"); 4126 case X86_TYPE_CYRIX_MII: 4127 return ("Cyrix M2"); 4128 case X86_TYPE_VIA_CYRIX_III: 4129 return ("VIA Cyrix M3"); 4130 default: 4131 /* 4132 * Have another wild guess .. 4133 */ 4134 if (cpi->cpi_family == 4 && cpi->cpi_model == 9) 4135 return ("Cyrix 5x86"); 4136 else if (cpi->cpi_family == 5) { 4137 switch (cpi->cpi_model) { 4138 case 2: 4139 return ("Cyrix 6x86"); /* Cyrix M1 */ 4140 case 4: 4141 return ("Cyrix MediaGX"); 4142 default: 4143 break; 4144 } 4145 } else if (cpi->cpi_family == 6) { 4146 switch (cpi->cpi_model) { 4147 case 0: 4148 return ("Cyrix 6x86MX"); /* Cyrix M2? */ 4149 case 5: 4150 case 6: 4151 case 7: 4152 case 8: 4153 case 9: 4154 return ("VIA C3"); 4155 default: 4156 break; 4157 } 4158 } 4159 break; 4160 } 4161 return (NULL); 4162 } 4163 4164 /* 4165 * This only gets called in the case that the CPU extended 4166 * feature brand string (0x80000002, 0x80000003, 0x80000004) 4167 * aren't available, or contain null bytes for some reason. 4168 */ 4169 static void 4170 fabricate_brandstr(struct cpuid_info *cpi) 4171 { 4172 const char *brand = NULL; 4173 4174 switch (cpi->cpi_vendor) { 4175 case X86_VENDOR_Intel: 4176 brand = intel_cpubrand(cpi); 4177 break; 4178 case X86_VENDOR_AMD: 4179 brand = amd_cpubrand(cpi); 4180 break; 4181 case X86_VENDOR_Cyrix: 4182 brand = cyrix_cpubrand(cpi, x86_type); 4183 break; 4184 case X86_VENDOR_NexGen: 4185 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 4186 brand = "NexGen Nx586"; 4187 break; 4188 case X86_VENDOR_Centaur: 4189 if (cpi->cpi_family == 5) 4190 switch (cpi->cpi_model) { 4191 case 4: 4192 brand = "Centaur C6"; 4193 break; 4194 case 8: 4195 brand = "Centaur C2"; 4196 break; 4197 case 9: 4198 brand = "Centaur C3"; 4199 break; 4200 default: 4201 break; 4202 } 4203 break; 4204 case X86_VENDOR_Rise: 4205 if (cpi->cpi_family == 5 && 4206 (cpi->cpi_model == 0 || cpi->cpi_model == 2)) 4207 brand = "Rise mP6"; 4208 break; 4209 case X86_VENDOR_SiS: 4210 if (cpi->cpi_family == 5 && cpi->cpi_model == 0) 4211 brand = "SiS 55x"; 4212 break; 4213 case X86_VENDOR_TM: 4214 if (cpi->cpi_family == 5 && cpi->cpi_model == 4) 4215 brand = "Transmeta Crusoe TM3x00 or TM5x00"; 4216 break; 4217 case X86_VENDOR_NSC: 4218 case X86_VENDOR_UMC: 4219 default: 4220 break; 4221 } 4222 if (brand) { 4223 (void) strcpy((char *)cpi->cpi_brandstr, brand); 4224 return; 4225 } 4226 4227 /* 4228 * If all else fails ... 
4229 */ 4230 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr), 4231 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family, 4232 cpi->cpi_model, cpi->cpi_step); 4233 } 4234 4235 /* 4236 * This routine is called just after kernel memory allocation 4237 * becomes available on cpu0, and as part of mp_startup() on 4238 * the other cpus. 4239 * 4240 * Fixup the brand string, and collect any information from cpuid 4241 * that requires dynamically allocated storage to represent. 4242 */ 4243 /*ARGSUSED*/ 4244 void 4245 cpuid_pass3(cpu_t *cpu) 4246 { 4247 int i, max, shft, level, size; 4248 struct cpuid_regs regs; 4249 struct cpuid_regs *cp; 4250 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 4251 4252 ASSERT(cpi->cpi_pass == 2); 4253 4254 /* 4255 * Deterministic cache parameters 4256 * 4257 * Intel uses leaf 0x4 for this, while AMD uses leaf 0x8000001d. The 4258 * values that are present are currently defined to be the same. This 4259 * means we can use the same logic to parse it as long as we use the 4260 * appropriate leaf to get the data. If you're updating this, make sure 4261 * you're careful about which vendor supports which aspect. 4262 * 4263 * Take this opportunity to detect the number of threads sharing the 4264 * last level cache, and construct a corresponding cache id. The 4265 * respective cpuid_info members are initialized to the default case of 4266 * "no last level cache sharing". 4267 */ 4268 cpi->cpi_ncpu_shr_last_cache = 1; 4269 cpi->cpi_last_lvl_cacheid = cpu->cpu_id; 4270 4271 if ((cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) || 4272 (cpi->cpi_vendor == X86_VENDOR_AMD && 4273 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1d && 4274 is_x86_feature(x86_featureset, X86FSET_TOPOEXT))) { 4275 uint32_t leaf; 4276 4277 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 4278 leaf = 4; 4279 } else { 4280 leaf = CPUID_LEAF_EXT_1d; 4281 } 4282 4283 /* 4284 * Find the # of elements (size) returned by the leaf and along 4285 * the way detect last level cache sharing details. 4286 */ 4287 bzero(&regs, sizeof (regs)); 4288 cp = &regs; 4289 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) { 4290 cp->cp_eax = leaf; 4291 cp->cp_ecx = i; 4292 4293 (void) __cpuid_insn(cp); 4294 4295 if (CPI_CACHE_TYPE(cp) == 0) 4296 break; 4297 level = CPI_CACHE_LVL(cp); 4298 if (level > max) { 4299 max = level; 4300 cpi->cpi_ncpu_shr_last_cache = 4301 CPI_NTHR_SHR_CACHE(cp) + 1; 4302 } 4303 } 4304 cpi->cpi_cache_leaf_size = size = i; 4305 4306 /* 4307 * Allocate the cpi_cache_leaves array. The first element 4308 * references the regs for the corresponding leaf with %ecx set 4309 * to 0. This was gathered in cpuid_pass2(). 4310 */ 4311 if (size > 0) { 4312 cpi->cpi_cache_leaves = 4313 kmem_alloc(size * sizeof (cp), KM_SLEEP); 4314 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 4315 cpi->cpi_cache_leaves[0] = &cpi->cpi_std[4]; 4316 } else { 4317 cpi->cpi_cache_leaves[0] = &cpi->cpi_extd[0x1d]; 4318 } 4319 4320 /* 4321 * Allocate storage to hold the additional regs 4322 * for the leaf, %ecx == 1 .. cpi_cache_leaf_size. 4323 * 4324 * The regs for the leaf, %ecx == 0 has already 4325 * been allocated as indicated above. 4326 */ 4327 for (i = 1; i < size; i++) { 4328 cp = cpi->cpi_cache_leaves[i] = 4329 kmem_zalloc(sizeof (regs), KM_SLEEP); 4330 cp->cp_eax = leaf; 4331 cp->cp_ecx = i; 4332 4333 (void) __cpuid_insn(cp); 4334 } 4335 } 4336 /* 4337 * Determine the number of bits needed to represent 4338 * the number of CPUs sharing the last level cache.
4339 * 4340 * Shift off that number of bits from the APIC id to 4341 * derive the cache id. 4342 */ 4343 shft = 0; 4344 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1) 4345 shft++; 4346 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft; 4347 } 4348 4349 /* 4350 * Now fixup the brand string 4351 */ 4352 if ((cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) == 0) { 4353 fabricate_brandstr(cpi); 4354 } else { 4355 4356 /* 4357 * If we successfully extracted a brand string from the cpuid 4358 * instruction, clean it up by removing leading spaces and 4359 * similar junk. 4360 */ 4361 if (cpi->cpi_brandstr[0]) { 4362 size_t maxlen = sizeof (cpi->cpi_brandstr); 4363 char *src, *dst; 4364 4365 dst = src = (char *)cpi->cpi_brandstr; 4366 src[maxlen - 1] = '\0'; 4367 /* 4368 * strip leading spaces 4369 */ 4370 while (*src == ' ') 4371 src++; 4372 /* 4373 * Remove any 'Genuine' or "Authentic" prefixes 4374 */ 4375 if (strncmp(src, "Genuine ", 8) == 0) 4376 src += 8; 4377 if (strncmp(src, "Authentic ", 10) == 0) 4378 src += 10; 4379 4380 /* 4381 * Now do an in-place copy. 4382 * Map (R) to (r) and (TM) to (tm). 4383 * The era of teletypes is long gone, and there's 4384 * -really- no need to shout. 4385 */ 4386 while (*src != '\0') { 4387 if (src[0] == '(') { 4388 if (strncmp(src + 1, "R)", 2) == 0) { 4389 (void) strncpy(dst, "(r)", 3); 4390 src += 3; 4391 dst += 3; 4392 continue; 4393 } 4394 if (strncmp(src + 1, "TM)", 3) == 0) { 4395 (void) strncpy(dst, "(tm)", 4); 4396 src += 4; 4397 dst += 4; 4398 continue; 4399 } 4400 } 4401 *dst++ = *src++; 4402 } 4403 *dst = '\0'; 4404 4405 /* 4406 * Finally, remove any trailing spaces 4407 */ 4408 while (--dst > cpi->cpi_brandstr) 4409 if (*dst == ' ') 4410 *dst = '\0'; 4411 else 4412 break; 4413 } else 4414 fabricate_brandstr(cpi); 4415 } 4416 cpi->cpi_pass = 3; 4417 } 4418 4419 /* 4420 * This routine is called out of bind_hwcap() much later in the life 4421 * of the kernel (post_startup()). The job of this routine is to resolve 4422 * the hardware feature support and kernel support for those features into 4423 * what we're actually going to tell applications via the aux vector. 
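 *
 * The two hwcap words assembled here are handed back through hwcap_out[]
 * and ultimately surface in userland as the AT_SUN_HWCAP and
 * AT_SUN_HWCAP2 aux vector entries. A minimal sketch of how an
 * application might consume them (userland code, for illustration only;
 * getisax(2) and the AV_386_* definitions come from <sys/auxv.h>):
 *
 *	uint32_t caps[2];
 *
 *	(void) getisax(caps, 2);
 *	if (caps[0] & AV_386_SSE2)
 *		use_sse2_path();
 *
 * where use_sse2_path() is a hypothetical function standing in for
 * whatever the consuming program does with the information.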
4424 */ 4425 void 4426 cpuid_pass4(cpu_t *cpu, uint_t *hwcap_out) 4427 { 4428 struct cpuid_info *cpi; 4429 uint_t hwcap_flags = 0, hwcap_flags_2 = 0; 4430 4431 if (cpu == NULL) 4432 cpu = CPU; 4433 cpi = cpu->cpu_m.mcpu_cpi; 4434 4435 ASSERT(cpi->cpi_pass == 3); 4436 4437 if (cpi->cpi_maxeax >= 1) { 4438 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES]; 4439 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES]; 4440 uint32_t *ebx = &cpi->cpi_support[STD_EBX_FEATURES]; 4441 4442 *edx = CPI_FEATURES_EDX(cpi); 4443 *ecx = CPI_FEATURES_ECX(cpi); 4444 *ebx = CPI_FEATURES_7_0_EBX(cpi); 4445 4446 /* 4447 * [these require explicit kernel support] 4448 */ 4449 if (!is_x86_feature(x86_featureset, X86FSET_SEP)) 4450 *edx &= ~CPUID_INTC_EDX_SEP; 4451 4452 if (!is_x86_feature(x86_featureset, X86FSET_SSE)) 4453 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE); 4454 if (!is_x86_feature(x86_featureset, X86FSET_SSE2)) 4455 *edx &= ~CPUID_INTC_EDX_SSE2; 4456 4457 if (!is_x86_feature(x86_featureset, X86FSET_HTT)) 4458 *edx &= ~CPUID_INTC_EDX_HTT; 4459 4460 if (!is_x86_feature(x86_featureset, X86FSET_SSE3)) 4461 *ecx &= ~CPUID_INTC_ECX_SSE3; 4462 4463 if (!is_x86_feature(x86_featureset, X86FSET_SSSE3)) 4464 *ecx &= ~CPUID_INTC_ECX_SSSE3; 4465 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_1)) 4466 *ecx &= ~CPUID_INTC_ECX_SSE4_1; 4467 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_2)) 4468 *ecx &= ~CPUID_INTC_ECX_SSE4_2; 4469 if (!is_x86_feature(x86_featureset, X86FSET_AES)) 4470 *ecx &= ~CPUID_INTC_ECX_AES; 4471 if (!is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ)) 4472 *ecx &= ~CPUID_INTC_ECX_PCLMULQDQ; 4473 if (!is_x86_feature(x86_featureset, X86FSET_XSAVE)) 4474 *ecx &= ~(CPUID_INTC_ECX_XSAVE | 4475 CPUID_INTC_ECX_OSXSAVE); 4476 if (!is_x86_feature(x86_featureset, X86FSET_AVX)) 4477 *ecx &= ~CPUID_INTC_ECX_AVX; 4478 if (!is_x86_feature(x86_featureset, X86FSET_F16C)) 4479 *ecx &= ~CPUID_INTC_ECX_F16C; 4480 if (!is_x86_feature(x86_featureset, X86FSET_FMA)) 4481 *ecx &= ~CPUID_INTC_ECX_FMA; 4482 if (!is_x86_feature(x86_featureset, X86FSET_BMI1)) 4483 *ebx &= ~CPUID_INTC_EBX_7_0_BMI1; 4484 if (!is_x86_feature(x86_featureset, X86FSET_BMI2)) 4485 *ebx &= ~CPUID_INTC_EBX_7_0_BMI2; 4486 if (!is_x86_feature(x86_featureset, X86FSET_AVX2)) 4487 *ebx &= ~CPUID_INTC_EBX_7_0_AVX2; 4488 if (!is_x86_feature(x86_featureset, X86FSET_RDSEED)) 4489 *ebx &= ~CPUID_INTC_EBX_7_0_RDSEED; 4490 if (!is_x86_feature(x86_featureset, X86FSET_ADX)) 4491 *ebx &= ~CPUID_INTC_EBX_7_0_ADX; 4492 4493 /* 4494 * [no explicit support required beyond x87 fp context] 4495 */ 4496 if (!fpu_exists) 4497 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX); 4498 4499 /* 4500 * Now map the supported feature vector to things that we 4501 * think userland will care about. 
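 *
 * By this point *edx, *ecx and *ebx have already been filtered against
 * x86_featureset above, so a bit is only advertised to userland when the
 * hardware reports it and the kernel also supports it; for example,
 * AV_386_SSE is set below only if X86FSET_SSE survived the filtering,
 * since otherwise CPUID_INTC_EDX_FXSR and CPUID_INTC_EDX_SSE were
 * already cleared from *edx.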
4502 */ 4503 if (*edx & CPUID_INTC_EDX_SEP) 4504 hwcap_flags |= AV_386_SEP; 4505 if (*edx & CPUID_INTC_EDX_SSE) 4506 hwcap_flags |= AV_386_FXSR | AV_386_SSE; 4507 if (*edx & CPUID_INTC_EDX_SSE2) 4508 hwcap_flags |= AV_386_SSE2; 4509 if (*ecx & CPUID_INTC_ECX_SSE3) 4510 hwcap_flags |= AV_386_SSE3; 4511 if (*ecx & CPUID_INTC_ECX_SSSE3) 4512 hwcap_flags |= AV_386_SSSE3; 4513 if (*ecx & CPUID_INTC_ECX_SSE4_1) 4514 hwcap_flags |= AV_386_SSE4_1; 4515 if (*ecx & CPUID_INTC_ECX_SSE4_2) 4516 hwcap_flags |= AV_386_SSE4_2; 4517 if (*ecx & CPUID_INTC_ECX_MOVBE) 4518 hwcap_flags |= AV_386_MOVBE; 4519 if (*ecx & CPUID_INTC_ECX_AES) 4520 hwcap_flags |= AV_386_AES; 4521 if (*ecx & CPUID_INTC_ECX_PCLMULQDQ) 4522 hwcap_flags |= AV_386_PCLMULQDQ; 4523 if ((*ecx & CPUID_INTC_ECX_XSAVE) && 4524 (*ecx & CPUID_INTC_ECX_OSXSAVE)) { 4525 hwcap_flags |= AV_386_XSAVE; 4526 4527 if (*ecx & CPUID_INTC_ECX_AVX) { 4528 uint32_t *ecx_7 = &CPI_FEATURES_7_0_ECX(cpi); 4529 uint32_t *edx_7 = &CPI_FEATURES_7_0_EDX(cpi); 4530 4531 hwcap_flags |= AV_386_AVX; 4532 if (*ecx & CPUID_INTC_ECX_F16C) 4533 hwcap_flags_2 |= AV_386_2_F16C; 4534 if (*ecx & CPUID_INTC_ECX_FMA) 4535 hwcap_flags_2 |= AV_386_2_FMA; 4536 4537 if (*ebx & CPUID_INTC_EBX_7_0_BMI1) 4538 hwcap_flags_2 |= AV_386_2_BMI1; 4539 if (*ebx & CPUID_INTC_EBX_7_0_BMI2) 4540 hwcap_flags_2 |= AV_386_2_BMI2; 4541 if (*ebx & CPUID_INTC_EBX_7_0_AVX2) 4542 hwcap_flags_2 |= AV_386_2_AVX2; 4543 if (*ebx & CPUID_INTC_EBX_7_0_AVX512F) 4544 hwcap_flags_2 |= AV_386_2_AVX512F; 4545 if (*ebx & CPUID_INTC_EBX_7_0_AVX512DQ) 4546 hwcap_flags_2 |= AV_386_2_AVX512DQ; 4547 if (*ebx & CPUID_INTC_EBX_7_0_AVX512IFMA) 4548 hwcap_flags_2 |= AV_386_2_AVX512IFMA; 4549 if (*ebx & CPUID_INTC_EBX_7_0_AVX512PF) 4550 hwcap_flags_2 |= AV_386_2_AVX512PF; 4551 if (*ebx & CPUID_INTC_EBX_7_0_AVX512ER) 4552 hwcap_flags_2 |= AV_386_2_AVX512ER; 4553 if (*ebx & CPUID_INTC_EBX_7_0_AVX512CD) 4554 hwcap_flags_2 |= AV_386_2_AVX512CD; 4555 if (*ebx & CPUID_INTC_EBX_7_0_AVX512BW) 4556 hwcap_flags_2 |= AV_386_2_AVX512BW; 4557 if (*ebx & CPUID_INTC_EBX_7_0_AVX512VL) 4558 hwcap_flags_2 |= AV_386_2_AVX512VL; 4559 4560 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VBMI) 4561 hwcap_flags_2 |= AV_386_2_AVX512VBMI; 4562 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VNNI) 4563 hwcap_flags_2 |= AV_386_2_AVX512_VNNI; 4564 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VPOPCDQ) 4565 hwcap_flags_2 |= AV_386_2_AVX512VPOPCDQ; 4566 4567 if (*edx_7 & CPUID_INTC_EDX_7_0_AVX5124NNIW) 4568 hwcap_flags_2 |= AV_386_2_AVX512_4NNIW; 4569 if (*edx_7 & CPUID_INTC_EDX_7_0_AVX5124FMAPS) 4570 hwcap_flags_2 |= AV_386_2_AVX512_4FMAPS; 4571 } 4572 } 4573 if (*ecx & CPUID_INTC_ECX_VMX) 4574 hwcap_flags |= AV_386_VMX; 4575 if (*ecx & CPUID_INTC_ECX_POPCNT) 4576 hwcap_flags |= AV_386_POPCNT; 4577 if (*edx & CPUID_INTC_EDX_FPU) 4578 hwcap_flags |= AV_386_FPU; 4579 if (*edx & CPUID_INTC_EDX_MMX) 4580 hwcap_flags |= AV_386_MMX; 4581 4582 if (*edx & CPUID_INTC_EDX_TSC) 4583 hwcap_flags |= AV_386_TSC; 4584 if (*edx & CPUID_INTC_EDX_CX8) 4585 hwcap_flags |= AV_386_CX8; 4586 if (*edx & CPUID_INTC_EDX_CMOV) 4587 hwcap_flags |= AV_386_CMOV; 4588 if (*ecx & CPUID_INTC_ECX_CX16) 4589 hwcap_flags |= AV_386_CX16; 4590 4591 if (*ecx & CPUID_INTC_ECX_RDRAND) 4592 hwcap_flags_2 |= AV_386_2_RDRAND; 4593 if (*ebx & CPUID_INTC_EBX_7_0_ADX) 4594 hwcap_flags_2 |= AV_386_2_ADX; 4595 if (*ebx & CPUID_INTC_EBX_7_0_RDSEED) 4596 hwcap_flags_2 |= AV_386_2_RDSEED; 4597 if (*ebx & CPUID_INTC_EBX_7_0_SHA) 4598 hwcap_flags_2 |= AV_386_2_SHA; 4599 if (*ebx & CPUID_INTC_EBX_7_0_FSGSBASE) 4600 hwcap_flags_2 |= 
AV_386_2_FSGSBASE; 4601 if (*ebx & CPUID_INTC_EBX_7_0_CLWB) 4602 hwcap_flags_2 |= AV_386_2_CLWB; 4603 if (*ebx & CPUID_INTC_EBX_7_0_CLFLUSHOPT) 4604 hwcap_flags_2 |= AV_386_2_CLFLUSHOPT; 4605 4606 } 4607 /* 4608 * Check a few miscellaneous features. 4609 */ 4610 if (is_x86_feature(x86_featureset, X86FSET_CLZERO)) 4611 hwcap_flags_2 |= AV_386_2_CLZERO; 4612 4613 if (cpi->cpi_xmaxeax < 0x80000001) 4614 goto pass4_done; 4615 4616 switch (cpi->cpi_vendor) { 4617 struct cpuid_regs cp; 4618 uint32_t *edx, *ecx; 4619 4620 case X86_VENDOR_Intel: 4621 /* 4622 * Seems like Intel duplicated what was necessary 4623 * here to make the initial crop of 64-bit OS's work. 4624 * Hopefully, those are the only "extended" bits 4625 * they'll add. 4626 */ 4627 /*FALLTHROUGH*/ 4628 4629 case X86_VENDOR_AMD: 4630 edx = &cpi->cpi_support[AMD_EDX_FEATURES]; 4631 ecx = &cpi->cpi_support[AMD_ECX_FEATURES]; 4632 4633 *edx = CPI_FEATURES_XTD_EDX(cpi); 4634 *ecx = CPI_FEATURES_XTD_ECX(cpi); 4635 4636 /* 4637 * [these features require explicit kernel support] 4638 */ 4639 switch (cpi->cpi_vendor) { 4640 case X86_VENDOR_Intel: 4641 if (!is_x86_feature(x86_featureset, X86FSET_TSCP)) 4642 *edx &= ~CPUID_AMD_EDX_TSCP; 4643 break; 4644 4645 case X86_VENDOR_AMD: 4646 if (!is_x86_feature(x86_featureset, X86FSET_TSCP)) 4647 *edx &= ~CPUID_AMD_EDX_TSCP; 4648 if (!is_x86_feature(x86_featureset, X86FSET_SSE4A)) 4649 *ecx &= ~CPUID_AMD_ECX_SSE4A; 4650 break; 4651 4652 default: 4653 break; 4654 } 4655 4656 /* 4657 * [no explicit support required beyond 4658 * x87 fp context and exception handlers] 4659 */ 4660 if (!fpu_exists) 4661 *edx &= ~(CPUID_AMD_EDX_MMXamd | 4662 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx); 4663 4664 if (!is_x86_feature(x86_featureset, X86FSET_NX)) 4665 *edx &= ~CPUID_AMD_EDX_NX; 4666 #if !defined(__amd64) 4667 *edx &= ~CPUID_AMD_EDX_LM; 4668 #endif 4669 /* 4670 * Now map the supported feature vector to 4671 * things that we think userland will care about. 4672 */ 4673 #if defined(__amd64) 4674 if (*edx & CPUID_AMD_EDX_SYSC) 4675 hwcap_flags |= AV_386_AMD_SYSC; 4676 #endif 4677 if (*edx & CPUID_AMD_EDX_MMXamd) 4678 hwcap_flags |= AV_386_AMD_MMX; 4679 if (*edx & CPUID_AMD_EDX_3DNow) 4680 hwcap_flags |= AV_386_AMD_3DNow; 4681 if (*edx & CPUID_AMD_EDX_3DNowx) 4682 hwcap_flags |= AV_386_AMD_3DNowx; 4683 if (*ecx & CPUID_AMD_ECX_SVM) 4684 hwcap_flags |= AV_386_AMD_SVM; 4685 4686 switch (cpi->cpi_vendor) { 4687 case X86_VENDOR_AMD: 4688 if (*edx & CPUID_AMD_EDX_TSCP) 4689 hwcap_flags |= AV_386_TSCP; 4690 if (*ecx & CPUID_AMD_ECX_AHF64) 4691 hwcap_flags |= AV_386_AHF; 4692 if (*ecx & CPUID_AMD_ECX_SSE4A) 4693 hwcap_flags |= AV_386_AMD_SSE4A; 4694 if (*ecx & CPUID_AMD_ECX_LZCNT) 4695 hwcap_flags |= AV_386_AMD_LZCNT; 4696 if (*ecx & CPUID_AMD_ECX_MONITORX) 4697 hwcap_flags_2 |= AV_386_2_MONITORX; 4698 break; 4699 4700 case X86_VENDOR_Intel: 4701 if (*edx & CPUID_AMD_EDX_TSCP) 4702 hwcap_flags |= AV_386_TSCP; 4703 if (*ecx & CPUID_AMD_ECX_LZCNT) 4704 hwcap_flags |= AV_386_AMD_LZCNT; 4705 /* 4706 * Aarrgh. 4707 * Intel uses a different bit in the same word.
4708 */ 4709 if (*ecx & CPUID_INTC_ECX_AHF64) 4710 hwcap_flags |= AV_386_AHF; 4711 break; 4712 4713 default: 4714 break; 4715 } 4716 break; 4717 4718 case X86_VENDOR_TM: 4719 cp.cp_eax = 0x80860001; 4720 (void) __cpuid_insn(&cp); 4721 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx; 4722 break; 4723 4724 default: 4725 break; 4726 } 4727 4728 pass4_done: 4729 cpi->cpi_pass = 4; 4730 if (hwcap_out != NULL) { 4731 hwcap_out[0] = hwcap_flags; 4732 hwcap_out[1] = hwcap_flags_2; 4733 } 4734 } 4735 4736 4737 /* 4738 * Simulate the cpuid instruction using the data we previously 4739 * captured about this CPU. We try our best to return the truth 4740 * about the hardware, independently of kernel support. 4741 */ 4742 uint32_t 4743 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp) 4744 { 4745 struct cpuid_info *cpi; 4746 struct cpuid_regs *xcp; 4747 4748 if (cpu == NULL) 4749 cpu = CPU; 4750 cpi = cpu->cpu_m.mcpu_cpi; 4751 4752 ASSERT(cpuid_checkpass(cpu, 3)); 4753 4754 /* 4755 * CPUID data is cached in two separate places: cpi_std for standard 4756 * CPUID leaves , and cpi_extd for extended CPUID leaves. 4757 */ 4758 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD) { 4759 xcp = &cpi->cpi_std[cp->cp_eax]; 4760 } else if (cp->cp_eax >= CPUID_LEAF_EXT_0 && 4761 cp->cp_eax <= cpi->cpi_xmaxeax && 4762 cp->cp_eax < CPUID_LEAF_EXT_0 + NMAX_CPI_EXTD) { 4763 xcp = &cpi->cpi_extd[cp->cp_eax - CPUID_LEAF_EXT_0]; 4764 } else { 4765 /* 4766 * The caller is asking for data from an input parameter which 4767 * the kernel has not cached. In this case we go fetch from 4768 * the hardware and return the data directly to the user. 4769 */ 4770 return (__cpuid_insn(cp)); 4771 } 4772 4773 cp->cp_eax = xcp->cp_eax; 4774 cp->cp_ebx = xcp->cp_ebx; 4775 cp->cp_ecx = xcp->cp_ecx; 4776 cp->cp_edx = xcp->cp_edx; 4777 return (cp->cp_eax); 4778 } 4779 4780 int 4781 cpuid_checkpass(cpu_t *cpu, int pass) 4782 { 4783 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL && 4784 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass); 4785 } 4786 4787 int 4788 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n) 4789 { 4790 ASSERT(cpuid_checkpass(cpu, 3)); 4791 4792 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr)); 4793 } 4794 4795 int 4796 cpuid_is_cmt(cpu_t *cpu) 4797 { 4798 if (cpu == NULL) 4799 cpu = CPU; 4800 4801 ASSERT(cpuid_checkpass(cpu, 1)); 4802 4803 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0); 4804 } 4805 4806 /* 4807 * AMD and Intel both implement the 64-bit variant of the syscall 4808 * instruction (syscallq), so if there's -any- support for syscall, 4809 * cpuid currently says "yes, we support this". 4810 * 4811 * However, Intel decided to -not- implement the 32-bit variant of the 4812 * syscall instruction, so we provide a predicate to allow our caller 4813 * to test that subtlety here. 4814 * 4815 * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor, 4816 * even in the case where the hardware would in fact support it. 4817 */ 4818 /*ARGSUSED*/ 4819 int 4820 cpuid_syscall32_insn(cpu_t *cpu) 4821 { 4822 ASSERT(cpuid_checkpass((cpu == NULL ? 
CPU : cpu), 1)); 4823 4824 #if !defined(__xpv) 4825 if (cpu == NULL) 4826 cpu = CPU; 4827 4828 /*CSTYLED*/ 4829 { 4830 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 4831 4832 if (cpi->cpi_vendor == X86_VENDOR_AMD && 4833 cpi->cpi_xmaxeax >= 0x80000001 && 4834 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC)) 4835 return (1); 4836 } 4837 #endif 4838 return (0); 4839 } 4840 4841 int 4842 cpuid_getidstr(cpu_t *cpu, char *s, size_t n) 4843 { 4844 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 4845 4846 static const char fmt[] = 4847 "x86 (%s %X family %d model %d step %d clock %d MHz)"; 4848 static const char fmt_ht[] = 4849 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)"; 4850 4851 ASSERT(cpuid_checkpass(cpu, 1)); 4852 4853 if (cpuid_is_cmt(cpu)) 4854 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid, 4855 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 4856 cpi->cpi_family, cpi->cpi_model, 4857 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 4858 return (snprintf(s, n, fmt, 4859 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax, 4860 cpi->cpi_family, cpi->cpi_model, 4861 cpi->cpi_step, cpu->cpu_type_info.pi_clock)); 4862 } 4863 4864 const char * 4865 cpuid_getvendorstr(cpu_t *cpu) 4866 { 4867 ASSERT(cpuid_checkpass(cpu, 1)); 4868 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr); 4869 } 4870 4871 uint_t 4872 cpuid_getvendor(cpu_t *cpu) 4873 { 4874 ASSERT(cpuid_checkpass(cpu, 1)); 4875 return (cpu->cpu_m.mcpu_cpi->cpi_vendor); 4876 } 4877 4878 uint_t 4879 cpuid_getfamily(cpu_t *cpu) 4880 { 4881 ASSERT(cpuid_checkpass(cpu, 1)); 4882 return (cpu->cpu_m.mcpu_cpi->cpi_family); 4883 } 4884 4885 uint_t 4886 cpuid_getmodel(cpu_t *cpu) 4887 { 4888 ASSERT(cpuid_checkpass(cpu, 1)); 4889 return (cpu->cpu_m.mcpu_cpi->cpi_model); 4890 } 4891 4892 uint_t 4893 cpuid_get_ncpu_per_chip(cpu_t *cpu) 4894 { 4895 ASSERT(cpuid_checkpass(cpu, 1)); 4896 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip); 4897 } 4898 4899 uint_t 4900 cpuid_get_ncore_per_chip(cpu_t *cpu) 4901 { 4902 ASSERT(cpuid_checkpass(cpu, 1)); 4903 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip); 4904 } 4905 4906 uint_t 4907 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu) 4908 { 4909 ASSERT(cpuid_checkpass(cpu, 2)); 4910 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache); 4911 } 4912 4913 id_t 4914 cpuid_get_last_lvl_cacheid(cpu_t *cpu) 4915 { 4916 ASSERT(cpuid_checkpass(cpu, 2)); 4917 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid); 4918 } 4919 4920 uint_t 4921 cpuid_getstep(cpu_t *cpu) 4922 { 4923 ASSERT(cpuid_checkpass(cpu, 1)); 4924 return (cpu->cpu_m.mcpu_cpi->cpi_step); 4925 } 4926 4927 uint_t 4928 cpuid_getsig(struct cpu *cpu) 4929 { 4930 ASSERT(cpuid_checkpass(cpu, 1)); 4931 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax); 4932 } 4933 4934 uint32_t 4935 cpuid_getchiprev(struct cpu *cpu) 4936 { 4937 ASSERT(cpuid_checkpass(cpu, 1)); 4938 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev); 4939 } 4940 4941 const char * 4942 cpuid_getchiprevstr(struct cpu *cpu) 4943 { 4944 ASSERT(cpuid_checkpass(cpu, 1)); 4945 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr); 4946 } 4947 4948 uint32_t 4949 cpuid_getsockettype(struct cpu *cpu) 4950 { 4951 ASSERT(cpuid_checkpass(cpu, 1)); 4952 return (cpu->cpu_m.mcpu_cpi->cpi_socket); 4953 } 4954 4955 const char * 4956 cpuid_getsocketstr(cpu_t *cpu) 4957 { 4958 static const char *socketstr = NULL; 4959 struct cpuid_info *cpi; 4960 4961 ASSERT(cpuid_checkpass(cpu, 1)); 4962 cpi = cpu->cpu_m.mcpu_cpi; 4963 4964 /* Assume that socket types are the same across the system */ 4965 if (socketstr == NULL) 4966 socketstr 
= _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family, 4967 cpi->cpi_model, cpi->cpi_step); 4968 4969 4970 return (socketstr); 4971 } 4972 4973 int 4974 cpuid_get_chipid(cpu_t *cpu) 4975 { 4976 ASSERT(cpuid_checkpass(cpu, 1)); 4977 4978 if (cpuid_is_cmt(cpu)) 4979 return (cpu->cpu_m.mcpu_cpi->cpi_chipid); 4980 return (cpu->cpu_id); 4981 } 4982 4983 id_t 4984 cpuid_get_coreid(cpu_t *cpu) 4985 { 4986 ASSERT(cpuid_checkpass(cpu, 1)); 4987 return (cpu->cpu_m.mcpu_cpi->cpi_coreid); 4988 } 4989 4990 int 4991 cpuid_get_pkgcoreid(cpu_t *cpu) 4992 { 4993 ASSERT(cpuid_checkpass(cpu, 1)); 4994 return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid); 4995 } 4996 4997 int 4998 cpuid_get_clogid(cpu_t *cpu) 4999 { 5000 ASSERT(cpuid_checkpass(cpu, 1)); 5001 return (cpu->cpu_m.mcpu_cpi->cpi_clogid); 5002 } 5003 5004 int 5005 cpuid_get_cacheid(cpu_t *cpu) 5006 { 5007 ASSERT(cpuid_checkpass(cpu, 1)); 5008 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid); 5009 } 5010 5011 uint_t 5012 cpuid_get_procnodeid(cpu_t *cpu) 5013 { 5014 ASSERT(cpuid_checkpass(cpu, 1)); 5015 return (cpu->cpu_m.mcpu_cpi->cpi_procnodeid); 5016 } 5017 5018 uint_t 5019 cpuid_get_procnodes_per_pkg(cpu_t *cpu) 5020 { 5021 ASSERT(cpuid_checkpass(cpu, 1)); 5022 return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg); 5023 } 5024 5025 uint_t 5026 cpuid_get_compunitid(cpu_t *cpu) 5027 { 5028 ASSERT(cpuid_checkpass(cpu, 1)); 5029 return (cpu->cpu_m.mcpu_cpi->cpi_compunitid); 5030 } 5031 5032 uint_t 5033 cpuid_get_cores_per_compunit(cpu_t *cpu) 5034 { 5035 ASSERT(cpuid_checkpass(cpu, 1)); 5036 return (cpu->cpu_m.mcpu_cpi->cpi_cores_per_compunit); 5037 } 5038 5039 /*ARGSUSED*/ 5040 int 5041 cpuid_have_cr8access(cpu_t *cpu) 5042 { 5043 #if defined(__amd64) 5044 return (1); 5045 #else 5046 struct cpuid_info *cpi; 5047 5048 ASSERT(cpu != NULL); 5049 cpi = cpu->cpu_m.mcpu_cpi; 5050 if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 && 5051 (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0) 5052 return (1); 5053 return (0); 5054 #endif 5055 } 5056 5057 uint32_t 5058 cpuid_get_apicid(cpu_t *cpu) 5059 { 5060 ASSERT(cpuid_checkpass(cpu, 1)); 5061 if (cpu->cpu_m.mcpu_cpi->cpi_maxeax < 1) { 5062 return (UINT32_MAX); 5063 } else { 5064 return (cpu->cpu_m.mcpu_cpi->cpi_apicid); 5065 } 5066 } 5067 5068 void 5069 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits) 5070 { 5071 struct cpuid_info *cpi; 5072 5073 if (cpu == NULL) 5074 cpu = CPU; 5075 cpi = cpu->cpu_m.mcpu_cpi; 5076 5077 ASSERT(cpuid_checkpass(cpu, 1)); 5078 5079 if (pabits) 5080 *pabits = cpi->cpi_pabits; 5081 if (vabits) 5082 *vabits = cpi->cpi_vabits; 5083 } 5084 5085 size_t 5086 cpuid_get_xsave_size() 5087 { 5088 return (MAX(cpuid_info0.cpi_xsave.xsav_max_size, 5089 sizeof (struct xsave_state))); 5090 } 5091 5092 /* 5093 * Return true if the CPUs on this system require 'pointer clearing' for the 5094 * floating point error pointer exception handling. In the past, this has been 5095 * true for all AMD K7 & K8 CPUs, although newer AMD CPUs have been changed to 5096 * behave the same as Intel. This is checked via the CPUID_AMD_EBX_ERR_PTR_ZERO 5097 * feature bit and is reflected in the cpi_fp_amd_save member. 5098 */ 5099 boolean_t 5100 cpuid_need_fp_excp_handling() 5101 { 5102 return (cpuid_info0.cpi_vendor == X86_VENDOR_AMD && 5103 cpuid_info0.cpi_fp_amd_save != 0); 5104 } 5105 5106 /* 5107 * Returns the number of data TLB entries for a corresponding 5108 * pagesize. If it can't be computed, or isn't known, the 5109 * routine returns zero. 
If you ask about an architecturally 5110 * impossible pagesize, the routine will panic (so that the 5111 * hat implementor knows that things are inconsistent.) 5112 */ 5113 uint_t 5114 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize) 5115 { 5116 struct cpuid_info *cpi; 5117 uint_t dtlb_nent = 0; 5118 5119 if (cpu == NULL) 5120 cpu = CPU; 5121 cpi = cpu->cpu_m.mcpu_cpi; 5122 5123 ASSERT(cpuid_checkpass(cpu, 1)); 5124 5125 /* 5126 * Check the L2 TLB info 5127 */ 5128 if (cpi->cpi_xmaxeax >= 0x80000006) { 5129 struct cpuid_regs *cp = &cpi->cpi_extd[6]; 5130 5131 switch (pagesize) { 5132 5133 case 4 * 1024: 5134 /* 5135 * All zero in the top 16 bits of the register 5136 * indicates a unified TLB. Size is in low 16 bits. 5137 */ 5138 if ((cp->cp_ebx & 0xffff0000) == 0) 5139 dtlb_nent = cp->cp_ebx & 0x0000ffff; 5140 else 5141 dtlb_nent = BITX(cp->cp_ebx, 27, 16); 5142 break; 5143 5144 case 2 * 1024 * 1024: 5145 if ((cp->cp_eax & 0xffff0000) == 0) 5146 dtlb_nent = cp->cp_eax & 0x0000ffff; 5147 else 5148 dtlb_nent = BITX(cp->cp_eax, 27, 16); 5149 break; 5150 5151 default: 5152 panic("unknown L2 pagesize"); 5153 /*NOTREACHED*/ 5154 } 5155 } 5156 5157 if (dtlb_nent != 0) 5158 return (dtlb_nent); 5159 5160 /* 5161 * No L2 TLB support for this size, try L1. 5162 */ 5163 if (cpi->cpi_xmaxeax >= 0x80000005) { 5164 struct cpuid_regs *cp = &cpi->cpi_extd[5]; 5165 5166 switch (pagesize) { 5167 case 4 * 1024: 5168 dtlb_nent = BITX(cp->cp_ebx, 23, 16); 5169 break; 5170 case 2 * 1024 * 1024: 5171 dtlb_nent = BITX(cp->cp_eax, 23, 16); 5172 break; 5173 default: 5174 panic("unknown L1 d-TLB pagesize"); 5175 /*NOTREACHED*/ 5176 } 5177 } 5178 5179 return (dtlb_nent); 5180 } 5181 5182 /* 5183 * Return 0 if the erratum is not present or not applicable, positive 5184 * if it is, and negative if the status of the erratum is unknown. 5185 * 5186 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm) 5187 * Processors" #25759, Rev 3.57, August 2005 5188 */ 5189 int 5190 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum) 5191 { 5192 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 5193 uint_t eax; 5194 5195 /* 5196 * Bail out if this CPU isn't an AMD CPU, or if it's 5197 * a legacy (32-bit) AMD CPU. 
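 *
 * The revision macros that follow match the raw leaf 1 %eax signature
 * (including the extended family/model fields), not the cooked
 * cpi_family/cpi_model values. As a worked example, 0xf40 decodes to
 * family 0xf, model 4, stepping 0 (the SH-B0 revision), while a value
 * such as 0x20f12 additionally carries extended-model bits and decodes
 * to family 0xf, model 0x21, stepping 2.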
5198 */ 5199 if (cpi->cpi_vendor != X86_VENDOR_AMD || 5200 cpi->cpi_family == 4 || cpi->cpi_family == 5 || 5201 cpi->cpi_family == 6) { 5202 return (0); 5203 } 5204 5205 eax = cpi->cpi_std[1].cp_eax; 5206 5207 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50) 5208 #define SH_B3(eax) (eax == 0xf51) 5209 #define B(eax) (SH_B0(eax) || SH_B3(eax)) 5210 5211 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58) 5212 5213 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a) 5214 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0) 5215 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2) 5216 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax)) 5217 5218 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70) 5219 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0) 5220 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0) 5221 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax)) 5222 5223 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70) 5224 #define JH_E1(eax) (eax == 0x20f10) /* JH8_E0 had 0x20f30 */ 5225 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0) 5226 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71) 5227 #define BH_E4(eax) (eax == 0x20fb1) 5228 #define SH_E5(eax) (eax == 0x20f42) 5229 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2) 5230 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32) 5231 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \ 5232 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \ 5233 DH_E6(eax) || JH_E6(eax)) 5234 5235 #define DR_AX(eax) (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02) 5236 #define DR_B0(eax) (eax == 0x100f20) 5237 #define DR_B1(eax) (eax == 0x100f21) 5238 #define DR_BA(eax) (eax == 0x100f2a) 5239 #define DR_B2(eax) (eax == 0x100f22) 5240 #define DR_B3(eax) (eax == 0x100f23) 5241 #define RB_C0(eax) (eax == 0x100f40) 5242 5243 switch (erratum) { 5244 case 1: 5245 return (cpi->cpi_family < 0x10); 5246 case 51: /* what does the asterisk mean? 
*/ 5247 return (B(eax) || SH_C0(eax) || CG(eax)); 5248 case 52: 5249 return (B(eax)); 5250 case 57: 5251 return (cpi->cpi_family <= 0x11); 5252 case 58: 5253 return (B(eax)); 5254 case 60: 5255 return (cpi->cpi_family <= 0x11); 5256 case 61: 5257 case 62: 5258 case 63: 5259 case 64: 5260 case 65: 5261 case 66: 5262 case 68: 5263 case 69: 5264 case 70: 5265 case 71: 5266 return (B(eax)); 5267 case 72: 5268 return (SH_B0(eax)); 5269 case 74: 5270 return (B(eax)); 5271 case 75: 5272 return (cpi->cpi_family < 0x10); 5273 case 76: 5274 return (B(eax)); 5275 case 77: 5276 return (cpi->cpi_family <= 0x11); 5277 case 78: 5278 return (B(eax) || SH_C0(eax)); 5279 case 79: 5280 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 5281 case 80: 5282 case 81: 5283 case 82: 5284 return (B(eax)); 5285 case 83: 5286 return (B(eax) || SH_C0(eax) || CG(eax)); 5287 case 85: 5288 return (cpi->cpi_family < 0x10); 5289 case 86: 5290 return (SH_C0(eax) || CG(eax)); 5291 case 88: 5292 #if !defined(__amd64) 5293 return (0); 5294 #else 5295 return (B(eax) || SH_C0(eax)); 5296 #endif 5297 case 89: 5298 return (cpi->cpi_family < 0x10); 5299 case 90: 5300 return (B(eax) || SH_C0(eax) || CG(eax)); 5301 case 91: 5302 case 92: 5303 return (B(eax) || SH_C0(eax)); 5304 case 93: 5305 return (SH_C0(eax)); 5306 case 94: 5307 return (B(eax) || SH_C0(eax) || CG(eax)); 5308 case 95: 5309 #if !defined(__amd64) 5310 return (0); 5311 #else 5312 return (B(eax) || SH_C0(eax)); 5313 #endif 5314 case 96: 5315 return (B(eax) || SH_C0(eax) || CG(eax)); 5316 case 97: 5317 case 98: 5318 return (SH_C0(eax) || CG(eax)); 5319 case 99: 5320 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 5321 case 100: 5322 return (B(eax) || SH_C0(eax)); 5323 case 101: 5324 case 103: 5325 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 5326 case 104: 5327 return (SH_C0(eax) || CG(eax) || D0(eax)); 5328 case 105: 5329 case 106: 5330 case 107: 5331 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 5332 case 108: 5333 return (DH_CG(eax)); 5334 case 109: 5335 return (SH_C0(eax) || CG(eax) || D0(eax)); 5336 case 110: 5337 return (D0(eax) || EX(eax)); 5338 case 111: 5339 return (CG(eax)); 5340 case 112: 5341 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 5342 case 113: 5343 return (eax == 0x20fc0); 5344 case 114: 5345 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 5346 case 115: 5347 return (SH_E0(eax) || JH_E1(eax)); 5348 case 116: 5349 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax)); 5350 case 117: 5351 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax)); 5352 case 118: 5353 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) || 5354 JH_E6(eax)); 5355 case 121: 5356 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax)); 5357 case 122: 5358 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11); 5359 case 123: 5360 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax)); 5361 case 131: 5362 return (cpi->cpi_family < 0x10); 5363 case 6336786: 5364 5365 /* 5366 * Test for AdvPowerMgmtInfo.TscPStateInvariant 5367 * if this is a K8 family or newer processor. We're testing for 5368 * this 'erratum' to determine whether or not we have a constant 5369 * TSC. 5370 * 5371 * Our current fix for this is to disable the C1-Clock ramping. 5372 * However, this doesn't work on newer processor families nor 5373 * does it work when virtualized as those devices don't exist. 
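 *
 * The 0x100 tested below is bit 8 of extended leaf 0x80000007 %edx,
 * i.e. the invariant-TSC (TscInvariant) bit: when it is set, the TSC is
 * constant across P-state changes and the 'erratum' is reported as not
 * present. A sketch of the same probe with the bit named explicitly
 * (tsc_invariant is an illustrative variable, not one used below):
 *
 *	regs.cp_eax = 0x80000007;
 *	(void) __cpuid_insn(&regs);
 *	tsc_invariant = (regs.cp_edx & (1 << 8)) != 0;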
5374 */ 5375 if (cpi->cpi_family >= 0x12 || get_hwenv() != HW_NATIVE) { 5376 return (0); 5377 } 5378 5379 if (CPI_FAMILY(cpi) == 0xf) { 5380 struct cpuid_regs regs; 5381 regs.cp_eax = 0x80000007; 5382 (void) __cpuid_insn(&regs); 5383 return (!(regs.cp_edx & 0x100)); 5384 } 5385 return (0); 5386 case 6323525: 5387 /* 5388 * This erratum (K8 #147) is not present on family 10 and newer. 5389 */ 5390 if (cpi->cpi_family >= 0x10) { 5391 return (0); 5392 } 5393 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) | 5394 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40); 5395 5396 case 6671130: 5397 /* 5398 * check for processors (pre-Shanghai) that do not provide 5399 * optimal management of 1gb ptes in their tlb. 5400 */ 5401 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4); 5402 5403 case 298: 5404 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) || 5405 DR_B2(eax) || RB_C0(eax)); 5406 5407 case 721: 5408 #if defined(__amd64) 5409 return (cpi->cpi_family == 0x10 || cpi->cpi_family == 0x12); 5410 #else 5411 return (0); 5412 #endif 5413 5414 default: 5415 return (-1); 5416 5417 } 5418 } 5419 5420 /* 5421 * Determine if specified erratum is present via OSVW (OS Visible Workaround). 5422 * Return 1 if erratum is present, 0 if not present and -1 if indeterminate. 5423 */ 5424 int 5425 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum) 5426 { 5427 struct cpuid_info *cpi; 5428 uint_t osvwid; 5429 static int osvwfeature = -1; 5430 uint64_t osvwlength; 5431 5432 5433 cpi = cpu->cpu_m.mcpu_cpi; 5434 5435 /* confirm OSVW supported */ 5436 if (osvwfeature == -1) { 5437 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW; 5438 } else { 5439 /* assert that osvw feature setting is consistent on all cpus */ 5440 ASSERT(osvwfeature == 5441 (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW)); 5442 } 5443 if (!osvwfeature) 5444 return (-1); 5445 5446 osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK; 5447 5448 switch (erratum) { 5449 case 298: /* osvwid is 0 */ 5450 osvwid = 0; 5451 if (osvwlength <= (uint64_t)osvwid) { 5452 /* osvwid 0 is unknown */ 5453 return (-1); 5454 } 5455 5456 /* 5457 * Check the OSVW STATUS MSR to determine the state 5458 * of the erratum where: 5459 * 0 - fixed by HW 5460 * 1 - BIOS has applied the workaround when BIOS 5461 * workaround is available. (Or for other errata, 5462 * OS workaround is required.) 5463 * For a value of 1, caller will confirm that the 5464 * erratum 298 workaround has indeed been applied by BIOS. 5465 * 5466 * A 1 may be set in cpus that have a HW fix 5467 * in a mixed cpu system. Regarding erratum 298: 5468 * In a multiprocessor platform, the workaround above 5469 * should be applied to all processors regardless of 5470 * silicon revision when an affected processor is 5471 * present. 5472 */ 5473 5474 return (rdmsr(MSR_AMD_OSVW_STATUS + 5475 (osvwid / OSVW_ID_CNT_PER_MSR)) & 5476 (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR))); 5477 5478 default: 5479 return (-1); 5480 } 5481 } 5482 5483 static const char assoc_str[] = "associativity"; 5484 static const char line_str[] = "line-size"; 5485 static const char size_str[] = "size"; 5486 5487 static void 5488 add_cache_prop(dev_info_t *devi, const char *label, const char *type, 5489 uint32_t val) 5490 { 5491 char buf[128]; 5492 5493 /* 5494 * ndi_prop_update_int() is used because it is desirable for 5495 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set.
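 *
 * The property name is simply "<label>-<type>"; for example,
 * add_cache_prop(devi, "l2-cache", "size", 512 * 1024) creates an
 * integer property named "l2-cache-size" with the value 524288. The
 * snprintf() return value check below only guards against the (never
 * expected) case of a truncated property name.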
5496 */ 5497 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf)) 5498 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val); 5499 } 5500 5501 /* 5502 * Intel-style cache/tlb description 5503 * 5504 * Standard cpuid level 2 gives a randomly ordered 5505 * selection of tags that index into a table that describes 5506 * cache and tlb properties. 5507 */ 5508 5509 static const char l1_icache_str[] = "l1-icache"; 5510 static const char l1_dcache_str[] = "l1-dcache"; 5511 static const char l2_cache_str[] = "l2-cache"; 5512 static const char l3_cache_str[] = "l3-cache"; 5513 static const char itlb4k_str[] = "itlb-4K"; 5514 static const char dtlb4k_str[] = "dtlb-4K"; 5515 static const char itlb2M_str[] = "itlb-2M"; 5516 static const char itlb4M_str[] = "itlb-4M"; 5517 static const char dtlb4M_str[] = "dtlb-4M"; 5518 static const char dtlb24_str[] = "dtlb0-2M-4M"; 5519 static const char itlb424_str[] = "itlb-4K-2M-4M"; 5520 static const char itlb24_str[] = "itlb-2M-4M"; 5521 static const char dtlb44_str[] = "dtlb-4K-4M"; 5522 static const char sl1_dcache_str[] = "sectored-l1-dcache"; 5523 static const char sl2_cache_str[] = "sectored-l2-cache"; 5524 static const char itrace_str[] = "itrace-cache"; 5525 static const char sl3_cache_str[] = "sectored-l3-cache"; 5526 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k"; 5527 5528 static const struct cachetab { 5529 uint8_t ct_code; 5530 uint8_t ct_assoc; 5531 uint16_t ct_line_size; 5532 size_t ct_size; 5533 const char *ct_label; 5534 } intel_ctab[] = { 5535 /* 5536 * maintain descending order! 5537 * 5538 * Codes ignored - Reason 5539 * ---------------------- 5540 * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache 5541 * f0H/f1H - Currently we do not interpret prefetch size by design 5542 */ 5543 { 0xe4, 16, 64, 8*1024*1024, l3_cache_str}, 5544 { 0xe3, 16, 64, 4*1024*1024, l3_cache_str}, 5545 { 0xe2, 16, 64, 2*1024*1024, l3_cache_str}, 5546 { 0xde, 12, 64, 6*1024*1024, l3_cache_str}, 5547 { 0xdd, 12, 64, 3*1024*1024, l3_cache_str}, 5548 { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str}, 5549 { 0xd8, 8, 64, 4*1024*1024, l3_cache_str}, 5550 { 0xd7, 8, 64, 2*1024*1024, l3_cache_str}, 5551 { 0xd6, 8, 64, 1*1024*1024, l3_cache_str}, 5552 { 0xd2, 4, 64, 2*1024*1024, l3_cache_str}, 5553 { 0xd1, 4, 64, 1*1024*1024, l3_cache_str}, 5554 { 0xd0, 4, 64, 512*1024, l3_cache_str}, 5555 { 0xca, 4, 0, 512, sh_l2_tlb4k_str}, 5556 { 0xc0, 4, 0, 8, dtlb44_str }, 5557 { 0xba, 4, 0, 64, dtlb4k_str }, 5558 { 0xb4, 4, 0, 256, dtlb4k_str }, 5559 { 0xb3, 4, 0, 128, dtlb4k_str }, 5560 { 0xb2, 4, 0, 64, itlb4k_str }, 5561 { 0xb0, 4, 0, 128, itlb4k_str }, 5562 { 0x87, 8, 64, 1024*1024, l2_cache_str}, 5563 { 0x86, 4, 64, 512*1024, l2_cache_str}, 5564 { 0x85, 8, 32, 2*1024*1024, l2_cache_str}, 5565 { 0x84, 8, 32, 1024*1024, l2_cache_str}, 5566 { 0x83, 8, 32, 512*1024, l2_cache_str}, 5567 { 0x82, 8, 32, 256*1024, l2_cache_str}, 5568 { 0x80, 8, 64, 512*1024, l2_cache_str}, 5569 { 0x7f, 2, 64, 512*1024, l2_cache_str}, 5570 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str}, 5571 { 0x7c, 8, 64, 1024*1024, sl2_cache_str}, 5572 { 0x7b, 8, 64, 512*1024, sl2_cache_str}, 5573 { 0x7a, 8, 64, 256*1024, sl2_cache_str}, 5574 { 0x79, 8, 64, 128*1024, sl2_cache_str}, 5575 { 0x78, 8, 64, 1024*1024, l2_cache_str}, 5576 { 0x73, 8, 0, 64*1024, itrace_str}, 5577 { 0x72, 8, 0, 32*1024, itrace_str}, 5578 { 0x71, 8, 0, 16*1024, itrace_str}, 5579 { 0x70, 8, 0, 12*1024, itrace_str}, 5580 { 0x68, 4, 64, 32*1024, sl1_dcache_str}, 5581 { 0x67, 4, 64, 16*1024, 
sl1_dcache_str}, 5582 { 0x66, 4, 64, 8*1024, sl1_dcache_str}, 5583 { 0x60, 8, 64, 16*1024, sl1_dcache_str}, 5584 { 0x5d, 0, 0, 256, dtlb44_str}, 5585 { 0x5c, 0, 0, 128, dtlb44_str}, 5586 { 0x5b, 0, 0, 64, dtlb44_str}, 5587 { 0x5a, 4, 0, 32, dtlb24_str}, 5588 { 0x59, 0, 0, 16, dtlb4k_str}, 5589 { 0x57, 4, 0, 16, dtlb4k_str}, 5590 { 0x56, 4, 0, 16, dtlb4M_str}, 5591 { 0x55, 0, 0, 7, itlb24_str}, 5592 { 0x52, 0, 0, 256, itlb424_str}, 5593 { 0x51, 0, 0, 128, itlb424_str}, 5594 { 0x50, 0, 0, 64, itlb424_str}, 5595 { 0x4f, 0, 0, 32, itlb4k_str}, 5596 { 0x4e, 24, 64, 6*1024*1024, l2_cache_str}, 5597 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str}, 5598 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str}, 5599 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str}, 5600 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str}, 5601 { 0x49, 16, 64, 4*1024*1024, l3_cache_str}, 5602 { 0x48, 12, 64, 3*1024*1024, l2_cache_str}, 5603 { 0x47, 8, 64, 8*1024*1024, l3_cache_str}, 5604 { 0x46, 4, 64, 4*1024*1024, l3_cache_str}, 5605 { 0x45, 4, 32, 2*1024*1024, l2_cache_str}, 5606 { 0x44, 4, 32, 1024*1024, l2_cache_str}, 5607 { 0x43, 4, 32, 512*1024, l2_cache_str}, 5608 { 0x42, 4, 32, 256*1024, l2_cache_str}, 5609 { 0x41, 4, 32, 128*1024, l2_cache_str}, 5610 { 0x3e, 4, 64, 512*1024, sl2_cache_str}, 5611 { 0x3d, 6, 64, 384*1024, sl2_cache_str}, 5612 { 0x3c, 4, 64, 256*1024, sl2_cache_str}, 5613 { 0x3b, 2, 64, 128*1024, sl2_cache_str}, 5614 { 0x3a, 6, 64, 192*1024, sl2_cache_str}, 5615 { 0x39, 4, 64, 128*1024, sl2_cache_str}, 5616 { 0x30, 8, 64, 32*1024, l1_icache_str}, 5617 { 0x2c, 8, 64, 32*1024, l1_dcache_str}, 5618 { 0x29, 8, 64, 4096*1024, sl3_cache_str}, 5619 { 0x25, 8, 64, 2048*1024, sl3_cache_str}, 5620 { 0x23, 8, 64, 1024*1024, sl3_cache_str}, 5621 { 0x22, 4, 64, 512*1024, sl3_cache_str}, 5622 { 0x0e, 6, 64, 24*1024, l1_dcache_str}, 5623 { 0x0d, 4, 32, 16*1024, l1_dcache_str}, 5624 { 0x0c, 4, 32, 16*1024, l1_dcache_str}, 5625 { 0x0b, 4, 0, 4, itlb4M_str}, 5626 { 0x0a, 2, 32, 8*1024, l1_dcache_str}, 5627 { 0x08, 4, 32, 16*1024, l1_icache_str}, 5628 { 0x06, 4, 32, 8*1024, l1_icache_str}, 5629 { 0x05, 4, 0, 32, dtlb4M_str}, 5630 { 0x04, 4, 0, 8, dtlb4M_str}, 5631 { 0x03, 4, 0, 64, dtlb4k_str}, 5632 { 0x02, 4, 0, 2, itlb4M_str}, 5633 { 0x01, 4, 0, 32, itlb4k_str}, 5634 { 0 } 5635 }; 5636 5637 static const struct cachetab cyrix_ctab[] = { 5638 { 0x70, 4, 0, 32, "tlb-4K" }, 5639 { 0x80, 4, 16, 16*1024, "l1-cache" }, 5640 { 0 } 5641 }; 5642 5643 /* 5644 * Search a cache table for a matching entry 5645 */ 5646 static const struct cachetab * 5647 find_cacheent(const struct cachetab *ct, uint_t code) 5648 { 5649 if (code != 0) { 5650 for (; ct->ct_code != 0; ct++) 5651 if (ct->ct_code <= code) 5652 break; 5653 if (ct->ct_code == code) 5654 return (ct); 5655 } 5656 return (NULL); 5657 } 5658 5659 /* 5660 * Populate cachetab entry with L2 or L3 cache-information using 5661 * cpuid function 4. This function is called from intel_walk_cacheinfo() 5662 * when descriptor 0x49 is encountered. It returns 0 if no such cache 5663 * information is found. 
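 *
 * As a worked example of the arithmetic below (illustrative numbers
 * only, not taken from any particular part): leaf 4 reports ways,
 * partitions, line size and set count each as (value - 1), and the
 * cache size is simply their product, so a 16-way cache with 1
 * partition, 64-byte lines and 4096 sets works out to
 * 16 * 1 * 64 * 4096 = 4MB.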
5664 */ 5665 static int 5666 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi) 5667 { 5668 uint32_t level, i; 5669 int ret = 0; 5670 5671 for (i = 0; i < cpi->cpi_cache_leaf_size; i++) { 5672 level = CPI_CACHE_LVL(cpi->cpi_cache_leaves[i]); 5673 5674 if (level == 2 || level == 3) { 5675 ct->ct_assoc = 5676 CPI_CACHE_WAYS(cpi->cpi_cache_leaves[i]) + 1; 5677 ct->ct_line_size = 5678 CPI_CACHE_COH_LN_SZ(cpi->cpi_cache_leaves[i]) + 1; 5679 ct->ct_size = ct->ct_assoc * 5680 (CPI_CACHE_PARTS(cpi->cpi_cache_leaves[i]) + 1) * 5681 ct->ct_line_size * 5682 (cpi->cpi_cache_leaves[i]->cp_ecx + 1); 5683 5684 if (level == 2) { 5685 ct->ct_label = l2_cache_str; 5686 } else if (level == 3) { 5687 ct->ct_label = l3_cache_str; 5688 } 5689 ret = 1; 5690 } 5691 } 5692 5693 return (ret); 5694 } 5695 5696 /* 5697 * Walk the cacheinfo descriptor, applying 'func' to every valid element 5698 * The walk is terminated if the walker returns non-zero. 5699 */ 5700 static void 5701 intel_walk_cacheinfo(struct cpuid_info *cpi, 5702 void *arg, int (*func)(void *, const struct cachetab *)) 5703 { 5704 const struct cachetab *ct; 5705 struct cachetab des_49_ct, des_b1_ct; 5706 uint8_t *dp; 5707 int i; 5708 5709 if ((dp = cpi->cpi_cacheinfo) == NULL) 5710 return; 5711 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 5712 /* 5713 * For overloaded descriptor 0x49 we use cpuid function 4 5714 * if supported by the current processor, to create 5715 * cache information. 5716 * For overloaded descriptor 0xb1 we use X86_PAE flag 5717 * to disambiguate the cache information. 5718 */ 5719 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 && 5720 intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) { 5721 ct = &des_49_ct; 5722 } else if (*dp == 0xb1) { 5723 des_b1_ct.ct_code = 0xb1; 5724 des_b1_ct.ct_assoc = 4; 5725 des_b1_ct.ct_line_size = 0; 5726 if (is_x86_feature(x86_featureset, X86FSET_PAE)) { 5727 des_b1_ct.ct_size = 8; 5728 des_b1_ct.ct_label = itlb2M_str; 5729 } else { 5730 des_b1_ct.ct_size = 4; 5731 des_b1_ct.ct_label = itlb4M_str; 5732 } 5733 ct = &des_b1_ct; 5734 } else { 5735 if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) { 5736 continue; 5737 } 5738 } 5739 5740 if (func(arg, ct) != 0) { 5741 break; 5742 } 5743 } 5744 } 5745 5746 /* 5747 * (Like the Intel one, except for Cyrix CPUs) 5748 */ 5749 static void 5750 cyrix_walk_cacheinfo(struct cpuid_info *cpi, 5751 void *arg, int (*func)(void *, const struct cachetab *)) 5752 { 5753 const struct cachetab *ct; 5754 uint8_t *dp; 5755 int i; 5756 5757 if ((dp = cpi->cpi_cacheinfo) == NULL) 5758 return; 5759 for (i = 0; i < cpi->cpi_ncache; i++, dp++) { 5760 /* 5761 * Search Cyrix-specific descriptor table first .. 5762 */ 5763 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) { 5764 if (func(arg, ct) != 0) 5765 break; 5766 continue; 5767 } 5768 /* 5769 * .. else fall back to the Intel one 5770 */ 5771 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) { 5772 if (func(arg, ct) != 0) 5773 break; 5774 continue; 5775 } 5776 } 5777 } 5778 5779 /* 5780 * A cacheinfo walker that adds associativity, line-size, and size properties 5781 * to the devinfo node it is passed as an argument. 
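 *
 * For example, intel_ctab descriptor 0x2c (an 8-way, 64 byte line,
 * 32KB l1-dcache) results in the integer properties
 * "l1-dcache-associativity" = 8, "l1-dcache-line-size" = 64 and
 * "l1-dcache-size" = 32768 being created on the node.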
5782 */ 5783 static int 5784 add_cacheent_props(void *arg, const struct cachetab *ct) 5785 { 5786 dev_info_t *devi = arg; 5787 5788 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc); 5789 if (ct->ct_line_size != 0) 5790 add_cache_prop(devi, ct->ct_label, line_str, 5791 ct->ct_line_size); 5792 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size); 5793 return (0); 5794 } 5795 5796 5797 static const char fully_assoc[] = "fully-associative?"; 5798 5799 /* 5800 * AMD style cache/tlb description 5801 * 5802 * Extended functions 5 and 6 directly describe properties of 5803 * tlbs and various cache levels. 5804 */ 5805 static void 5806 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc) 5807 { 5808 switch (assoc) { 5809 case 0: /* reserved; ignore */ 5810 break; 5811 default: 5812 add_cache_prop(devi, label, assoc_str, assoc); 5813 break; 5814 case 0xff: 5815 add_cache_prop(devi, label, fully_assoc, 1); 5816 break; 5817 } 5818 } 5819 5820 static void 5821 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 5822 { 5823 if (size == 0) 5824 return; 5825 add_cache_prop(devi, label, size_str, size); 5826 add_amd_assoc(devi, label, assoc); 5827 } 5828 5829 static void 5830 add_amd_cache(dev_info_t *devi, const char *label, 5831 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 5832 { 5833 if (size == 0 || line_size == 0) 5834 return; 5835 add_amd_assoc(devi, label, assoc); 5836 /* 5837 * Most AMD parts have a sectored cache. Multiple cache lines are 5838 * associated with each tag. A sector consists of all cache lines 5839 * associated with a tag. For example, the AMD K6-III has a sector 5840 * size of 2 cache lines per tag. 5841 */ 5842 if (lines_per_tag != 0) 5843 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 5844 add_cache_prop(devi, label, line_str, line_size); 5845 add_cache_prop(devi, label, size_str, size * 1024); 5846 } 5847 5848 static void 5849 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc) 5850 { 5851 switch (assoc) { 5852 case 0: /* off */ 5853 break; 5854 case 1: 5855 case 2: 5856 case 4: 5857 add_cache_prop(devi, label, assoc_str, assoc); 5858 break; 5859 case 6: 5860 add_cache_prop(devi, label, assoc_str, 8); 5861 break; 5862 case 8: 5863 add_cache_prop(devi, label, assoc_str, 16); 5864 break; 5865 case 0xf: 5866 add_cache_prop(devi, label, fully_assoc, 1); 5867 break; 5868 default: /* reserved; ignore */ 5869 break; 5870 } 5871 } 5872 5873 static void 5874 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size) 5875 { 5876 if (size == 0 || assoc == 0) 5877 return; 5878 add_amd_l2_assoc(devi, label, assoc); 5879 add_cache_prop(devi, label, size_str, size); 5880 } 5881 5882 static void 5883 add_amd_l2_cache(dev_info_t *devi, const char *label, 5884 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size) 5885 { 5886 if (size == 0 || assoc == 0 || line_size == 0) 5887 return; 5888 add_amd_l2_assoc(devi, label, assoc); 5889 if (lines_per_tag != 0) 5890 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag); 5891 add_cache_prop(devi, label, line_str, line_size); 5892 add_cache_prop(devi, label, size_str, size * 1024); 5893 } 5894 5895 static void 5896 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi) 5897 { 5898 struct cpuid_regs *cp; 5899 5900 if (cpi->cpi_xmaxeax < 0x80000005) 5901 return; 5902 cp = &cpi->cpi_extd[5]; 5903 5904 /* 5905 * 4M/2M L1 TLB configuration 5906 * 5907 * We report the size for 2M pages because AMD uses two 5908 * TLB 
entries for one 4M page. 5909 */ 5910 add_amd_tlb(devi, "dtlb-2M", 5911 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16)); 5912 add_amd_tlb(devi, "itlb-2M", 5913 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0)); 5914 5915 /* 5916 * 4K L1 TLB configuration 5917 */ 5918 5919 switch (cpi->cpi_vendor) { 5920 uint_t nentries; 5921 case X86_VENDOR_TM: 5922 if (cpi->cpi_family >= 5) { 5923 /* 5924 * Crusoe processors have 256 TLB entries, but 5925 * cpuid data format constrains them to only 5926 * reporting 255 of them. 5927 */ 5928 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255) 5929 nentries = 256; 5930 /* 5931 * Crusoe processors also have a unified TLB 5932 */ 5933 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24), 5934 nentries); 5935 break; 5936 } 5937 /*FALLTHROUGH*/ 5938 default: 5939 add_amd_tlb(devi, itlb4k_str, 5940 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16)); 5941 add_amd_tlb(devi, dtlb4k_str, 5942 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0)); 5943 break; 5944 } 5945 5946 /* 5947 * data L1 cache configuration 5948 */ 5949 5950 add_amd_cache(devi, l1_dcache_str, 5951 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16), 5952 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0)); 5953 5954 /* 5955 * code L1 cache configuration 5956 */ 5957 5958 add_amd_cache(devi, l1_icache_str, 5959 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16), 5960 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0)); 5961 5962 if (cpi->cpi_xmaxeax < 0x80000006) 5963 return; 5964 cp = &cpi->cpi_extd[6]; 5965 5966 /* Check for a unified L2 TLB for large pages */ 5967 5968 if (BITX(cp->cp_eax, 31, 16) == 0) 5969 add_amd_l2_tlb(devi, "l2-tlb-2M", 5970 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 5971 else { 5972 add_amd_l2_tlb(devi, "l2-dtlb-2M", 5973 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 5974 add_amd_l2_tlb(devi, "l2-itlb-2M", 5975 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 5976 } 5977 5978 /* Check for a unified L2 TLB for 4K pages */ 5979 5980 if (BITX(cp->cp_ebx, 31, 16) == 0) { 5981 add_amd_l2_tlb(devi, "l2-tlb-4K", 5982 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 5983 } else { 5984 add_amd_l2_tlb(devi, "l2-dtlb-4K", 5985 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16)); 5986 add_amd_l2_tlb(devi, "l2-itlb-4K", 5987 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0)); 5988 } 5989 5990 add_amd_l2_cache(devi, l2_cache_str, 5991 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12), 5992 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0)); 5993 } 5994 5995 /* 5996 * There are two basic ways that the x86 world describes it cache 5997 * and tlb architecture - Intel's way and AMD's way. 5998 * 5999 * Return which flavor of cache architecture we should use 6000 */ 6001 static int 6002 x86_which_cacheinfo(struct cpuid_info *cpi) 6003 { 6004 switch (cpi->cpi_vendor) { 6005 case X86_VENDOR_Intel: 6006 if (cpi->cpi_maxeax >= 2) 6007 return (X86_VENDOR_Intel); 6008 break; 6009 case X86_VENDOR_AMD: 6010 /* 6011 * The K5 model 1 was the first part from AMD that reported 6012 * cache sizes via extended cpuid functions. 6013 */ 6014 if (cpi->cpi_family > 5 || 6015 (cpi->cpi_family == 5 && cpi->cpi_model >= 1)) 6016 return (X86_VENDOR_AMD); 6017 break; 6018 case X86_VENDOR_TM: 6019 if (cpi->cpi_family >= 5) 6020 return (X86_VENDOR_AMD); 6021 /*FALLTHROUGH*/ 6022 default: 6023 /* 6024 * If they have extended CPU data for 0x80000005 6025 * then we assume they have AMD-format cache 6026 * information. 
6027 * 6028 * If not, and the vendor happens to be Cyrix, 6029 * then try our-Cyrix specific handler. 6030 * 6031 * If we're not Cyrix, then assume we're using Intel's 6032 * table-driven format instead. 6033 */ 6034 if (cpi->cpi_xmaxeax >= 0x80000005) 6035 return (X86_VENDOR_AMD); 6036 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix) 6037 return (X86_VENDOR_Cyrix); 6038 else if (cpi->cpi_maxeax >= 2) 6039 return (X86_VENDOR_Intel); 6040 break; 6041 } 6042 return (-1); 6043 } 6044 6045 void 6046 cpuid_set_cpu_properties(void *dip, processorid_t cpu_id, 6047 struct cpuid_info *cpi) 6048 { 6049 dev_info_t *cpu_devi; 6050 int create; 6051 6052 cpu_devi = (dev_info_t *)dip; 6053 6054 /* device_type */ 6055 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 6056 "device_type", "cpu"); 6057 6058 /* reg */ 6059 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 6060 "reg", cpu_id); 6061 6062 /* cpu-mhz, and clock-frequency */ 6063 if (cpu_freq > 0) { 6064 long long mul; 6065 6066 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 6067 "cpu-mhz", cpu_freq); 6068 if ((mul = cpu_freq * 1000000LL) <= INT_MAX) 6069 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 6070 "clock-frequency", (int)mul); 6071 } 6072 6073 if (!is_x86_feature(x86_featureset, X86FSET_CPUID)) { 6074 return; 6075 } 6076 6077 /* vendor-id */ 6078 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 6079 "vendor-id", cpi->cpi_vendorstr); 6080 6081 if (cpi->cpi_maxeax == 0) { 6082 return; 6083 } 6084 6085 /* 6086 * family, model, and step 6087 */ 6088 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 6089 "family", CPI_FAMILY(cpi)); 6090 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 6091 "cpu-model", CPI_MODEL(cpi)); 6092 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 6093 "stepping-id", CPI_STEP(cpi)); 6094 6095 /* type */ 6096 switch (cpi->cpi_vendor) { 6097 case X86_VENDOR_Intel: 6098 create = 1; 6099 break; 6100 default: 6101 create = 0; 6102 break; 6103 } 6104 if (create) 6105 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 6106 "type", CPI_TYPE(cpi)); 6107 6108 /* ext-family */ 6109 switch (cpi->cpi_vendor) { 6110 case X86_VENDOR_Intel: 6111 case X86_VENDOR_AMD: 6112 create = cpi->cpi_family >= 0xf; 6113 break; 6114 default: 6115 create = 0; 6116 break; 6117 } 6118 if (create) 6119 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 6120 "ext-family", CPI_FAMILY_XTD(cpi)); 6121 6122 /* ext-model */ 6123 switch (cpi->cpi_vendor) { 6124 case X86_VENDOR_Intel: 6125 create = IS_EXTENDED_MODEL_INTEL(cpi); 6126 break; 6127 case X86_VENDOR_AMD: 6128 create = CPI_FAMILY(cpi) == 0xf; 6129 break; 6130 default: 6131 create = 0; 6132 break; 6133 } 6134 if (create) 6135 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 6136 "ext-model", CPI_MODEL_XTD(cpi)); 6137 6138 /* generation */ 6139 switch (cpi->cpi_vendor) { 6140 case X86_VENDOR_AMD: 6141 /* 6142 * AMD K5 model 1 was the first part to support this 6143 */ 6144 create = cpi->cpi_xmaxeax >= 0x80000001; 6145 break; 6146 default: 6147 create = 0; 6148 break; 6149 } 6150 if (create) 6151 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 6152 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8)); 6153 6154 /* brand-id */ 6155 switch (cpi->cpi_vendor) { 6156 case X86_VENDOR_Intel: 6157 /* 6158 * brand id first appeared on Pentium III Xeon model 8, 6159 * and Celeron model 8 processors and Opteron 6160 */ 6161 create = cpi->cpi_family > 6 || 6162 (cpi->cpi_family == 6 && cpi->cpi_model >= 8); 6163 break; 6164 case X86_VENDOR_AMD: 6165 create = 
cpi->cpi_family >= 0xf; 6166 break; 6167 default: 6168 create = 0; 6169 break; 6170 } 6171 if (create && cpi->cpi_brandid != 0) { 6172 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 6173 "brand-id", cpi->cpi_brandid); 6174 } 6175 6176 /* chunks, and apic-id */ 6177 switch (cpi->cpi_vendor) { 6178 /* 6179 * first available on Pentium IV and Opteron (K8) 6180 */ 6181 case X86_VENDOR_Intel: 6182 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 6183 break; 6184 case X86_VENDOR_AMD: 6185 create = cpi->cpi_family >= 0xf; 6186 break; 6187 default: 6188 create = 0; 6189 break; 6190 } 6191 if (create) { 6192 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 6193 "chunks", CPI_CHUNKS(cpi)); 6194 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 6195 "apic-id", cpi->cpi_apicid); 6196 if (cpi->cpi_chipid >= 0) { 6197 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 6198 "chip#", cpi->cpi_chipid); 6199 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 6200 "clog#", cpi->cpi_clogid); 6201 } 6202 } 6203 6204 /* cpuid-features */ 6205 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 6206 "cpuid-features", CPI_FEATURES_EDX(cpi)); 6207 6208 6209 /* cpuid-features-ecx */ 6210 switch (cpi->cpi_vendor) { 6211 case X86_VENDOR_Intel: 6212 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf; 6213 break; 6214 case X86_VENDOR_AMD: 6215 create = cpi->cpi_family >= 0xf; 6216 break; 6217 default: 6218 create = 0; 6219 break; 6220 } 6221 if (create) 6222 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 6223 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi)); 6224 6225 /* ext-cpuid-features */ 6226 switch (cpi->cpi_vendor) { 6227 case X86_VENDOR_Intel: 6228 case X86_VENDOR_AMD: 6229 case X86_VENDOR_Cyrix: 6230 case X86_VENDOR_TM: 6231 case X86_VENDOR_Centaur: 6232 create = cpi->cpi_xmaxeax >= 0x80000001; 6233 break; 6234 default: 6235 create = 0; 6236 break; 6237 } 6238 if (create) { 6239 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 6240 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi)); 6241 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi, 6242 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi)); 6243 } 6244 6245 /* 6246 * Brand String first appeared in Intel Pentium IV, AMD K5 6247 * model 1, and Cyrix GXm. On earlier models we try and 6248 * simulate something similar .. so this string should always 6249 * say -something- about the processor, however lame.
6250 */ 6251 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi, 6252 "brand-string", cpi->cpi_brandstr); 6253 6254 /* 6255 * Finally, cache and tlb information 6256 */ 6257 switch (x86_which_cacheinfo(cpi)) { 6258 case X86_VENDOR_Intel: 6259 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 6260 break; 6261 case X86_VENDOR_Cyrix: 6262 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props); 6263 break; 6264 case X86_VENDOR_AMD: 6265 amd_cache_info(cpi, cpu_devi); 6266 break; 6267 default: 6268 break; 6269 } 6270 } 6271 6272 struct l2info { 6273 int *l2i_csz; 6274 int *l2i_lsz; 6275 int *l2i_assoc; 6276 int l2i_ret; 6277 }; 6278 6279 /* 6280 * A cacheinfo walker that fetches the size, line-size and associativity 6281 * of the L2 cache 6282 */ 6283 static int 6284 intel_l2cinfo(void *arg, const struct cachetab *ct) 6285 { 6286 struct l2info *l2i = arg; 6287 int *ip; 6288 6289 if (ct->ct_label != l2_cache_str && 6290 ct->ct_label != sl2_cache_str) 6291 return (0); /* not an L2 -- keep walking */ 6292 6293 if ((ip = l2i->l2i_csz) != NULL) 6294 *ip = ct->ct_size; 6295 if ((ip = l2i->l2i_lsz) != NULL) 6296 *ip = ct->ct_line_size; 6297 if ((ip = l2i->l2i_assoc) != NULL) 6298 *ip = ct->ct_assoc; 6299 l2i->l2i_ret = ct->ct_size; 6300 return (1); /* was an L2 -- terminate walk */ 6301 } 6302 6303 /* 6304 * AMD L2/L3 Cache and TLB Associativity Field Definition: 6305 * 6306 * Unlike the associativity for the L1 cache and tlb where the 8 bit 6307 * value is the associativity, the associativity for the L2 cache and 6308 * tlb is encoded in the following table. The 4 bit L2 value serves as 6309 * an index into the amd_afd[] array to determine the associativity. 6310 * -1 is undefined. 0 is fully associative. 6311 */ 6312 6313 static int amd_afd[] = 6314 {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0}; 6315 6316 static void 6317 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i) 6318 { 6319 struct cpuid_regs *cp; 6320 uint_t size, assoc; 6321 int i; 6322 int *ip; 6323 6324 if (cpi->cpi_xmaxeax < 0x80000006) 6325 return; 6326 cp = &cpi->cpi_extd[6]; 6327 6328 if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 && 6329 (size = BITX(cp->cp_ecx, 31, 16)) != 0) { 6330 uint_t cachesz = size * 1024; 6331 assoc = amd_afd[i]; 6332 6333 ASSERT(assoc != -1); 6334 6335 if ((ip = l2i->l2i_csz) != NULL) 6336 *ip = cachesz; 6337 if ((ip = l2i->l2i_lsz) != NULL) 6338 *ip = BITX(cp->cp_ecx, 7, 0); 6339 if ((ip = l2i->l2i_assoc) != NULL) 6340 *ip = assoc; 6341 l2i->l2i_ret = cachesz; 6342 } 6343 } 6344 6345 int 6346 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc) 6347 { 6348 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 6349 struct l2info __l2info, *l2i = &__l2info; 6350 6351 l2i->l2i_csz = csz; 6352 l2i->l2i_lsz = lsz; 6353 l2i->l2i_assoc = assoc; 6354 l2i->l2i_ret = -1; 6355 6356 switch (x86_which_cacheinfo(cpi)) { 6357 case X86_VENDOR_Intel: 6358 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 6359 break; 6360 case X86_VENDOR_Cyrix: 6361 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo); 6362 break; 6363 case X86_VENDOR_AMD: 6364 amd_l2cacheinfo(cpi, l2i); 6365 break; 6366 default: 6367 break; 6368 } 6369 return (l2i->l2i_ret); 6370 } 6371 6372 #if !defined(__xpv) 6373 6374 uint32_t * 6375 cpuid_mwait_alloc(cpu_t *cpu) 6376 { 6377 uint32_t *ret; 6378 size_t mwait_size; 6379 6380 ASSERT(cpuid_checkpass(CPU, 2)); 6381 6382 mwait_size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max; 6383 if (mwait_size == 0) 6384 return (NULL); 6385 6386 /* 6387 * kmem_alloc() returns cache line size aligned data for 
mwait_size 6388 * allocations. mwait_size is currently cache line sized. Neither 6389 * of these implementation details is guaranteed to be true in the 6390 * future. 6391 * 6392 * First try allocating mwait_size as kmem_alloc() currently returns 6393 * correctly aligned memory. If kmem_alloc() does not return 6394 * mwait_size aligned memory, allocate mwait_size * 2 and align within it using P2ROUNDUP. 6395 * 6396 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we 6397 * decide to free this memory. 6398 */ 6399 ret = kmem_zalloc(mwait_size, KM_SLEEP); 6400 if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) { 6401 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 6402 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size; 6403 *ret = MWAIT_RUNNING; 6404 return (ret); 6405 } else { 6406 kmem_free(ret, mwait_size); 6407 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP); 6408 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret; 6409 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2; 6410 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size); 6411 *ret = MWAIT_RUNNING; 6412 return (ret); 6413 } 6414 } 6415 6416 void 6417 cpuid_mwait_free(cpu_t *cpu) 6418 { 6419 if (cpu->cpu_m.mcpu_cpi == NULL) { 6420 return; 6421 } 6422 6423 if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL && 6424 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) { 6425 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual, 6426 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual); 6427 } 6428 6429 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL; 6430 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0; 6431 } 6432 6433 void 6434 patch_tsc_read(int flag) 6435 { 6436 size_t cnt; 6437 6438 switch (flag) { 6439 case TSC_NONE: 6440 cnt = &_no_rdtsc_end - &_no_rdtsc_start; 6441 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt); 6442 break; 6443 case TSC_RDTSC_MFENCE: 6444 cnt = &_tsc_mfence_end - &_tsc_mfence_start; 6445 (void) memcpy((void *)tsc_read, 6446 (void *)&_tsc_mfence_start, cnt); 6447 break; 6448 case TSC_RDTSC_LFENCE: 6449 cnt = &_tsc_lfence_end - &_tsc_lfence_start; 6450 (void) memcpy((void *)tsc_read, 6451 (void *)&_tsc_lfence_start, cnt); 6452 break; 6453 case TSC_TSCP: 6454 cnt = &_tscp_end - &_tscp_start; 6455 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt); 6456 break; 6457 default: 6458 /* Bail for unexpected TSC types. (TSC_NONE covers 0) */ 6459 cmn_err(CE_PANIC, "Unrecognized TSC type: %d", flag); 6460 break; 6461 } 6462 tsc_type = flag; 6463 } 6464 6465 int 6466 cpuid_deep_cstates_supported(void) 6467 { 6468 struct cpuid_info *cpi; 6469 struct cpuid_regs regs; 6470 6471 ASSERT(cpuid_checkpass(CPU, 1)); 6472 6473 cpi = CPU->cpu_m.mcpu_cpi; 6474 6475 if (!is_x86_feature(x86_featureset, X86FSET_CPUID)) 6476 return (0); 6477 6478 switch (cpi->cpi_vendor) { 6479 case X86_VENDOR_Intel: 6480 if (cpi->cpi_xmaxeax < 0x80000007) 6481 return (0); 6482 6483 /* 6484 * Does the TSC run at a constant rate in all ACPI C-states? 6485 */ 6486 regs.cp_eax = 0x80000007; 6487 (void) __cpuid_insn(&regs); 6488 return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE); 6489 6490 default: 6491 return (0); 6492 } 6493 } 6494 6495 #endif /* !__xpv */ 6496 6497 void 6498 post_startup_cpu_fixups(void) 6499 { 6500 #ifndef __xpv 6501 /* 6502 * Some AMD processors support C1E state. Entering this state will 6503 * cause the local APIC timer to stop, which we can't deal with at 6504 * this time.
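 *
 * The MSR access below is wrapped in on_trap()/no_trap() so that a #GP
 * from a part that does not implement MSR_AMD_INT_PENDING_CMP_HALT is
 * tolerated; where the MSR does exist, clearing the ACTONCMPHALT field
 * stops the BIOS-enabled C1E behavior.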
6505 */ 6506 if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) { 6507 on_trap_data_t otd; 6508 uint64_t reg; 6509 6510 if (!on_trap(&otd, OT_DATA_ACCESS)) { 6511 reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT); 6512 /* Disable C1E state if it is enabled by BIOS */ 6513 if ((reg >> AMD_ACTONCMPHALT_SHIFT) & 6514 AMD_ACTONCMPHALT_MASK) { 6515 reg &= ~(AMD_ACTONCMPHALT_MASK << 6516 AMD_ACTONCMPHALT_SHIFT); 6517 wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg); 6518 } 6519 } 6520 no_trap(); 6521 } 6522 #endif /* !__xpv */ 6523 } 6524 6525 void 6526 enable_pcid(void) 6527 { 6528 if (x86_use_pcid == -1) 6529 x86_use_pcid = is_x86_feature(x86_featureset, X86FSET_PCID); 6530 6531 if (x86_use_invpcid == -1) { 6532 x86_use_invpcid = is_x86_feature(x86_featureset, 6533 X86FSET_INVPCID); 6534 } 6535 6536 if (!x86_use_pcid) 6537 return; 6538 6539 /* 6540 * Intel says that on setting PCIDE, the processor immediately starts 6541 * using the PCID bits; better make sure there's nothing there. 6542 */ 6543 ASSERT((getcr3() & MMU_PAGEOFFSET) == PCID_NONE); 6544 6545 setcr4(getcr4() | CR4_PCIDE); 6546 } 6547 6548 /* 6549 * Set up the necessary registers to enable the XSAVE feature on this processor. 6550 * This function needs to be called early enough, so that no xsave/xrstor 6551 * ops will execute on the processor before the MSRs are properly set up. 6552 * 6553 * Current implementation has the following assumptions: 6554 * - cpuid_pass1() is done, so that X86 features are known. 6555 * - fpu_probe() is done, so that fp_save_mech is chosen. 6556 */ 6557 void 6558 xsave_setup_msr(cpu_t *cpu) 6559 { 6560 ASSERT(fp_save_mech == FP_XSAVE); 6561 ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE)); 6562 6563 /* Enable OSXSAVE in CR4. */ 6564 setcr4(getcr4() | CR4_OSXSAVE); 6565 /* 6566 * Update SW copy of ECX, so that /dev/cpu/self/cpuid will report 6567 * the correct value. 6568 */ 6569 cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_ecx |= CPUID_INTC_ECX_OSXSAVE; 6570 setup_xfem(); 6571 } 6572 6573 /* 6574 * Starting with the Westmere processor the local 6575 * APIC timer will continue running in all C-states, 6576 * including the deepest C-states. 6577 */ 6578 int 6579 cpuid_arat_supported(void) 6580 { 6581 struct cpuid_info *cpi; 6582 struct cpuid_regs regs; 6583 6584 ASSERT(cpuid_checkpass(CPU, 1)); 6585 ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID)); 6586 6587 cpi = CPU->cpu_m.mcpu_cpi; 6588 6589 switch (cpi->cpi_vendor) { 6590 case X86_VENDOR_Intel: 6591 /* 6592 * Always-running Local APIC Timer is 6593 * indicated by CPUID.6.EAX[2].
6594 */ 6595 if (cpi->cpi_maxeax >= 6) { 6596 regs.cp_eax = 6; 6597 (void) cpuid_insn(NULL, &regs); 6598 return (regs.cp_eax & CPUID_INTC_EAX_ARAT); 6599 } else { 6600 return (0); 6601 } 6602 default: 6603 return (0); 6604 } 6605 } 6606 6607 /* 6608 * Check support for Intel ENERGY_PERF_BIAS feature 6609 */ 6610 int 6611 cpuid_iepb_supported(struct cpu *cp) 6612 { 6613 struct cpuid_info *cpi = cp->cpu_m.mcpu_cpi; 6614 struct cpuid_regs regs; 6615 6616 ASSERT(cpuid_checkpass(cp, 1)); 6617 6618 if (!(is_x86_feature(x86_featureset, X86FSET_CPUID)) || 6619 !(is_x86_feature(x86_featureset, X86FSET_MSR))) { 6620 return (0); 6621 } 6622 6623 /* 6624 * Intel ENERGY_PERF_BIAS MSR is indicated by 6625 * capability bit CPUID.6.ECX.3 6626 */ 6627 if ((cpi->cpi_vendor != X86_VENDOR_Intel) || (cpi->cpi_maxeax < 6)) 6628 return (0); 6629 6630 regs.cp_eax = 0x6; 6631 (void) cpuid_insn(NULL, &regs); 6632 return (regs.cp_ecx & CPUID_INTC_ECX_PERFBIAS); 6633 } 6634 6635 /* 6636 * Check support for TSC deadline timer 6637 * 6638 * TSC deadline timer provides a superior software programming 6639 * model over local APIC timer that eliminates "time drifts". 6640 * Instead of specifying a relative time, software specifies an 6641 * absolute time as the target at which the processor should 6642 * generate a timer event. 6643 */ 6644 int 6645 cpuid_deadline_tsc_supported(void) 6646 { 6647 struct cpuid_info *cpi = CPU->cpu_m.mcpu_cpi; 6648 struct cpuid_regs regs; 6649 6650 ASSERT(cpuid_checkpass(CPU, 1)); 6651 ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID)); 6652 6653 switch (cpi->cpi_vendor) { 6654 case X86_VENDOR_Intel: 6655 if (cpi->cpi_maxeax >= 1) { 6656 regs.cp_eax = 1; 6657 (void) cpuid_insn(NULL, &regs); 6658 return (regs.cp_ecx & CPUID_DEADLINE_TSC); 6659 } else { 6660 return (0); 6661 } 6662 default: 6663 return (0); 6664 } 6665 } 6666 6667 #if defined(__amd64) && !defined(__xpv) 6668 /* 6669 * Patch in versions of bcopy for high performance Intel Nhm processors 6670 * and later... 6671 */ 6672 void 6673 patch_memops(uint_t vendor) 6674 { 6675 size_t cnt, i; 6676 caddr_t to, from; 6677 6678 if ((vendor == X86_VENDOR_Intel) && 6679 is_x86_feature(x86_featureset, X86FSET_SSE4_2)) { 6680 cnt = &bcopy_patch_end - &bcopy_patch_start; 6681 to = &bcopy_ck_size; 6682 from = &bcopy_patch_start; 6683 for (i = 0; i < cnt; i++) { 6684 *to++ = *from++; 6685 } 6686 } 6687 } 6688 #endif /* __amd64 && !__xpv */ 6689 6690 /* 6691 * We're being asked to tell the system how many bits are required to represent 6692 * the various thread and strand IDs. While it's tempting to derive this based 6693 * on the values in cpi_ncore_per_chip and cpi_ncpu_per_chip, that isn't quite 6694 * correct. Instead, this needs to be based on the number of bits that the APIC 6695 * allows for these different configurations. We only update these to a larger 6696 * value if we find one. 6697 */ 6698 void 6699 cpuid_get_ext_topo(cpu_t *cpu, uint_t *core_nbits, uint_t *strand_nbits) 6700 { 6701 struct cpuid_info *cpi; 6702 6703 VERIFY(cpuid_checkpass(CPU, 1)); 6704 cpi = cpu->cpu_m.mcpu_cpi; 6705 6706 if (cpi->cpi_ncore_bits > *core_nbits) { 6707 *core_nbits = cpi->cpi_ncore_bits; 6708 } 6709 6710 if (cpi->cpi_nthread_bits > *strand_nbits) { 6711 *strand_nbits = cpi->cpi_nthread_bits; 6712 } 6713 } 6714 6715 void 6716 cpuid_pass_ucode(cpu_t *cpu, uchar_t *fset) 6717 { 6718 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; 6719 struct cpuid_regs cp; 6720 6721 /* 6722 * Reread the CPUID portions that we need for various security 6723 * information.
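 *
 * Concretely, the code below refreshes Intel leaf 7 (with %ecx == 0) or
 * AMD extended leaf 8, where the security/mitigation related feature
 * bits are enumerated, and then re-runs cpuid_scan_security() so that
 * anything newly exposed by the microcode update is picked up.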
6724 */ 6725 if (cpi->cpi_vendor == X86_VENDOR_Intel) { 6726 /* 6727 * Check if we now have leaf 7 available to us. 6728 */ 6729 if (cpi->cpi_maxeax < 7) { 6730 bzero(&cp, sizeof (cp)); 6731 cp.cp_eax = 0; 6732 cpi->cpi_maxeax = __cpuid_insn(&cp); 6733 if (cpi->cpi_maxeax < 7) 6734 return; 6735 } 6736 6737 bzero(&cp, sizeof (cp)); 6738 cp.cp_eax = 7; 6739 cp.cp_ecx = 0; 6740 (void) __cpuid_insn(&cp); 6741 cpi->cpi_std[7] = cp; 6742 } else if (cpi->cpi_vendor == X86_VENDOR_AMD) { 6743 /* No xcpuid support */ 6744 if (cpi->cpi_family < 5 || 6745 (cpi->cpi_family == 5 && cpi->cpi_model < 1)) 6746 return; 6747 6748 if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8) { 6749 bzero(&cp, sizeof (cp)); 6750 cp.cp_eax = CPUID_LEAF_EXT_0; 6751 cpi->cpi_xmaxeax = __cpuid_insn(&cp); 6752 if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8) { 6753 return; 6754 } 6755 } 6756 6757 bzero(&cp, sizeof (cp)); 6758 cp.cp_eax = CPUID_LEAF_EXT_8; 6759 (void) __cpuid_insn(&cp); 6760 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8, &cp); 6761 cpi->cpi_extd[8] = cp; 6762 } else { 6763 /* 6764 * Nothing to do here. Return an empty set which has already 6765 * been zeroed for us. 6766 */ 6767 return; 6768 } 6769 cpuid_scan_security(cpu, fset); 6770 } 6771 6772 /* ARGSUSED */ 6773 static int 6774 cpuid_post_ucodeadm_xc(xc_arg_t arg0, xc_arg_t arg1, xc_arg_t arg2) 6775 { 6776 uchar_t *fset; 6777 6778 fset = (uchar_t *)(arg0 + sizeof (x86_featureset) * CPU->cpu_id); 6779 cpuid_pass_ucode(CPU, fset); 6780 6781 return (0); 6782 } 6783 6784 /* 6785 * After a microcode update where the version has changed, then we need to 6786 * rescan CPUID. To do this we check every CPU to make sure that they have the 6787 * same microcode. Then we perform a cross call to all such CPUs. It's the 6788 * caller's job to make sure that no one else can end up doing an update while 6789 * this is going on. 6790 * 6791 * We assume that the system is microcode capable if we're called. 6792 */ 6793 void 6794 cpuid_post_ucodeadm(void) 6795 { 6796 uint32_t rev; 6797 int i; 6798 struct cpu *cpu; 6799 cpuset_t cpuset; 6800 void *argdata; 6801 uchar_t *f0; 6802 6803 argdata = kmem_zalloc(sizeof (x86_featureset) * NCPU, KM_SLEEP); 6804 6805 mutex_enter(&cpu_lock); 6806 cpu = cpu_get(0); 6807 rev = cpu->cpu_m.mcpu_ucode_info->cui_rev; 6808 CPUSET_ONLY(cpuset, 0); 6809 for (i = 1; i < max_ncpus; i++) { 6810 if ((cpu = cpu_get(i)) == NULL) 6811 continue; 6812 6813 if (cpu->cpu_m.mcpu_ucode_info->cui_rev != rev) { 6814 panic("post microcode update CPU %d has differing " 6815 "microcode revision (%u) from CPU 0 (%u)", 6816 i, cpu->cpu_m.mcpu_ucode_info->cui_rev, rev); 6817 } 6818 CPUSET_ADD(cpuset, i); 6819 } 6820 6821 kpreempt_disable(); 6822 xc_sync((xc_arg_t)argdata, 0, 0, CPUSET2BV(cpuset), 6823 cpuid_post_ucodeadm_xc); 6824 kpreempt_enable(); 6825 6826 /* 6827 * OK, now look at each CPU and see if their feature sets are equal. 
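 *
 * Each cross call handler stashed its freshly scanned feature set at
 * argdata + sizeof (x86_featureset) * cpu_id, so f0 below is CPU 0's
 * set and every other CPU in the cpuset is compared against it.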
6828 */ 6829 f0 = argdata; 6830 for (i = 1; i < max_ncpus; i++) { 6831 uchar_t *fset; 6832 if (!CPU_IN_SET(cpuset, i)) 6833 continue; 6834 6835 fset = (uchar_t *)((uintptr_t)argdata + 6836 sizeof (x86_featureset) * i); 6837 6838 if (!compare_x86_featureset(f0, fset)) { 6839 panic("Post microcode update CPU %d has " 6840 "differing security feature (%p) set from CPU 0 " 6841 "(%p), not appending to feature set", i, 6842 (void *)fset, (void *)f0); 6843 } 6844 } 6845 6846 mutex_exit(&cpu_lock); 6847 6848 for (i = 0; i < NUM_X86_FEATURES; i++) { 6849 cmn_err(CE_CONT, "?post-ucode x86_feature: %s\n", 6850 x86_feature_names[i]); 6851 if (is_x86_feature(f0, i)) { 6852 add_x86_feature(x86_featureset, i); 6853 } 6854 } 6855 kmem_free(argdata, sizeof (x86_featureset) * NCPU); 6856 }
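/*
 * Illustrative sketch only (not part of this file's logic): the expected
 * call pattern for cpuid_post_ucodeadm() is simply to invoke it once,
 * from whatever code applies a microcode update, after every CPU has
 * accepted the new image, e.g.
 *
 *	if (microcode_revision_changed)		(hypothetical condition)
 *		cpuid_post_ucodeadm();
 *
 * The routine itself takes cpu_lock, cross-calls every online CPU via
 * cpuid_post_ucodeadm_xc(), and panics if the microcode revisions or the
 * resulting security feature sets differ between CPUs.
 */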