1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
  24  * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
  25  * Copyright 2014 Josef "Jeff" Sipek <jeffpc@josefsipek.net>
  26  */
  27 /*
  28  * Copyright (c) 2010, Intel Corporation.
  29  * All rights reserved.
  30  */
  31 /*
  32  * Portions Copyright 2009 Advanced Micro Devices, Inc.
  33  */
  34 /*
  35  * Copyright 2019 Joyent, Inc.
  36  */
  37 
  38 /*
  39  * CPU Identification logic
  40  *
  41  * The purpose of this file and its companion, cpuid_subr.c, is to help deal
  42  * with the identification of CPUs, their features, and their topologies. More
  43  * specifically, this file helps drive the following:
  44  *
  45  * 1. Enumeration of features of the processor which are used by the kernel to
  46  *    determine what features to enable or disable. These may be instruction set
  47  *    enhancements or features that we use.
  48  *
  49  * 2. Enumeration of instruction set architecture (ISA) additions that userland
  50  *    will be told about through the auxiliary vector.
  51  *
  52  * 3. Understanding the physical topology of the CPU such as the number of
 *    caches, how many cores it has, whether or not it supports simultaneous
 *    multi-threading (SMT), etc.
  55  *
  56  * ------------------------
  57  * CPUID History and Basics
  58  * ------------------------
  59  *
  60  * The cpuid instruction was added by Intel roughly around the time that the
 * original Pentium was introduced. The purpose of cpuid was to report, in a
 * programmatic fashion, information about the CPU that previously had to be
 * guessed at. For example, an important part of cpuid is that we can know what
 * extensions to the ISA exist. If you used an invalid opcode you would get a
 * #UD, so this method allows a program (whether a user program or the kernel)
 * to determine what exists without crashing or getting a SIGILL. Of course,
  67  * this was also during the era of the clones and the AMD Am5x86. The vendor
  68  * name shows up first in cpuid for a reason.
  69  *
  70  * cpuid information is broken down into ranges called a 'leaf'. Each leaf puts
  71  * unique values into the registers %eax, %ebx, %ecx, and %edx and each leaf has
  72  * its own meaning. The different leaves are broken down into different regions:
  73  *
  74  *      [ 0, 7fffffff ]                 This region is called the 'basic'
  75  *                                      region. This region is generally defined
  76  *                                      by Intel, though some of the original
  77  *                                      portions have different meanings based
  78  *                                      on the manufacturer. These days, Intel
  79  *                                      adds most new features to this region.
  80  *                                      AMD adds non-Intel compatible
  81  *                                      information in the third, extended
  82  *                                      region. Intel uses this for everything
  83  *                                      including ISA extensions, CPU
  84  *                                      features, cache information, topology,
  85  *                                      and more.
  86  *
  87  *                                      There is a hole carved out of this
  88  *                                      region which is reserved for
  89  *                                      hypervisors.
  90  *
  91  *      [ 40000000, 4fffffff ]          This region, which is found in the
  92  *                                      middle of the previous region, is
  93  *                                      explicitly promised to never be used by
  94  *                                      CPUs. Instead, it is used by hypervisors
  95  *                                      to communicate information about
  96  *                                      themselves to the operating system. The
  97  *                                      values and details are unique for each
  98  *                                      hypervisor.
  99  *
 100  *      [ 80000000, ffffffff ]          This region is called the 'extended'
 101  *                                      region. Some of the low leaves mirror
 102  *                                      parts of the basic leaves. This region
 103  *                                      has generally been used by AMD for
 104  *                                      various extensions. For example, AMD-
 105  *                                      specific information about caches,
 106  *                                      features, and topology are found in this
 107  *                                      region.
 108  *
 * To read a given leaf, you place the desired leaf number into %eax, zero
 * %ebx, %ecx, and %edx, and then issue the cpuid instruction. At the first
 * leaf in each of
 111  * the ranges, one of the primary things returned is the maximum valid leaf in
 112  * that range. This allows for discovery of what range of CPUID is valid.
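 *
 * As a purely illustrative, hedged sketch (the kernel's own code uses the
 * __cpuid_insn() wrapper and struct cpuid_regs rather than open-coded
 * assembly), reading leaf 0 with GCC-style inline assembly looks roughly
 * like:
 *
 *      uint32_t eax = 0, ebx, ecx = 0, edx;
 *
 *      __asm__ __volatile__("cpuid"
 *          : "+a" (eax), "=b" (ebx), "+c" (ecx), "=d" (edx));
 *
 * On return, %eax holds the maximum valid basic leaf, while %ebx, %edx, and
 * %ecx hold the vendor string discussed below.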
 113  *
 114  * The CPUs have potentially surprising behavior when using an invalid leaf or
 115  * unimplemented leaf. If the requested leaf is within the valid basic or
 116  * extended range, but is unimplemented, then %eax, %ebx, %ecx, and %edx will be
 117  * set to zero. However, if you specify a leaf that is outside of a valid range,
 118  * then instead it will be filled with the last valid _basic_ leaf. For example,
 119  * if the maximum basic value is on leaf 0x3, then issuing a cpuid for leaf 4 or
 120  * an invalid extended leaf will return the information for leaf 3.
 121  *
 122  * Some leaves are broken down into sub-leaves. This means that the value
 123  * depends on both the leaf asked for in %eax and a secondary register. For
 124  * example, Intel uses the value in %ecx on leaf 7 to indicate a sub-leaf to get
 125  * additional information. Or when getting topology information in leaf 0xb, the
 126  * initial value in %ecx changes which level of the topology that you are
 127  * getting information about.
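 *
 * Continuing the illustrative sketch above, reading sub-leaf 0 of leaf 7 (the
 * structured extended feature flags) just means loading the sub-leaf into
 * %ecx before executing cpuid:
 *
 *      uint32_t eax = 7, ebx, ecx = 0, edx;
 *
 *      __asm__ __volatile__("cpuid"
 *          : "+a" (eax), "=b" (ebx), "+c" (ecx), "=d" (edx));
 *
 * Afterwards %ebx, %ecx, and %edx contain the feature flags for that sub-leaf
 * and %eax reports the maximum valid sub-leaf.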
 128  *
 129  * cpuid values are always kept to 32 bits regardless of whether or not the
 130  * program is in 64-bit mode. When executing in 64-bit mode, the upper
 * 32 bits of the register are always set to zero so that the values are the
 * same regardless of execution mode.
 133  *
 134  * ----------------------
 135  * Identifying Processors
 136  * ----------------------
 137  *
 138  * We can identify a processor in two steps. The first step looks at cpuid leaf
 * 0. Leaf 0 contains the processor's vendor information. This is done by
 * putting a 12 character string in %ebx, %edx, and %ecx (in that order). On
 * AMD, it is 'AuthenticAMD' and on Intel it is 'GenuineIntel'.
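 *
 * A hedged sketch of assembling that string, assuming the registers were read
 * as in the earlier example (note that the copy order is %ebx, %edx, %ecx):
 *
 *      char vendor[13];
 *
 *      bcopy(&ebx, &vendor[0], 4);
 *      bcopy(&edx, &vendor[4], 4);
 *      bcopy(&ecx, &vendor[8], 4);
 *      vendor[12] = '\0';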
 142  *
 143  * From there, a processor is identified by a combination of three different
 144  * values:
 145  *
 146  *  1. Family
 147  *  2. Model
 148  *  3. Stepping
 149  *
 150  * Each vendor uses the family and model to uniquely identify a processor. The
 151  * way that family and model are changed depends on the vendor. For example,
 * Intel has been using family 0x6 for almost all of their processors since the
 153  * Pentium Pro/Pentium II era, often called the P6. The model is used to
 154  * identify the exact processor. Different models are often used for the client
 155  * (consumer) and server parts. Even though each processor often has major
 156  * architectural differences, they still are considered the same family by
 157  * Intel.
 158  *
 159  * On the other hand, each major AMD architecture generally has its own family.
 * For example, the K8 is family 0xf, Bulldozer 0x15, and Zen 0x17. Within a
 * family, the model number is used to help identify specific processors.
 162  *
 163  * The stepping is used to refer to a revision of a specific microprocessor. The
 164  * term comes from equipment used to produce masks that are used to create
 165  * integrated circuits.
 166  *
 167  * The information is present in leaf 1, %eax. In technical documentation you
 * will see the terms extended model and extended family. The original family,
 * model, and stepping fields were each 4 bits wide. If the base family field
 * is 0xf, then one must also consult the extended family and extended model
 * fields, which take previously reserved bits and allow for a larger number of
 * families and models: the extended family is added to the base family, while
 * the extended model supplies the upper four bits of the full model number.
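 *
 * A sketch of the conventional computation follows; the bit positions are
 * those of leaf 1, %eax, BITX() is the bit-extraction helper used elsewhere
 * in this file, and 'vendor_is_intel' stands in for an actual vendor check:
 *
 *      uint_t base_family = BITX(eax, 11, 8);
 *      uint_t base_model = BITX(eax, 7, 4);
 *      uint_t stepping = BITX(eax, 3, 0);
 *      uint_t ext_family = BITX(eax, 27, 20);
 *      uint_t ext_model = BITX(eax, 19, 16);
 *      uint_t family, model;
 *
 *      if (base_family == 0xf)
 *              family = base_family + ext_family;
 *      else
 *              family = base_family;
 *
 *      if (base_family == 0xf || (vendor_is_intel && base_family == 0x6))
 *              model = (ext_model << 4) | base_model;
 *      else
 *              model = base_model;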
 173  *
 174  * When we process this information, we store the full family, model, and
 175  * stepping in the struct cpuid_info members cpi_family, cpi_model, and
 176  * cpi_step, respectively. Whenever you are performing comparisons with the
 177  * family, model, and stepping, you should use these members and not the raw
 * values from cpuid. If you must use the raw values from cpuid directly, you
 * must make sure that you combine the extended model and family with the base
 * model and family as described above.
 181  *
 182  * In general, we do not use information about the family, model, and stepping
 183  * to determine whether or not a feature is present; that is generally driven by
 184  * specific leaves. However, when something we care about on the processor is
 185  * not considered 'architectural' meaning that it is specific to a set of
 186  * processors and not promised in the architecture model to be consistent from
 187  * generation to generation, then we will fall back on this information. The
 * most common cases where this comes up are when we have to work around errata
 * in the processor, are dealing with processor-specific features such as CPU
 * performance counters, or want to provide additional information for things
 191  * such as fault management.
 192  *
 * While processors also do have a brand string, which is the name that people
 * are familiar with when buying the processor, it is not meant for
 * programmatic consumption. That is what the family, model, and stepping are
 196  * for.
 197  *
 198  * ------------
 199  * CPUID Passes
 200  * ------------
 201  *
 202  * As part of performing feature detection, we break this into several different
 203  * passes. The passes are as follows:
 204  *
 205  *      Pass 0          This is a primordial pass done in locore.s to deal with
 206  *                      Cyrix CPUs that don't support cpuid. The reality is that
 207  *                      we likely don't run on them any more, but there is still
 208  *                      logic for handling them.
 209  *
 210  *      Pass 1          This is the primary pass and is responsible for doing a
 211  *                      large number of different things:
 212  *
 *                      1. Determining which vendor manufactured the CPU and
 *                      the family, model, and stepping information.
 *
 *                      2. Gathering a large number of feature flags to
 *                      determine which features the CPU supports and which
 218  *                      indicate things that we need to do other work in the OS
 219  *                      to enable. Features detected this way are added to the
 220  *                      x86_featureset which can be queried to
 221  *                      determine what we should do. This includes processing
 222  *                      all of the basic and extended CPU features that we care
 223  *                      about.
 224  *
 225  *                      3. Determining the CPU's topology. This includes
 226  *                      information about how many cores and threads are present
 227  *                      in the package. It also is responsible for figuring out
 228  *                      which logical CPUs are potentially part of the same core
 229  *                      and what other resources they might share. For more
 230  *                      information see the 'Topology' section.
 231  *
 232  *                      4. Determining the set of CPU security-specific features
 233  *                      that we need to worry about and determine the
 234  *                      appropriate set of workarounds.
 235  *
 236  *                      Pass 1 on the boot CPU occurs before KMDB is started.
 237  *
 238  *      Pass 2          The second pass is done after startup(). Here, we check
 239  *                      other miscellaneous features. Most of this is gathering
 240  *                      additional basic and extended features that we'll use in
 241  *                      later passes or for debugging support.
 242  *
 243  *      Pass 3          The third pass occurs after the kernel memory allocator
 244  *                      has been fully initialized. This gathers information
 245  *                      where we might need dynamic memory available for our
 246  *                      uses. This includes several varying width leaves that
 247  *                      have cache information and the processor's brand string.
 248  *
 249  *      Pass 4          The fourth and final normal pass is performed after the
 250  *                      kernel has brought most everything online. This is
 251  *                      invoked from post_startup(). In this pass, we go through
 252  *                      the set of features that we have enabled and turn that
 253  *                      into the hardware auxiliary vector features that
 254  *                      userland receives. This is used by userland, primarily
 255  *                      by the run-time link-editor (RTLD), though userland
 256  *                      software could also refer to it directly.
 257  *
 258  *      Microcode       After a microcode update, we do a selective rescan of
 259  *                      the cpuid leaves to determine what features have
 260  *                      changed. Microcode updates can provide more details
 261  *                      about security related features to deal with issues like
 262  *                      Spectre and L1TF. On occasion, vendors have violated
 263  *                      their contract and removed bits. However, we don't try
 264  *                      to detect that because that puts us in a situation that
 *                      we really can't deal with. As such, the only things we
 *                      rescan today are security related features. See
 267  *                      cpuid_pass_ucode().
 268  *
 269  * All of the passes (except pass 0) are run on all CPUs. However, for the most
 270  * part we only care about what the boot CPU says about this information and use
 271  * the other CPUs as a rough guide to sanity check that we have the same feature
 272  * set.
 273  *
 * We do not support running multiple logical CPUs with different, let alone
 * disjoint, feature sets.
 276  *
 277  * ------------------
 278  * Processor Topology
 279  * ------------------
 280  *
 281  * One of the important things that we need to do is to understand the topology
 282  * of the underlying processor. When we say topology in this case, we're trying
 283  * to understand the relationship between the logical CPUs that the operating
 284  * system sees and the underlying physical layout. Different logical CPUs may
 285  * share different resources which can have important consequences for the
 286  * performance of the system. For example, they may share caches, execution
 287  * units, and more.
 288  *
 289  * The topology of the processor changes from generation to generation and
 290  * vendor to vendor.  Along with that, different vendors use different
 291  * terminology, and the operating system itself uses occasionally overlapping
 292  * terminology. It's important to understand what this topology looks like so
 293  * one can understand the different things that we try to calculate and
 294  * determine.
 295  *
 296  * To get started, let's talk about a little bit of terminology that we've used
 297  * so far, is used throughout this file, and is fairly generic across multiple
 298  * vendors:
 299  *
 300  * CPU
 301  *      A central processing unit (CPU) refers to a logical and/or virtual
 302  *      entity that the operating system can execute instructions on. The
 303  *      underlying resources for this CPU may be shared between multiple
 304  *      entities; however, to the operating system it is a discrete unit.
 305  *
 306  * PROCESSOR and PACKAGE
 307  *
 308  *      Generally, when we use the term 'processor' on its own, we are referring
 309  *      to the physical entity that one buys and plugs into a board. However,
 310  *      because processor has been overloaded and one might see it used to mean
 311  *      multiple different levels, we will instead use the term 'package' for
 312  *      the rest of this file. The term package comes from the electrical
 313  *      engineering side and refers to the physical entity that encloses the
 314  *      electronics inside. Strictly speaking the package can contain more than
 315  *      just the CPU, for example, on many processors it may also have what's
 *      called an 'integrated graphics processing unit (GPU)'. Because the
 317  *      package can encapsulate multiple units, it is the largest physical unit
 318  *      that we refer to.
 319  *
 320  * SOCKET
 321  *
 *      A socket refers to a unit on a system board (generally the motherboard)
 *      that can receive a package. A single package, or processor, is plugged
 *      into a single socket. A system may have multiple sockets. Oftentimes,
 *      the term socket is used interchangeably with package and refers to the
 *      electrical component that has been plugged in, and not the receptacle
 *      itself.
 327  *
 328  * CORE
 329  *
 330  *      A core refers to the physical instantiation of a CPU, generally, with a
 331  *      full set of hardware resources available to it. A package may contain
 332  *      multiple cores inside of it or it may just have a single one. A
 333  *      processor with more than one core is often referred to as 'multi-core'.
 334  *      In illumos, we will use the feature X86FSET_CMP to refer to a system
 335  *      that has 'multi-core' processors.
 336  *
 337  *      A core may expose a single logical CPU to the operating system, or it
 338  *      may expose multiple CPUs, which we call threads, defined below.
 339  *
 340  *      Some resources may still be shared by cores in the same package. For
 341  *      example, many processors will share the level 3 cache between cores.
 342  *      Some AMD generations share hardware resources between cores. For more
 343  *      information on that see the section 'AMD Topology'.
 344  *
 345  * THREAD and STRAND
 346  *
 *      In this file, generally a thread refers to a hardware resource and not
 348  *      the operating system's logical abstraction. A thread is always exposed
 349  *      as an independent logical CPU to the operating system. A thread belongs
 350  *      to a specific core. A core may have more than one thread. When that is
 351  *      the case, the threads that are part of the same core are often referred
 352  *      to as 'siblings'.
 353  *
 354  *      When multiple threads exist, this is generally referred to as
 355  *      simultaneous multi-threading (SMT). When Intel introduced this in their
 356  *      processors they called it hyper-threading (HT). When multiple threads
 357  *      are active in a core, they split the resources of the core. For example,
 358  *      two threads may share the same set of hardware execution units.
 359  *
 360  *      The operating system often uses the term 'strand' to refer to a thread.
 361  *      This helps disambiguate it from the software concept.
 362  *
 363  * CHIP
 364  *
 365  *      Unfortunately, the term 'chip' is dramatically overloaded. At its most
 366  *      base meaning, it is used to refer to a single integrated circuit, which
 367  *      may or may not be the only thing in the package. In illumos, when you
 368  *      see the term 'chip' it is almost always referring to the same thing as
 369  *      the 'package'. However, many vendors may use chip to refer to one of
 370  *      many integrated circuits that have been placed in the package. As an
 371  *      example, see the subsequent definition.
 372  *
 *      To try and keep things consistent, we will only use chip when referring
 *      to the entire integrated circuit package, with the exception of the
 *      definition of multi-chip module (because it is in the name), and we will
 *      use the term 'die' when we want the more general, potentially
 *      sub-component definition.
 378  *
 379  * DIE
 380  *
 381  *      A die refers to an integrated circuit. Inside of the package there may
 382  *      be a single die or multiple dies. This is sometimes called a 'chip' in
 383  *      vendor's parlance, but in this file, we use the term die to refer to a
 384  *      subcomponent.
 385  *
 386  * MULTI-CHIP MODULE
 387  *
 388  *      A multi-chip module (MCM) refers to putting multiple distinct chips that
 389  *      are connected together in the same package. When a multi-chip design is
 390  *      used, generally each chip is manufactured independently and then joined
 391  *      together in the package. For example, on AMD's Zen microarchitecture
 392  *      (family 0x17), the package contains several dies (the second meaning of
 393  *      chip from above) that are connected together.
 394  *
 395  * CACHE
 396  *
 397  *      A cache is a part of the processor that maintains copies of recently
 398  *      accessed memory. Caches are split into levels and then into types.
 399  *      Commonly there are one to three levels, called level one, two, and
 400  *      three. The lower the level, the smaller it is, the closer it is to the
 401  *      execution units of the CPU, and the faster it is to access. The layout
 402  *      and design of the cache come in many different flavors, consult other
 403  *      resources for a discussion of those.
 404  *
 405  *      Caches are generally split into two types, the instruction and data
 406  *      cache. The caches contain what their names suggest, the instruction
 407  *      cache has executable program text, while the data cache has all other
 408  *      memory that the processor accesses. As of this writing, data is kept
 409  *      coherent between all of the caches on x86, so if one modifies program
 410  *      text before it is executed, that will be in the data cache, and the
 411  *      instruction cache will be synchronized with that change when the
 412  *      processor actually executes those instructions. This coherency also
 413  *      covers the fact that data could show up in multiple caches.
 414  *
 415  *      Generally, the lowest level caches are specific to a core. However, the
 *      last level cache is shared between some number of cores. The number of
 417  *      CPUs sharing this last level cache is important. This has implications
 418  *      for the choices that the scheduler makes, as accessing memory that might
 419  *      be in a remote cache after thread migration can be quite expensive.
 420  *
 421  *      Sometimes, the word cache is abbreviated with a '$', because in US
 422  *      English the word cache is pronounced the same as cash. So L1D$ refers to
 423  *      the L1 data cache, and L2$ would be the L2 cache. This will not be used
 424  *      in the rest of this theory statement for clarity.
 425  *
 426  * MEMORY CONTROLLER
 427  *
 428  *      The memory controller is a component that provides access to DRAM. Each
 429  *      memory controller can access a set number of DRAM channels. Each channel
 430  *      can have a number of DIMMs (sticks of memory) associated with it. A
 431  *      given package may have more than one memory controller. The association
 432  *      of the memory controller to a group of cores is important as it is
 433  *      cheaper to access memory on the controller that you are associated with.
 434  *
 435  * NUMA
 436  *
 *      NUMA, or non-uniform memory access, describes a way that systems are
 *      built. On x86, any processor core can address all of the memory in the
 *      system. However, when using multiple sockets or possibly within a
 440  *      multi-chip module, some of that memory is physically closer and some of
 441  *      it is further. Memory that is further away is more expensive to access.
 442  *      Consider the following image of multiple sockets with memory:
 443  *
 444  *      +--------+                                                +--------+
 445  *      | DIMM A |         +----------+      +----------+         | DIMM D |
 446  *      +--------+-+       |          |      |          |       +-+------+-+
 447  *        | DIMM B |=======| Socket 0 |======| Socket 1 |=======| DIMM E |
 448  *        +--------+-+     |          |      |          |     +-+------+-+
 449  *          | DIMM C |     +----------+      +----------+     | DIMM F |
 450  *          +--------+                                        +--------+
 451  *
 452  *      In this example, Socket 0 is closer to DIMMs A-C while Socket 1 is
 453  *      closer to DIMMs D-F. This means that it is cheaper for socket 0 to
 454  *      access DIMMs A-C and more expensive to access D-F as it has to go
 455  *      through Socket 1 to get there. The inverse is true for Socket 1. DIMMs
 456  *      D-F are cheaper than A-C. While the socket form is the most common, when
 457  *      using multi-chip modules, this can also sometimes occur. For another
 458  *      example of this that's more involved, see the AMD topology section.
 459  *
 460  *
 461  * Intel Topology
 462  * --------------
 463  *
 * Most Intel processors since Nehalem (as of this writing the current gen
 465  * is Skylake / Cannon Lake) follow a fairly similar pattern. The CPU portion of
 466  * the package is a single monolithic die. MCMs currently aren't used. Most
 467  * parts have three levels of caches, with the L3 cache being shared between
 468  * all of the cores on the package. The L1/L2 cache is generally specific to
 469  * an individual core. The following image shows at a simplified level what
 * this looks like. The memory controller is commonly part of something called
 * the 'Uncore': functionality that used to live in separate physical chips
 * outside the package, but is now part of the same die.
 473  *
 474  *  +-----------------------------------------------------------------------+
 475  *  | Package                                                               |
 476  *  |  +-------------------+  +-------------------+  +-------------------+  |
 477  *  |  | Core              |  | Core              |  | Core              |  |
 478  *  |  |  +--------+ +---+ |  |  +--------+ +---+ |  |  +--------+ +---+ |  |
 479  *  |  |  | Thread | | L | |  |  | Thread | | L | |  |  | Thread | | L | |  |
 480  *  |  |  +--------+ | 1 | |  |  +--------+ | 1 | |  |  +--------+ | 1 | |  |
 481  *  |  |  +--------+ |   | |  |  +--------+ |   | |  |  +--------+ |   | |  |
 482  *  |  |  | Thread | |   | |  |  | Thread | |   | |  |  | Thread | |   | |  |
 483  *  |  |  +--------+ +---+ |  |  +--------+ +---+ |  |  +--------+ +---+ |  |
 484  *  |  |  +--------------+ |  |  +--------------+ |  |  +--------------+ |  |
 485  *  |  |  | L2 Cache     | |  |  | L2 Cache     | |  |  | L2 Cache     | |  |
 486  *  |  |  +--------------+ |  |  +--------------+ |  |  +--------------+ |  |
 487  *  |  +-------------------+  +-------------------+  +-------------------+  |
 488  *  | +-------------------------------------------------------------------+ |
 489  *  | |                         Shared L3 Cache                           | |
 490  *  | +-------------------------------------------------------------------+ |
 491  *  | +-------------------------------------------------------------------+ |
 492  *  | |                        Memory Controller                          | |
 493  *  | +-------------------------------------------------------------------+ |
 494  *  +-----------------------------------------------------------------------+
 495  *
 496  * A side effect of this current architecture is that what we care about from a
 * scheduling and topology perspective is simplified. In general we care about
 498  * understanding which logical CPUs are part of the same core and socket.
 499  *
 500  * To determine the relationship between threads and cores, Intel initially used
 501  * the identifier in the advanced programmable interrupt controller (APIC). They
 502  * also added cpuid leaf 4 to give additional information about the number of
 * threads and CPUs in the processor. With the addition of x2apic (which
 * increased the number of addressable logical CPUs from 8 bits to 32 bits), an
 * additional cpuid topology leaf, 0xB, was added.
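 *
 * To give a rough sense of how leaf 0xB is consumed, here is a hedged sketch
 * using the __cpuid_insn()/struct cpuid_regs helpers and the BITX() macro
 * that the rest of this file uses (the real logic lives in the topology code
 * below). Sub-leaf 0 describes the SMT level; %eax reports how many low bits
 * of the x2APIC ID (returned in %edx) identify a thread within its core:
 *
 *      struct cpuid_regs regs = { 0 };
 *      uint_t smt_shift, thread_id;
 *
 *      regs.cp_eax = 0xb;
 *      regs.cp_ecx = 0;
 *      (void) __cpuid_insn(&regs);
 *
 *      smt_shift = BITX(regs.cp_eax, 4, 0);
 *      thread_id = regs.cp_edx & ((1 << smt_shift) - 1);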
 506  *
 507  * AMD Topology
 508  * ------------
 509  *
 510  * When discussing AMD topology, we want to break this into three distinct
 511  * generations of topology. There's the basic topology that has been used in
 512  * family 0xf+ (Opteron, Athlon64), there's the topology that was introduced
 513  * with family 0x15 (Bulldozer), and there's the topology that was introduced
 514  * with family 0x17 (Zen). AMD also has some additional terminology that's worth
 515  * talking about.
 516  *
 517  * Until the introduction of family 0x17 (Zen), AMD did not implement something
 518  * that they considered SMT. Whether or not the AMD processors have SMT
 519  * influences many things including scheduling and reliability, availability,
 520  * and serviceability (RAS) features.
 521  *
 522  * NODE
 523  *
 524  *      AMD uses the term node to refer to a die that contains a number of cores
 525  *      and I/O resources. Depending on the processor family and model, more
 526  *      than one node can be present in the package. When there is more than one
 527  *      node this indicates a multi-chip module. Usually each node has its own
 528  *      access to memory and I/O devices. This is important and generally
 529  *      different from the corresponding Intel Nehalem-Skylake+ processors. As a
 530  *      result, we track this relationship in the operating system.
 531  *
 532  *      In processors with an L3 cache, the L3 cache is generally shared across
 533  *      the entire node, though the way this is carved up varies from generation
 534  *      to generation.
 535  *
 536  * BULLDOZER
 537  *
 538  *      Starting with the Bulldozer family (0x15) and continuing until the
 539  *      introduction of the Zen microarchitecture, AMD introduced the idea of a
 540  *      compute unit. In a compute unit, two traditional cores share a number of
 541  *      hardware resources. Critically, they share the FPU, L1 instruction
 542  *      cache, and the L2 cache. Several compute units were then combined inside
 543  *      of a single node.  Because the integer execution units, L1 data cache,
 544  *      and some other resources were not shared between the cores, AMD never
 545  *      considered this to be SMT.
 546  *
 547  * ZEN
 548  *
 *      The Zen family (0x17) uses a multi-chip module (MCM) design; the
 *      individual dies are called Zeppelin. These dies are similar to the idea
 *      of nodes used previously. Each of these nodes has two DRAM channels
 *      which all of the
 552  *      cores in the node can access uniformly. These nodes are linked together
 553  *      in the package, creating a NUMA environment.
 554  *
 555  *      The Zeppelin die itself contains two different 'core complexes'. Each
 556  *      core complex consists of four cores which each have two threads, for a
 557  *      total of 8 logical CPUs per complex. Unlike other generations,
 558  *      where all the logical CPUs in a given node share the L3 cache, here each
 559  *      core complex has its own shared L3 cache.
 560  *
 561  *      A further thing that we need to consider is that in some configurations,
 562  *      particularly with the Threadripper line of processors, not every die
 563  *      actually has its memory controllers wired up to actual memory channels.
 *      This means that some dies have memory attached to them and others
 *      don't.
 566  *
 567  *      To put Zen in perspective, consider the following images:
 568  *
 569  *      +--------------------------------------------------------+
 570  *      | Core Complex                                           |
 571  *      | +-------------------+    +-------------------+  +---+  |
 572  *      | | Core       +----+ |    | Core       +----+ |  |   |  |
 573  *      | | +--------+ | L2 | |    | +--------+ | L2 | |  |   |  |
 574  *      | | | Thread | +----+ |    | | Thread | +----+ |  |   |  |
 575  *      | | +--------+-+ +--+ |    | +--------+-+ +--+ |  | L |  |
 576  *      | |   | Thread | |L1| |    |   | Thread | |L1| |  | 3 |  |
 577  *      | |   +--------+ +--+ |    |   +--------+ +--+ |  |   |  |
 578  *      | +-------------------+    +-------------------+  | C |  |
 579  *      | +-------------------+    +-------------------+  | a |  |
 580  *      | | Core       +----+ |    | Core       +----+ |  | c |  |
 581  *      | | +--------+ | L2 | |    | +--------+ | L2 | |  | h |  |
 582  *      | | | Thread | +----+ |    | | Thread | +----+ |  | e |  |
 583  *      | | +--------+-+ +--+ |    | +--------+-+ +--+ |  |   |  |
 584  *      | |   | Thread | |L1| |    |   | Thread | |L1| |  |   |  |
 585  *      | |   +--------+ +--+ |    |   +--------+ +--+ |  |   |  |
 586  *      | +-------------------+    +-------------------+  +---+  |
 587  *      |                                                        |
 588  *      +--------------------------------------------------------+
 589  *
 590  *  This first image represents a single Zen core complex that consists of four
 591  *  cores.
 592  *
 593  *
 594  *      +--------------------------------------------------------+
 595  *      | Zeppelin Die                                           |
 596  *      |  +--------------------------------------------------+  |
 597  *      |  |         I/O Units (PCIe, SATA, USB, etc.)        |  |
 598  *      |  +--------------------------------------------------+  |
 599  *      |                           HH                           |
 600  *      |          +-----------+    HH    +-----------+          |
 601  *      |          |           |    HH    |           |          |
 602  *      |          |    Core   |==========|    Core   |          |
 603  *      |          |  Complex  |==========|  Complex  |          |
 604  *      |          |           |    HH    |           |          |
 605  *      |          +-----------+    HH    +-----------+          |
 606  *      |                           HH                           |
 607  *      |  +--------------------------------------------------+  |
 608  *      |  |                Memory Controller                 |  |
 609  *      |  +--------------------------------------------------+  |
 610  *      |                                                        |
 611  *      +--------------------------------------------------------+
 612  *
 *  This image represents a single Zeppelin Die. Note how both core complexes
 *  are connected to the same memory controller and I/O units. While each core
 615  *  complex has its own L3 cache as seen in the first image, they both have
 616  *  uniform access to memory.
 617  *
 618  *
 619  *                      PP                     PP
 620  *                      PP                     PP
 621  *           +----------PP---------------------PP---------+
 622  *           |          PP                     PP         |
 623  *           |    +-----------+          +-----------+    |
 624  *           |    |           |          |           |    |
 625  *       MMMMMMMMM|  Zeppelin |==========|  Zeppelin |MMMMMMMMM
 626  *       MMMMMMMMM|    Die    |==========|    Die    |MMMMMMMMM
 627  *           |    |           |          |           |    |
 628  *           |    +-----------+ooo    ...+-----------+    |
 629  *           |          HH      ooo  ...       HH         |
 630  *           |          HH        oo..         HH         |
 631  *           |          HH        ..oo         HH         |
 632  *           |          HH      ...  ooo       HH         |
 633  *           |    +-----------+...    ooo+-----------+    |
 634  *           |    |           |          |           |    |
 635  *       MMMMMMMMM|  Zeppelin |==========|  Zeppelin |MMMMMMMMM
 636  *       MMMMMMMMM|    Die    |==========|    Die    |MMMMMMMMM
 637  *           |    |           |          |           |    |
 638  *           |    +-----------+          +-----------+    |
 639  *           |          PP                     PP         |
 640  *           +----------PP---------------------PP---------+
 641  *                      PP                     PP
 642  *                      PP                     PP
 643  *
 644  *  This image represents a single Zen package. In this example, it has four
 645  *  Zeppelin dies, though some configurations only have a single one. In this
 646  *  example, each die is directly connected to the next. Also, each die is
 647  *  represented as being connected to memory by the 'M' character and connected
 648  *  to PCIe devices and other I/O, by the 'P' character. Because each Zeppelin
 649  *  die is made up of two core complexes, we have multiple different NUMA
 650  *  domains that we care about for these systems.
 651  *
 652  * CPUID LEAVES
 653  *
 654  * There are a few different CPUID leaves that we can use to try and understand
 655  * the actual state of the world. As part of the introduction of family 0xf, AMD
 656  * added CPUID leaf 0x80000008. This leaf tells us the number of logical
 657  * processors that are in the system. Because families before Zen didn't have
 658  * SMT, this was always the number of cores that were in the system. However, it
 659  * should always be thought of as the number of logical threads to be consistent
 660  * between generations. In addition we also get the size of the APIC ID that is
 661  * used to represent the number of logical processors. This is important for
 662  * deriving topology information.
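 *
 * As a hedged sketch of pulling those two pieces out of leaf 0x80000008 (bit
 * positions as understood here; the real parsing is done later in this file):
 *
 *      struct cpuid_regs regs = { 0 };
 *      uint_t nthreads, apic_coreid_size;
 *
 *      regs.cp_eax = 0x80000008;
 *      (void) __cpuid_insn(&regs);
 *
 *      nthreads = BITX(regs.cp_ecx, 7, 0) + 1;
 *      apic_coreid_size = BITX(regs.cp_ecx, 15, 12);
 *
 * A zero apic_coreid_size indicates that the legacy behavior of sizing the ID
 * space from the thread count should be used.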
 663  *
 664  * In the Bulldozer family, AMD added leaf 0x8000001E. The information varies a
 665  * bit between Bulldozer and later families, but it is quite useful in
 666  * determining the topology information. Because this information has changed
 667  * across family generations, it's worth calling out what these mean
 668  * explicitly. The registers have the following meanings:
 669  *
 670  *      %eax    The APIC ID. The entire register is defined to have a 32-bit
 671  *              APIC ID, even though on systems without x2apic support, it will
 672  *              be limited to 8 bits.
 673  *
 674  *      %ebx    On Bulldozer-era systems this contains information about the
 675  *              number of cores that are in a compute unit (cores that share
 676  *              resources). It also contains a per-package compute unit ID that
 677  *              identifies which compute unit the logical CPU is a part of.
 678  *
 679  *              On Zen-era systems this instead contains the number of threads
 680  *              per core and the ID of the core that the logical CPU is a part
 681  *              of. Note, this ID is unique only to the package, it is not
 682  *              globally unique across the entire system.
 683  *
 684  *      %ecx    This contains the number of nodes that exist in the package. It
 685  *              also contains an ID that identifies which node the logical CPU
 686  *              is a part of.
 687  *
 688  * Finally, we also use cpuid leaf 0x8000001D to determine information about the
 689  * cache layout to determine which logical CPUs are sharing which caches.
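 *
 * To make the Zen-era form of leaf 0x8000001E concrete, here is a hedged
 * sketch of decoding it (bit positions as understood here and subject to the
 * per-family differences noted above; on Bulldozer, %ebx instead carries the
 * compute unit ID and the number of cores per compute unit):
 *
 *      struct cpuid_regs regs = { 0 };
 *      uint32_t apicid;
 *      uint_t coreid, threads_per_core, nodeid, nodes_per_pkg;
 *
 *      regs.cp_eax = 0x8000001e;
 *      (void) __cpuid_insn(&regs);
 *
 *      apicid = regs.cp_eax;
 *      coreid = BITX(regs.cp_ebx, 7, 0);
 *      threads_per_core = BITX(regs.cp_ebx, 15, 8) + 1;
 *      nodeid = BITX(regs.cp_ecx, 7, 0);
 *      nodes_per_pkg = BITX(regs.cp_ecx, 10, 8) + 1;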
 690  *
 691  * illumos Topology
 692  * ----------------
 693  *
 694  * Based on the above we synthesize the information into several different
 695  * variables that we store in the 'struct cpuid_info'. We'll go into the details
 696  * of what each member is supposed to represent and their uniqueness. In
 697  * general, there are two levels of uniqueness that we care about. We care about
 698  * an ID that is globally unique. That means that it will be unique across all
 699  * entities in the system. For example, the default logical CPU ID is globally
 700  * unique. On the other hand, there is some information that we only care about
 701  * being unique within the context of a single package / socket. Here are the
 702  * variables that we keep track of and their meaning.
 703  *
 * Several of the values that represent an identifier, with the exception of
 * cpi_apicid, are allowed to be synthetic.
 706  *
 707  *
 708  * cpi_apicid
 709  *
 710  *      This is the value of the CPU's APIC id. This should be the full 32-bit
 711  *      ID if the CPU is using the x2apic. Otherwise, it should be the 8-bit
 712  *      APIC ID. This value is globally unique between all logical CPUs across
 713  *      all packages. This is usually required by the APIC.
 714  *
 715  * cpi_chipid
 716  *
 717  *      This value indicates the ID of the package that the logical CPU is a
 718  *      part of. This value is allowed to be synthetic. It is usually derived by
 719  *      taking the CPU's APIC ID and determining how many bits are used to
 720  *      represent CPU cores in the package. All logical CPUs that are part of
 721  *      the same package must have the same value.
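 *
 *      A simplified sketch of that derivation (the real code handles several
 *      vendor-specific cases): if 'coreid_shift' is the number of low APIC ID
 *      bits used to number the cores and threads within a package, then
 *
 *              cpi_chipid = cpi_apicid >> coreid_shift;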
 722  *
 723  * cpi_coreid
 724  *
 725  *      This represents the ID of a CPU core. Two logical CPUs should only have
 726  *      the same cpi_coreid value if they are part of the same core. These
 727  *      values may be synthetic. On systems that support SMT, this value is
 728  *      usually derived from the APIC ID, otherwise it is often synthetic and
 729  *      just set to the value of the cpu_id in the cpu_t.
 730  *
 731  * cpi_pkgcoreid
 732  *
 733  *      This is similar to the cpi_coreid in that logical CPUs that are part of
 734  *      the same core should have the same ID. The main difference is that these
 735  *      values are only required to be unique to a given socket.
 736  *
 737  * cpi_clogid
 738  *
 739  *      This represents the logical ID of a logical CPU. This value should be
 740  *      unique within a given socket for each logical CPU. This is allowed to be
 741  *      synthetic, though it is usually based off of the CPU's apic ID. The
 *      broader system expects that logical CPUs that are part of the same
 *      core have contiguous numbers. For example, if there were two threads
 *      per core, then the IDs of sibling CPUs divided by two should be the
 *      same, and the first sibling's ID modulo two should be zero while the
 *      second's should be one. For example, IDs 4 and 5 indicate two logical
 *      CPUs that are part of the same core. But IDs 5 and 6 represent two
 *      logical CPUs that are part of different cores.
 748  *
 749  *      While it is common for the cpi_coreid and the cpi_clogid to be derived
 750  *      from the same source, strictly speaking, they don't have to be and the
 751  *      two values should be considered logically independent. One should not
 752  *      try to compare a logical CPU's cpi_coreid and cpi_clogid to determine
 753  *      some kind of relationship. While this is tempting, we've seen cases on
 754  *      AMD family 0xf where the system's cpu id is not related to its APIC ID.
 755  *
 756  * cpi_ncpu_per_chip
 757  *
 758  *      This value indicates the total number of logical CPUs that exist in the
 759  *      physical package. Critically, this is not the number of logical CPUs
 760  *      that exist for just the single core.
 761  *
 762  *      This value should be the same for all logical CPUs in the same package.
 763  *
 764  * cpi_ncore_per_chip
 765  *
 766  *      This value indicates the total number of physical CPU cores that exist
 767  *      in the package. The system compares this value with cpi_ncpu_per_chip to
 768  *      determine if simultaneous multi-threading (SMT) is enabled. When
 769  *      cpi_ncpu_per_chip equals cpi_ncore_per_chip, then there is no SMT and
 770  *      the X86FSET_HTT feature is not set. If this value is greater than one,
 *      then we consider the processor to have the feature X86FSET_CMP, to
 772  *      indicate that there is support for more than one core.
 773  *
 774  *      This value should be the same for all logical CPUs in the same package.
 775  *
 776  * cpi_procnodes_per_pkg
 777  *
 778  *      This value indicates the number of 'nodes' that exist in the package.
 *      When a processor is actually a multi-chip module, this represents the
 780  *      number of such modules that exist in the package. Currently, on Intel
 781  *      based systems this member is always set to 1.
 782  *
 783  *      This value should be the same for all logical CPUs in the same package.
 784  *
 785  * cpi_procnodeid
 786  *
 787  *      This value indicates the ID of the node that the logical CPU is a part
 788  *      of. All logical CPUs that are in the same node must have the same value
 789  *      here. This value must be unique across all of the packages in the
 790  *      system.  On Intel based systems, this is currently set to the value in
 791  *      cpi_chipid because there is only one node.
 792  *
 793  * cpi_cores_per_compunit
 794  *
 795  *      This value indicates the number of cores that are part of a compute
 796  *      unit. See the AMD topology section for this. This member only has real
 797  *      meaning currently for AMD Bulldozer family processors. For all other
 798  *      processors, this should currently be set to 1.
 799  *
 800  * cpi_compunitid
 801  *
 802  *      This indicates the compute unit that the logical CPU belongs to. For
 803  *      processors without AMD Bulldozer-style compute units this should be set
 804  *      to the value of cpi_coreid.
 805  *
 806  * cpi_ncpu_shr_last_cache
 807  *
 808  *      This indicates the number of logical CPUs that are sharing the same last
 809  *      level cache. This value should be the same for all CPUs that are sharing
 810  *      that cache. The last cache refers to the cache that is closest to memory
 811  *      and furthest away from the CPU.
 812  *
 813  * cpi_last_lvl_cacheid
 814  *
 815  *      This indicates the ID of the last cache that the logical CPU uses. This
 816  *      cache is often shared between multiple logical CPUs and is the cache
 817  *      that is closest to memory and furthest away from the CPU. This value
 818  *      should be the same for a group of logical CPUs only if they actually
 819  *      share the same last level cache. IDs should not overlap between
 820  *      packages.
 821  *
 822  * cpi_ncore_bits
 823  *
 824  *      This indicates the number of bits that are required to represent all of
 825  *      the cores in the system. As cores are derived based on their APIC IDs,
 826  *      we aren't guaranteed a run of APIC IDs starting from zero. It's OK for
 827  *      this value to be larger than the actual number of IDs that are present
 828  *      in the system. This is used to size tables by the CMI framework. It is
 829  *      only filled in for Intel and AMD CPUs.
 830  *
 831  * cpi_nthread_bits
 832  *
 833  *      This indicates the number of bits required to represent all of the IDs
 834  *      that cover the logical CPUs that exist on a given core. It's OK for this
 835  *      value to be larger than the actual number of IDs that are present in the
 836  *      system.  This is used to size tables by the CMI framework. It is
 837  *      only filled in for Intel and AMD CPUs.
 838  *
 839  * -----------
 840  * Hypervisors
 841  * -----------
 842  *
 843  * If trying to manage the differences between vendors wasn't bad enough, it can
 844  * get worse thanks to our friend hardware virtualization. Hypervisors are given
 845  * the ability to interpose on all cpuid instructions and change them to suit
 846  * their purposes. In general, this is necessary as the hypervisor wants to be
 847  * able to present a more uniform set of features or not necessarily give the
 848  * guest operating system kernel knowledge of all features so it can be
 849  * more easily migrated between systems.
 850  *
 851  * When it comes to trying to determine topology information, this can be a
 852  * double edged sword. When a hypervisor doesn't actually implement a cpuid
 * leaf, it'll often return all zeros. Because of that, you'll often see various
 * checks scattered about that verify fields are non-zero before we assume we
 * can use them.
 856  *
 857  * When it comes to topology information, the hypervisor is often incentivized
 858  * to lie to you about topology. This is because it doesn't always actually
 859  * guarantee that topology at all. The topology path we take in the system
 860  * depends on how the CPU advertises itself. If it advertises itself as an Intel
 * or AMD CPU, then we basically do our normal path. However, when the
 * hypervisor doesn't advertise an actual vendor, that usually turns into
 * multiple one-core CPUs that we enumerate as often being on different
 * sockets. The actual behavior depends greatly on what the hypervisor actually
 * exposes to us.
 865  *
 866  * --------------------
 867  * Exposing Information
 868  * --------------------
 869  *
 870  * We expose CPUID information in three different forms in the system.
 871  *
 872  * The first is through the x86_featureset variable. This is used in conjunction
 873  * with the is_x86_feature() function. This is queried by x86-specific functions
 874  * to determine which features are or aren't present in the system and to make
 875  * decisions based upon them. For example, users of this include everything from
 876  * parts of the system dedicated to reliability, availability, and
 877  * serviceability (RAS), to making decisions about how to handle security
 878  * mitigations, to various x86-specific drivers. General purpose or
 879  * architecture independent drivers should never be calling this function.
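 *
 * For example, an x86-specific consumer would typically check for a feature
 * like this (X86FSET_SSE2 is one of the feature constants that back the name
 * table below):
 *
 *      if (is_x86_feature(x86_featureset, X86FSET_SSE2)) {
 *              ... take an SSE2-specific code path ...
 *      }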
 880  *
 881  * The second means is through the auxiliary vector. The auxiliary vector is a
 882  * series of tagged data that the kernel passes down to a user program when it
 883  * begins executing. This information is used to indicate to programs what
 884  * instruction set extensions are present. For example, information about the
 885  * CPU supporting the machine check architecture (MCA) wouldn't be passed down
 886  * since user programs cannot make use of it. However, things like the AVX
 887  * instruction sets are. Programs use this information to make run-time
 888  * decisions about what features they should use. As an example, the run-time
 889  * link-editor (rtld) can relocate different functions depending on the hardware
 890  * support available.
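 *
 * From userland, this is typically consumed through getisax(3C), which reads
 * the AT_SUN_HWCAP auxiliary vector entries. A rough sketch, where the
 * AV_386_* flags come from <sys/auxv_386.h>:
 *
 *      uint32_t hwcap[2] = { 0 };
 *
 *      (void) getisax(hwcap, 2);
 *      if (hwcap[0] & AV_386_AVX) {
 *              ... use AVX-optimized routines ...
 *      }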
 891  *
 892  * The final form is through a series of accessor functions that all have the
 893  * form cpuid_get*. This is used by a number of different subsystems in the
 894  * kernel to determine more detailed information about what we're running on,
 895  * topology information, etc. Some of these subsystems include processor groups
 * (uts/common/os/pg.c), CPU Module Interface (uts/i86pc/os/cmi.c), ACPI,
 897  * microcode, and performance monitoring. These functions all ASSERT that the
 898  * CPU they're being called on has reached a certain cpuid pass. If the passes
 899  * are rearranged, then this needs to be adjusted.
 900  */
 901 
 902 #include <sys/types.h>
 903 #include <sys/archsystm.h>
 904 #include <sys/x86_archext.h>
 905 #include <sys/kmem.h>
 906 #include <sys/systm.h>
 907 #include <sys/cmn_err.h>
 908 #include <sys/sunddi.h>
 909 #include <sys/sunndi.h>
 910 #include <sys/cpuvar.h>
 911 #include <sys/processor.h>
 912 #include <sys/sysmacros.h>
 913 #include <sys/pg.h>
 914 #include <sys/fp.h>
 915 #include <sys/controlregs.h>
 916 #include <sys/bitmap.h>
 917 #include <sys/auxv_386.h>
 918 #include <sys/memnode.h>
 919 #include <sys/pci_cfgspace.h>
 920 #include <sys/comm_page.h>
 921 #include <sys/mach_mmu.h>
 922 #include <sys/ucode.h>
 923 #include <sys/tsc.h>
 924 
 925 #ifdef __xpv
 926 #include <sys/hypervisor.h>
 927 #else
 928 #include <sys/ontrap.h>
 929 #endif
 930 
 931 uint_t x86_vendor = X86_VENDOR_IntelClone;
 932 uint_t x86_type = X86_TYPE_OTHER;
 933 uint_t x86_clflush_size = 0;
 934 
 935 #if defined(__xpv)
 936 int x86_use_pcid = 0;
 937 int x86_use_invpcid = 0;
 938 #else
 939 int x86_use_pcid = -1;
 940 int x86_use_invpcid = -1;
 941 #endif
 942 
 943 uint_t pentiumpro_bug4046376;
 944 
 945 uchar_t x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)];
 946 
 947 static char *x86_feature_names[NUM_X86_FEATURES] = {
 948         "lgpg",
 949         "tsc",
 950         "msr",
 951         "mtrr",
 952         "pge",
 953         "de",
 954         "cmov",
 955         "mmx",
 956         "mca",
 957         "pae",
 958         "cv8",
 959         "pat",
 960         "sep",
 961         "sse",
 962         "sse2",
 963         "htt",
 964         "asysc",
 965         "nx",
 966         "sse3",
 967         "cx16",
 968         "cmp",
 969         "tscp",
 970         "mwait",
 971         "sse4a",
 972         "cpuid",
 973         "ssse3",
 974         "sse4_1",
 975         "sse4_2",
 976         "1gpg",
 977         "clfsh",
 978         "64",
 979         "aes",
 980         "pclmulqdq",
 981         "xsave",
 982         "avx",
 983         "vmx",
 984         "svm",
 985         "topoext",
 986         "f16c",
 987         "rdrand",
 988         "x2apic",
 989         "avx2",
 990         "bmi1",
 991         "bmi2",
 992         "fma",
 993         "smep",
 994         "smap",
 995         "adx",
 996         "rdseed",
 997         "mpx",
 998         "avx512f",
 999         "avx512dq",
1000         "avx512pf",
1001         "avx512er",
1002         "avx512cd",
1003         "avx512bw",
1004         "avx512vl",
1005         "avx512fma",
1006         "avx512vbmi",
1007         "avx512_vpopcntdq",
1008         "avx512_4vnniw",
1009         "avx512_4fmaps",
1010         "xsaveopt",
1011         "xsavec",
1012         "xsaves",
1013         "sha",
1014         "umip",
1015         "pku",
1016         "ospke",
1017         "pcid",
1018         "invpcid",
1019         "ibrs",
1020         "ibpb",
1021         "stibp",
1022         "ssbd",
1023         "ssbd_virt",
1024         "rdcl_no",
1025         "ibrs_all",
1026         "rsba",
1027         "ssb_no",
1028         "stibp_all",
1029         "flush_cmd",
1030         "l1d_vmentry_no",
1031         "fsgsbase",
1032         "clflushopt",
1033         "clwb",
1034         "monitorx",
1035         "clzero",
1036         "xop",
1037         "fma4",
1038         "tbm",
1039         "avx512_vnni",
1040         "amd_pcec",
1041         "mb_clear",
1042         "mds_no",
1043         "core_thermal",
1044         "pkg_thermal"
1045 };
1046 
1047 boolean_t
1048 is_x86_feature(void *featureset, uint_t feature)
1049 {
1050         ASSERT(feature < NUM_X86_FEATURES);
1051         return (BT_TEST((ulong_t *)featureset, feature));
1052 }
1053 
1054 void
1055 add_x86_feature(void *featureset, uint_t feature)
1056 {
1057         ASSERT(feature < NUM_X86_FEATURES);
1058         BT_SET((ulong_t *)featureset, feature);
1059 }
1060 
1061 void
1062 remove_x86_feature(void *featureset, uint_t feature)
1063 {
1064         ASSERT(feature < NUM_X86_FEATURES);
1065         BT_CLEAR((ulong_t *)featureset, feature);
1066 }
1067 
1068 boolean_t
1069 compare_x86_featureset(void *setA, void *setB)
1070 {
1071         /*
1072          * We assume that the unused bits of the bitmap are always zero.
1073          */
1074         if (memcmp(setA, setB, BT_SIZEOFMAP(NUM_X86_FEATURES)) == 0) {
1075                 return (B_TRUE);
1076         } else {
1077                 return (B_FALSE);
1078         }
1079 }
1080 
1081 void
1082 print_x86_featureset(void *featureset)
1083 {
1084         uint_t i;
1085 
1086         for (i = 0; i < NUM_X86_FEATURES; i++) {
1087                 if (is_x86_feature(featureset, i)) {
1088                         cmn_err(CE_CONT, "?x86_feature: %s\n",
1089                             x86_feature_names[i]);
1090                 }
1091         }
1092 }
1093 
1094 /* Note: This is the maximum size for the CPU, not the size of the structure. */
1095 static size_t xsave_state_size = 0;
1096 uint64_t xsave_bv_all = (XFEATURE_LEGACY_FP | XFEATURE_SSE);
1097 boolean_t xsave_force_disable = B_FALSE;
1098 extern int disable_smap;
1099 
1100 /*
 * This is set to the platform type we are running on.
1102  */
1103 static int platform_type = -1;
1104 
1105 #if !defined(__xpv)
1106 /*
1107  * Variable to patch if hypervisor platform detection needs to be
 * disabled (i.e. platform_type will always be HW_NATIVE if this is 0).
1109  */
1110 int enable_platform_detection = 1;
1111 #endif
1112 
1113 /*
1114  * monitor/mwait info.
1115  *
1116  * size_actual and buf_actual are the real address and size allocated to get
 * proper mwait_buf alignment.  buf_actual and size_actual should be passed
 * to kmem_free().  Currently kmem_alloc() and mwait happen to both use
 * processor cache-line alignment, but this is not guaranteed in the future.
1120  */
1121 struct mwait_info {
1122         size_t          mon_min;        /* min size to avoid missed wakeups */
1123         size_t          mon_max;        /* size to avoid false wakeups */
1124         size_t          size_actual;    /* size actually allocated */
1125         void            *buf_actual;    /* memory actually allocated */
1126         uint32_t        support;        /* processor support of monitor/mwait */
1127 };
1128 
1129 /*
1130  * xsave/xrestor info.
1131  *
1132  * This structure contains HW feature bits and the size of the xsave save area.
1133  * Note: the kernel declares a fixed size (AVX_XSAVE_SIZE) structure
1134  * (xsave_state) to describe the xsave layout. However, at runtime the
1135  * per-lwp xsave area is dynamically allocated based on xsav_max_size. The
1136  * xsave_state structure simply represents the legacy layout of the beginning
1137  * of the xsave area.
1138  */
1139 struct xsave_info {
1140         uint32_t        xsav_hw_features_low;   /* Supported HW features */
1141         uint32_t        xsav_hw_features_high;  /* Supported HW features */
1142         size_t          xsav_max_size;  /* max size save area for HW features */
1143         size_t          ymm_size;       /* AVX: size of ymm save area */
1144         size_t          ymm_offset;     /* AVX: offset for ymm save area */
1145         size_t          bndregs_size;   /* MPX: size of bndregs save area */
1146         size_t          bndregs_offset; /* MPX: offset for bndregs save area */
1147         size_t          bndcsr_size;    /* MPX: size of bndcsr save area */
1148         size_t          bndcsr_offset;  /* MPX: offset for bndcsr save area */
1149         size_t          opmask_size;    /* AVX512: size of opmask save */
1150         size_t          opmask_offset;  /* AVX512: offset for opmask save */
1151         size_t          zmmlo_size;     /* AVX512: size of zmm 256 save */
1152         size_t          zmmlo_offset;   /* AVX512: offset for zmm 256 save */
1153         size_t          zmmhi_size;     /* AVX512: size of zmm hi reg save */
1154         size_t          zmmhi_offset;   /* AVX512: offset for zmm hi reg save */
1155 };
1156 
1157 
1158 /*
 * These constants determine how many of the cpuid leaves we cache in the
 * cpuid_info data structure; the remaining leaves are accessible via the
 * cpuid instruction.
1162  */
1163 
1164 #define NMAX_CPI_STD    8               /* eax = 0 .. 7 */
1165 #define NMAX_CPI_EXTD   0x1f            /* eax = 0x80000000 .. 0x8000001e */
1166 
1167 /*
1168  * See the big theory statement for a more detailed explanation of what some of
1169  * these members mean.
1170  */
1171 struct cpuid_info {
1172         uint_t cpi_pass;                /* last pass completed */
1173         /*
1174          * standard function information
1175          */
1176         uint_t cpi_maxeax;              /* fn 0: %eax */
1177         char cpi_vendorstr[13];         /* fn 0: %ebx:%ecx:%edx */
1178         uint_t cpi_vendor;              /* enum of cpi_vendorstr */
1179 
1180         uint_t cpi_family;              /* fn 1: extended family */
1181         uint_t cpi_model;               /* fn 1: extended model */
1182         uint_t cpi_step;                /* fn 1: stepping */
1183         chipid_t cpi_chipid;            /* fn 1: %ebx:  Intel: chip # */
1184                                         /*              AMD: package/socket # */
1185         uint_t cpi_brandid;             /* fn 1: %ebx: brand ID */
1186         int cpi_clogid;                 /* fn 1: %ebx: thread # */
1187         uint_t cpi_ncpu_per_chip;       /* fn 1: %ebx: logical cpu count */
1188         uint8_t cpi_cacheinfo[16];      /* fn 2: intel-style cache desc */
1189         uint_t cpi_ncache;              /* fn 2: number of elements */
1190         uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */
1191         id_t cpi_last_lvl_cacheid;      /* fn 4: %eax: derived cache id */
1192         uint_t cpi_cache_leaf_size;     /* Number of cache elements */
1193                                         /* Intel fn: 4, AMD fn: 8000001d */
        struct cpuid_regs **cpi_cache_leaves;   /* Actual leaves from above */
1195         struct cpuid_regs cpi_std[NMAX_CPI_STD];        /* 0 .. 7 */
1196         /*
1197          * extended function information
1198          */
1199         uint_t cpi_xmaxeax;             /* fn 0x80000000: %eax */
1200         char cpi_brandstr[49];          /* fn 0x8000000[234] */
1201         uint8_t cpi_pabits;             /* fn 0x80000006: %eax */
1202         uint8_t cpi_vabits;             /* fn 0x80000006: %eax */
1203         uint8_t cpi_fp_amd_save;        /* AMD: FP error pointer save rqd. */
1204         struct  cpuid_regs cpi_extd[NMAX_CPI_EXTD];     /* 0x800000XX */
1205 
1206         id_t cpi_coreid;                /* same coreid => strands share core */
1207         int cpi_pkgcoreid;              /* core number within single package */
1208         uint_t cpi_ncore_per_chip;      /* AMD: fn 0x80000008: %ecx[7-0] */
1209                                         /* Intel: fn 4: %eax[31-26] */
1210 
1211         /*
1212          * These values represent the number of bits that are required to store
1213          * information about the number of cores and threads.
1214          */
1215         uint_t cpi_ncore_bits;
1216         uint_t cpi_nthread_bits;
1217         /*
1218          * supported feature information
1219          */
1220         uint32_t cpi_support[6];
1221 #define STD_EDX_FEATURES        0
1222 #define AMD_EDX_FEATURES        1
1223 #define TM_EDX_FEATURES         2
1224 #define STD_ECX_FEATURES        3
1225 #define AMD_ECX_FEATURES        4
1226 #define STD_EBX_FEATURES        5
1227         /*
1228          * Synthesized information, where known.
1229          */
1230         uint32_t cpi_chiprev;           /* See X86_CHIPREV_* in x86_archext.h */
1231         const char *cpi_chiprevstr;     /* May be NULL if chiprev unknown */
1232         uint32_t cpi_socket;            /* Chip package/socket type */
1233 
1234         struct mwait_info cpi_mwait;    /* fn 5: monitor/mwait info */
1235         uint32_t cpi_apicid;
1236         uint_t cpi_procnodeid;          /* AMD: nodeID on HT, Intel: chipid */
1237         uint_t cpi_procnodes_per_pkg;   /* AMD: # of nodes in the package */
1238                                         /* Intel: 1 */
1239         uint_t cpi_compunitid;          /* AMD: ComputeUnit ID, Intel: coreid */
1240         uint_t cpi_cores_per_compunit;  /* AMD: # of cores in the ComputeUnit */
1241 
1242         struct xsave_info cpi_xsave;    /* fn D: xsave/xrestor info */
1243 };
1244 
1245 
1246 static struct cpuid_info cpuid_info0;
1247 
1248 /*
1249  * These bit fields are defined by the Intel Application Note AP-485
1250  * "Intel Processor Identification and the CPUID Instruction"
1251  */
1252 #define CPI_FAMILY_XTD(cpi)     BITX((cpi)->cpi_std[1].cp_eax, 27, 20)
1253 #define CPI_MODEL_XTD(cpi)      BITX((cpi)->cpi_std[1].cp_eax, 19, 16)
1254 #define CPI_TYPE(cpi)           BITX((cpi)->cpi_std[1].cp_eax, 13, 12)
1255 #define CPI_FAMILY(cpi)         BITX((cpi)->cpi_std[1].cp_eax, 11, 8)
1256 #define CPI_STEP(cpi)           BITX((cpi)->cpi_std[1].cp_eax, 3, 0)
1257 #define CPI_MODEL(cpi)          BITX((cpi)->cpi_std[1].cp_eax, 7, 4)
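
/*
 * As a rough sketch of how these fields are typically combined (the
 * vendor-specific details are handled later in this file): the displayed
 * family is CPI_FAMILY(), plus CPI_FAMILY_XTD() when CPI_FAMILY() is 0xf,
 * and the displayed model is CPI_MODEL(), with CPI_MODEL_XTD() shifted into
 * bits 7:4 when the extended encoding applies.
 */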
1258 
1259 #define CPI_FEATURES_EDX(cpi)           ((cpi)->cpi_std[1].cp_edx)
1260 #define CPI_FEATURES_ECX(cpi)           ((cpi)->cpi_std[1].cp_ecx)
1261 #define CPI_FEATURES_XTD_EDX(cpi)       ((cpi)->cpi_extd[1].cp_edx)
1262 #define CPI_FEATURES_XTD_ECX(cpi)       ((cpi)->cpi_extd[1].cp_ecx)
1263 #define CPI_FEATURES_7_0_EBX(cpi)       ((cpi)->cpi_std[7].cp_ebx)
1264 #define CPI_FEATURES_7_0_ECX(cpi)       ((cpi)->cpi_std[7].cp_ecx)
1265 #define CPI_FEATURES_7_0_EDX(cpi)       ((cpi)->cpi_std[7].cp_edx)
1266 
1267 #define CPI_BRANDID(cpi)        BITX((cpi)->cpi_std[1].cp_ebx, 7, 0)
1268 #define CPI_CHUNKS(cpi)         BITX((cpi)->cpi_std[1].cp_ebx, 15, 7)
1269 #define CPI_CPU_COUNT(cpi)      BITX((cpi)->cpi_std[1].cp_ebx, 23, 16)
1270 #define CPI_APIC_ID(cpi)        BITX((cpi)->cpi_std[1].cp_ebx, 31, 24)
1271 
1272 #define CPI_MAXEAX_MAX          0x100           /* sanity control */
1273 #define CPI_XMAXEAX_MAX         0x80000100
1274 #define CPI_FN4_ECX_MAX         0x20            /* sanity: max fn 4 levels */
1275 #define CPI_FNB_ECX_MAX         0x20            /* sanity: max fn B levels */
1276 
1277 /*
1278  * Function 4 (Deterministic Cache Parameters) macros
1279  * Defined by Intel Application Note AP-485
1280  */
1281 #define CPI_NUM_CORES(regs)             BITX((regs)->cp_eax, 31, 26)
1282 #define CPI_NTHR_SHR_CACHE(regs)        BITX((regs)->cp_eax, 25, 14)
1283 #define CPI_FULL_ASSOC_CACHE(regs)      BITX((regs)->cp_eax, 9, 9)
1284 #define CPI_SELF_INIT_CACHE(regs)       BITX((regs)->cp_eax, 8, 8)
1285 #define CPI_CACHE_LVL(regs)             BITX((regs)->cp_eax, 7, 5)
1286 #define CPI_CACHE_TYPE(regs)            BITX((regs)->cp_eax, 4, 0)
1287 #define CPI_CPU_LEVEL_TYPE(regs)        BITX((regs)->cp_ecx, 15, 8)
1288 
1289 #define CPI_CACHE_WAYS(regs)            BITX((regs)->cp_ebx, 31, 22)
1290 #define CPI_CACHE_PARTS(regs)           BITX((regs)->cp_ebx, 21, 12)
1291 #define CPI_CACHE_COH_LN_SZ(regs)       BITX((regs)->cp_ebx, 11, 0)
1292 
1293 #define CPI_CACHE_SETS(regs)            BITX((regs)->cp_ecx, 31, 0)
1294 
1295 #define CPI_PREFCH_STRIDE(regs)         BITX((regs)->cp_edx, 9, 0)
1296 
1297 
1298 /*
1299  * A couple of shorthand macros to identify "later" P6-family chips
1300  * like the Pentium M and Core.  First, the "older" P6-based stuff
1301  * (loosely defined as "pre-Pentium-4"):
1302  * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon
1303  */
1304 #define IS_LEGACY_P6(cpi) (                     \
1305         cpi->cpi_family == 6 &&                      \
1306                 (cpi->cpi_model == 1 ||              \
1307                 cpi->cpi_model == 3 ||               \
1308                 cpi->cpi_model == 5 ||               \
1309                 cpi->cpi_model == 6 ||               \
1310                 cpi->cpi_model == 7 ||               \
1311                 cpi->cpi_model == 8 ||               \
1312                 cpi->cpi_model == 0xA ||     \
1313                 cpi->cpi_model == 0xB)               \
1314 )
1315 
1316 /* A "new F6" is everything with family 6 that's not the above */
1317 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi))
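
/*
 * For example, a Pentium III (family 6, model 7) matches IS_LEGACY_P6(),
 * while a Core 2 (family 6, model 0xf) falls under IS_NEW_F6().
 */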
1318 
1319 /* Extended family/model support */
1320 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \
1321         cpi->cpi_family >= 0xf)
1322 
1323 /*
1324  * Info for monitor/mwait idle loop.
1325  *
1326  * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's
1327  * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November
1328  * 2006.
1329  * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual
1330  * Documentation Updates" #33633, Rev 2.05, December 2006.
1331  */
1332 #define MWAIT_SUPPORT           (0x00000001)    /* mwait supported */
#define MWAIT_EXTENSIONS        (0x00000002)    /* extensions supported */
1334 #define MWAIT_ECX_INT_ENABLE    (0x00000004)    /* ecx 1 extension supported */
1335 #define MWAIT_SUPPORTED(cpi)    ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON)
1336 #define MWAIT_INT_ENABLE(cpi)   ((cpi)->cpi_std[5].cp_ecx & 0x2)
1337 #define MWAIT_EXTENSION(cpi)    ((cpi)->cpi_std[5].cp_ecx & 0x1)
1338 #define MWAIT_SIZE_MIN(cpi)     BITX((cpi)->cpi_std[5].cp_eax, 15, 0)
1339 #define MWAIT_SIZE_MAX(cpi)     BITX((cpi)->cpi_std[5].cp_ebx, 15, 0)
1340 /*
1341  * Number of sub-cstates for a given c-state.
1342  */
1343 #define MWAIT_NUM_SUBC_STATES(cpi, c_state)                     \
1344         BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state)
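
/*
 * For example, MWAIT_NUM_SUBC_STATES(cpi, 4) extracts bits 7:4 of leaf 5
 * %edx; the c_state argument is the bit offset of the C-state field of
 * interest.
 */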
1345 
1346 /*
1347  * XSAVE leaf 0xD enumeration
1348  */
1349 #define CPUID_LEAFD_2_YMM_OFFSET        576
1350 #define CPUID_LEAFD_2_YMM_SIZE          256
1351 
1352 /*
1353  * Common extended leaf names to cut down on typos.
1354  */
1355 #define CPUID_LEAF_EXT_0                0x80000000
1356 #define CPUID_LEAF_EXT_8                0x80000008
1357 #define CPUID_LEAF_EXT_1d               0x8000001d
1358 #define CPUID_LEAF_EXT_1e               0x8000001e
1359 
1360 /*
 * Functions we consume from cpuid_subr.c; don't publish these in a header
1362  * file to try and keep people using the expected cpuid_* interfaces.
1363  */
1364 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t);
1365 extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t);
1366 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t);
1367 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t);
1368 extern uint_t _cpuid_vendorstr_to_vendorcode(char *);
1369 
1370 /*
 * Apply various platform-dependent restrictions where the
 * underlying platform's constraints mean the CPU can be marked
1373  * as less capable than its cpuid instruction would imply.
1374  */
1375 #if defined(__xpv)
1376 static void
1377 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp)
1378 {
1379         switch (eax) {
1380         case 1: {
1381                 uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ?
1382                     0 : CPUID_INTC_EDX_MCA;
1383                 cp->cp_edx &=
1384                     ~(mcamask |
1385                     CPUID_INTC_EDX_PSE |
1386                     CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
1387                     CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR |
1388                     CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT |
1389                     CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
1390                     CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT);
1391                 break;
1392         }
1393 
1394         case 0x80000001:
1395                 cp->cp_edx &=
1396                     ~(CPUID_AMD_EDX_PSE |
1397                     CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
1398                     CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE |
1399                     CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 |
1400                     CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
1401                     CPUID_AMD_EDX_TSCP);
1402                 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY;
1403                 break;
1404         default:
1405                 break;
1406         }
1407 
1408         switch (vendor) {
1409         case X86_VENDOR_Intel:
1410                 switch (eax) {
1411                 case 4:
1412                         /*
1413                          * Zero out the (ncores-per-chip - 1) field
1414                          */
                        cp->cp_eax &= 0x03ffffff;
1416                         break;
1417                 default:
1418                         break;
1419                 }
1420                 break;
1421         case X86_VENDOR_AMD:
1422                 switch (eax) {
1423 
1424                 case 0x80000001:
1425                         cp->cp_ecx &= ~CPUID_AMD_ECX_CR8D;
1426                         break;
1427 
1428                 case CPUID_LEAF_EXT_8:
1429                         /*
1430                          * Zero out the (ncores-per-chip - 1) field
1431                          */
1432                         cp->cp_ecx &= 0xffffff00;
1433                         break;
1434                 default:
1435                         break;
1436                 }
1437                 break;
1438         default:
1439                 break;
1440         }
1441 }
1442 #else
1443 #define platform_cpuid_mangle(vendor, eax, cp)  /* nothing */
1444 #endif
1445 
1446 /*
1447  *  Some undocumented ways of patching the results of the cpuid
1448  *  instruction to permit running Solaris 10 on future cpus that
1449  *  we don't currently support.  Could be set to non-zero values
1450  *  via settings in eeprom.
1451  */
1452 
1453 uint32_t cpuid_feature_ecx_include;
1454 uint32_t cpuid_feature_ecx_exclude;
1455 uint32_t cpuid_feature_edx_include;
1456 uint32_t cpuid_feature_edx_exclude;
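
/*
 * As an illustrative, hypothetical example, a line such as
 *
 *	set cpuid_feature_edx_exclude = 0x04000000
 *
 * in /etc/system (one way such kernel variables can be set) would mask the
 * corresponding %edx feature bits out of what the cpuid pass reports; the
 * _include variables work in the opposite direction.
 */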
1457 
1458 /*
1459  * Allocate space for mcpu_cpi in the machcpu structure for all non-boot CPUs.
1460  */
1461 void
1462 cpuid_alloc_space(cpu_t *cpu)
1463 {
1464         /*
1465          * By convention, cpu0 is the boot cpu, which is set up
1466          * before memory allocation is available.  All other cpus get
1467          * their cpuid_info struct allocated here.
1468          */
1469         ASSERT(cpu->cpu_id != 0);
1470         ASSERT(cpu->cpu_m.mcpu_cpi == NULL);
1471         cpu->cpu_m.mcpu_cpi =
1472             kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP);
1473 }
1474 
1475 void
1476 cpuid_free_space(cpu_t *cpu)
1477 {
1478         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1479         int i;
1480 
1481         ASSERT(cpi != NULL);
1482         ASSERT(cpi != &cpuid_info0);
1483 
1484         /*
1485          * Free up any cache leaf related dynamic storage. The first entry was
1486          * cached from the standard cpuid storage, so we should not free it.
1487          */
1488         for (i = 1; i < cpi->cpi_cache_leaf_size; i++)
1489                 kmem_free(cpi->cpi_cache_leaves[i], sizeof (struct cpuid_regs));
1490         if (cpi->cpi_cache_leaf_size > 0)
1491                 kmem_free(cpi->cpi_cache_leaves,
1492                     cpi->cpi_cache_leaf_size * sizeof (struct cpuid_regs *));
1493 
1494         kmem_free(cpi, sizeof (*cpi));
1495         cpu->cpu_m.mcpu_cpi = NULL;
1496 }
1497 
1498 #if !defined(__xpv)
1499 /*
1500  * Determine the type of the underlying platform. This is used to customize
1501  * initialization of various subsystems (e.g. TSC). determine_platform() must
1502  * only ever be called once to prevent two processors from seeing different
1503  * values of platform_type. Must be called before cpuid_pass1(), the earliest
1504  * consumer to execute (uses _cpuid_chiprev --> synth_amd_info --> get_hwenv).
1505  */
1506 void
1507 determine_platform(void)
1508 {
1509         struct cpuid_regs cp;
1510         uint32_t base;
1511         uint32_t regs[4];
1512         char *hvstr = (char *)regs;
1513 
1514         ASSERT(platform_type == -1);
1515 
1516         platform_type = HW_NATIVE;
1517 
1518         if (!enable_platform_detection)
1519                 return;
1520 
1521         /*
1522          * If Hypervisor CPUID bit is set, try to determine hypervisor
1523          * vendor signature, and set platform type accordingly.
1524          *
1525          * References:
1526          * http://lkml.org/lkml/2008/10/1/246
1527          * http://kb.vmware.com/kb/1009458
1528          */
1529         cp.cp_eax = 0x1;
1530         (void) __cpuid_insn(&cp);
1531         if ((cp.cp_ecx & CPUID_INTC_ECX_HV) != 0) {
1532                 cp.cp_eax = 0x40000000;
1533                 (void) __cpuid_insn(&cp);
1534                 regs[0] = cp.cp_ebx;
1535                 regs[1] = cp.cp_ecx;
1536                 regs[2] = cp.cp_edx;
1537                 regs[3] = 0;
1538                 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0) {
1539                         platform_type = HW_XEN_HVM;
1540                         return;
1541                 }
1542                 if (strcmp(hvstr, HVSIG_VMWARE) == 0) {
1543                         platform_type = HW_VMWARE;
1544                         return;
1545                 }
1546                 if (strcmp(hvstr, HVSIG_KVM) == 0) {
1547                         platform_type = HW_KVM;
1548                         return;
1549                 }
1550                 if (strcmp(hvstr, HVSIG_BHYVE) == 0) {
1551                         platform_type = HW_BHYVE;
1552                         return;
1553                 }
1554                 if (strcmp(hvstr, HVSIG_MICROSOFT) == 0)
1555                         platform_type = HW_MICROSOFT;
1556         } else {
1557                 /*
                 * Check older VMware hardware versions. The VMware hypervisor
                 * is detected by performing an IN operation to the VMware
                 * hypervisor port and checking that the value returned in
                 * %ebx is the VMware hypervisor magic value.
1562                  *
1563                  * References: http://kb.vmware.com/kb/1009458
1564                  */
1565                 vmware_port(VMWARE_HVCMD_GETVERSION, regs);
1566                 if (regs[1] == VMWARE_HVMAGIC) {
1567                         platform_type = HW_VMWARE;
1568                         return;
1569                 }
1570         }
1571 
1572         /*
1573          * Check Xen hypervisor. In a fully virtualized domain,
1574          * Xen's pseudo-cpuid function returns a string representing the
1575          * Xen signature in %ebx, %ecx, and %edx. %eax contains the maximum
1576          * supported cpuid function. We need at least a (base + 2) leaf value
1577          * to do what we want to do. Try different base values, since the
1578          * hypervisor might use a different one depending on whether Hyper-V
1579          * emulation is switched on by default or not.
1580          */
1581         for (base = 0x40000000; base < 0x40010000; base += 0x100) {
1582                 cp.cp_eax = base;
1583                 (void) __cpuid_insn(&cp);
1584                 regs[0] = cp.cp_ebx;
1585                 regs[1] = cp.cp_ecx;
1586                 regs[2] = cp.cp_edx;
1587                 regs[3] = 0;
1588                 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0 &&
1589                     cp.cp_eax >= (base + 2)) {
1590                         platform_type &= ~HW_NATIVE;
1591                         platform_type |= HW_XEN_HVM;
1592                         return;
1593                 }
1594         }
1595 }
1596 
1597 int
1598 get_hwenv(void)
1599 {
1600         ASSERT(platform_type != -1);
1601         return (platform_type);
1602 }
1603 
1604 int
1605 is_controldom(void)
1606 {
1607         return (0);
1608 }
1609 
1610 #else
1611 
1612 int
1613 get_hwenv(void)
1614 {
1615         return (HW_XEN_PV);
1616 }
1617 
1618 int
1619 is_controldom(void)
1620 {
1621         return (DOMAIN_IS_INITDOMAIN(xen_info));
1622 }
1623 
1624 #endif  /* __xpv */
1625 
1626 /*
1627  * Make sure that we have gathered all of the CPUID leaves that we might need to
1628  * determine topology. We assume that the standard leaf 1 has already been done
1629  * and that xmaxeax has already been calculated.
1630  */
1631 static void
1632 cpuid_gather_amd_topology_leaves(cpu_t *cpu)
1633 {
1634         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1635 
1636         if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
1637                 struct cpuid_regs *cp;
1638 
1639                 cp = &cpi->cpi_extd[8];
1640                 cp->cp_eax = CPUID_LEAF_EXT_8;
1641                 (void) __cpuid_insn(cp);
1642                 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8, cp);
1643         }
1644 
1645         if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1646             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1647                 struct cpuid_regs *cp;
1648 
1649                 cp = &cpi->cpi_extd[0x1e];
1650                 cp->cp_eax = CPUID_LEAF_EXT_1e;
1651                 (void) __cpuid_insn(cp);
1652         }
1653 }
1654 
1655 /*
1656  * Get the APIC ID for this processor. If Leaf B is present and valid, we prefer
1657  * it to everything else. If not, and we're on an AMD system where 8000001e is
 * valid, then we use that. Otherwise, we fall back to the default value for the
1659  * APIC ID in leaf 1.
1660  */
1661 static uint32_t
1662 cpuid_gather_apicid(struct cpuid_info *cpi)
1663 {
1664         /*
         * Leaf B changes based on the arguments to it. Because we don't cache
1666          * it, we need to gather it again.
1667          */
1668         if (cpi->cpi_maxeax >= 0xB) {
1669                 struct cpuid_regs regs;
1670                 struct cpuid_regs *cp;
1671 
1672                 cp = &regs;
1673                 cp->cp_eax = 0xB;
1674                 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1675                 (void) __cpuid_insn(cp);
1676 
1677                 if (cp->cp_ebx != 0) {
1678                         return (cp->cp_edx);
1679                 }
1680         }
1681 
1682         if (cpi->cpi_vendor == X86_VENDOR_AMD &&
1683             is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1684             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1685                 return (cpi->cpi_extd[0x1e].cp_eax);
1686         }
1687 
1688         return (CPI_APIC_ID(cpi));
1689 }
1690 
1691 /*
1692  * For AMD processors, attempt to calculate the number of chips and cores that
1693  * exist. The way that we do this varies based on the generation, because the
1694  * generations themselves have changed dramatically.
1695  *
1696  * If cpuid leaf 0x80000008 exists, that generally tells us the number of cores.
1697  * However, with the advent of family 17h (Zen) it actually tells us the number
1698  * of threads, so we need to look at leaf 0x8000001e if available to determine
1699  * its value. Otherwise, for all prior families, the number of enabled cores is
1700  * the same as threads.
1701  *
1702  * If we do not have leaf 0x80000008, then we assume that this processor does
1703  * not have anything. AMD's older CPUID specification says there's no reason to
1704  * fall back to leaf 1.
1705  *
1706  * In some virtualization cases we will not have leaf 8000001e or it will be
1707  * zero. When that happens we assume the number of threads is one.
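 *
 * As a worked example: a hypothetical family 17h part reporting 0xf in
 * leaf 0x80000008 %ecx[7:0] (i.e. 16 threads) and 1 in leaf 0x8000001e
 * %ebx[15:8] (i.e. two threads per core) would yield *ncpus = 16 and
 * *ncores = 8.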
1708  */
1709 static void
1710 cpuid_amd_ncores(struct cpuid_info *cpi, uint_t *ncpus, uint_t *ncores)
1711 {
1712         uint_t nthreads, nthread_per_core;
1713 
1714         nthreads = nthread_per_core = 1;
1715 
1716         if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
1717                 nthreads = BITX(cpi->cpi_extd[8].cp_ecx, 7, 0) + 1;
1718         } else if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) {
1719                 nthreads = CPI_CPU_COUNT(cpi);
1720         }
1721 
1722         /*
1723          * For us to have threads, and know about it, we have to be at least at
1724          * family 17h and have the cpuid bit that says we have extended
1725          * topology.
1726          */
1727         if (cpi->cpi_family >= 0x17 &&
1728             is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1729             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1730                 nthread_per_core = BITX(cpi->cpi_extd[0x1e].cp_ebx, 15, 8) + 1;
1731         }
1732 
1733         *ncpus = nthreads;
1734         *ncores = nthreads / nthread_per_core;
1735 }
1736 
1737 /*
1738  * Seed the initial values for the cores and threads for an Intel based
1739  * processor. These values will be overwritten if we detect that the processor
1740  * supports CPUID leaf 0xb.
1741  */
1742 static void
1743 cpuid_intel_ncores(struct cpuid_info *cpi, uint_t *ncpus, uint_t *ncores)
1744 {
1745         /*
1746          * Only seed the number of physical cores from the first level leaf 4
         * information. The number of threads there indicates how many share the
1748          * L1 cache, which may or may not have anything to do with the number of
1749          * logical CPUs per core.
1750          */
1751         if (cpi->cpi_maxeax >= 4) {
1752                 *ncores = BITX(cpi->cpi_std[4].cp_eax, 31, 26) + 1;
1753         } else {
1754                 *ncores = 1;
1755         }
1756 
1757         if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) {
1758                 *ncpus = CPI_CPU_COUNT(cpi);
1759         } else {
1760                 *ncpus = *ncores;
1761         }
1762 }
1763 
1764 static boolean_t
1765 cpuid_leafB_getids(cpu_t *cpu)
1766 {
1767         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1768         struct cpuid_regs regs;
1769         struct cpuid_regs *cp;
1770 
1771         if (cpi->cpi_maxeax < 0xB)
1772                 return (B_FALSE);
1773 
1774         cp = &regs;
1775         cp->cp_eax = 0xB;
1776         cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1777 
1778         (void) __cpuid_insn(cp);
1779 
1780         /*
1781          * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which
1782          * indicates that the extended topology enumeration leaf is
1783          * available.
1784          */
1785         if (cp->cp_ebx != 0) {
1786                 uint32_t x2apic_id = 0;
1787                 uint_t coreid_shift = 0;
1788                 uint_t ncpu_per_core = 1;
1789                 uint_t chipid_shift = 0;
1790                 uint_t ncpu_per_chip = 1;
1791                 uint_t i;
1792                 uint_t level;
1793 
1794                 for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
1795                         cp->cp_eax = 0xB;
1796                         cp->cp_ecx = i;
1797 
1798                         (void) __cpuid_insn(cp);
1799                         level = CPI_CPU_LEVEL_TYPE(cp);
1800 
1801                         if (level == 1) {
1802                                 x2apic_id = cp->cp_edx;
1803                                 coreid_shift = BITX(cp->cp_eax, 4, 0);
1804                                 ncpu_per_core = BITX(cp->cp_ebx, 15, 0);
1805                         } else if (level == 2) {
1806                                 x2apic_id = cp->cp_edx;
1807                                 chipid_shift = BITX(cp->cp_eax, 4, 0);
1808                                 ncpu_per_chip = BITX(cp->cp_ebx, 15, 0);
1809                         }
1810                 }
1811 
1812                 /*
1813                  * cpi_apicid is taken care of in cpuid_gather_apicid.
1814                  */
1815                 cpi->cpi_ncpu_per_chip = ncpu_per_chip;
1816                 cpi->cpi_ncore_per_chip = ncpu_per_chip /
1817                     ncpu_per_core;
1818                 cpi->cpi_chipid = x2apic_id >> chipid_shift;
1819                 cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1);
1820                 cpi->cpi_coreid = x2apic_id >> coreid_shift;
1821                 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
1822                 cpi->cpi_procnodeid = cpi->cpi_chipid;
1823                 cpi->cpi_compunitid = cpi->cpi_coreid;
1824 
1825                 if (coreid_shift > 0 && chipid_shift > coreid_shift) {
1826                         cpi->cpi_nthread_bits = coreid_shift;
1827                         cpi->cpi_ncore_bits = chipid_shift - coreid_shift;
1828                 }
1829 
1830                 return (B_TRUE);
1831         } else {
1832                 return (B_FALSE);
1833         }
1834 }
1835 
1836 static void
1837 cpuid_intel_getids(cpu_t *cpu, void *feature)
1838 {
1839         uint_t i;
1840         uint_t chipid_shift = 0;
1841         uint_t coreid_shift = 0;
1842         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1843 
1844         /*
1845          * There are no compute units or processor nodes currently on Intel.
1846          * Always set these to one.
1847          */
1848         cpi->cpi_procnodes_per_pkg = 1;
1849         cpi->cpi_cores_per_compunit = 1;
1850 
1851         /*
1852          * If cpuid Leaf B is present, use that to try and get this information.
1853          * It will be the most accurate for Intel CPUs.
1854          */
1855         if (cpuid_leafB_getids(cpu))
1856                 return;
1857 
1858         /*
1859          * In this case, we have the leaf 1 and leaf 4 values for ncpu_per_chip
1860          * and ncore_per_chip. These represent the largest power of two values
1861          * that we need to cover all of the IDs in the system. Therefore, we use
1862          * those values to seed the number of bits needed to cover information
1863          * in the case when leaf B is not available. These values will probably
1864          * be larger than required, but that's OK.
1865          */
1866         cpi->cpi_nthread_bits = ddi_fls(cpi->cpi_ncpu_per_chip);
1867         cpi->cpi_ncore_bits = ddi_fls(cpi->cpi_ncore_per_chip);
1868 
1869         for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1)
1870                 chipid_shift++;
1871 
1872         cpi->cpi_chipid = cpi->cpi_apicid >> chipid_shift;
1873         cpi->cpi_clogid = cpi->cpi_apicid & ((1 << chipid_shift) - 1);
1874 
1875         if (is_x86_feature(feature, X86FSET_CMP)) {
1876                 /*
1877                  * Multi-core (and possibly multi-threaded)
1878                  * processors.
1879                  */
1880                 uint_t ncpu_per_core;
1881                 if (cpi->cpi_ncore_per_chip == 1)
1882                         ncpu_per_core = cpi->cpi_ncpu_per_chip;
1883                 else if (cpi->cpi_ncore_per_chip > 1)
1884                         ncpu_per_core = cpi->cpi_ncpu_per_chip /
1885                             cpi->cpi_ncore_per_chip;
1886                 /*
1887                  * 8bit APIC IDs on dual core Pentiums
1888                  * look like this:
1889                  *
1890                  * +-----------------------+------+------+
1891                  * | Physical Package ID   |  MC  |  HT  |
1892                  * +-----------------------+------+------+
1893                  * <------- chipid -------->
1894                  * <------- coreid --------------->
1895                  *                         <--- clogid -->
1896                  *                         <------>
1897                  *                         pkgcoreid
1898                  *
1899                  * Where the number of bits necessary to
                 * represent the MC and HT fields together equals
                 * the minimum number of bits necessary to
1902                  * store the value of cpi->cpi_ncpu_per_chip.
1903                  * Of those bits, the MC part uses the number
1904                  * of bits necessary to store the value of
1905                  * cpi->cpi_ncore_per_chip.
1906                  */
1907                 for (i = 1; i < ncpu_per_core; i <<= 1)
1908                         coreid_shift++;
1909                 cpi->cpi_coreid = cpi->cpi_apicid >> coreid_shift;
1910                 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
1911         } else if (is_x86_feature(feature, X86FSET_HTT)) {
1912                 /*
1913                  * Single-core multi-threaded processors.
1914                  */
1915                 cpi->cpi_coreid = cpi->cpi_chipid;
1916                 cpi->cpi_pkgcoreid = 0;
1917         } else {
1918                 /*
1919                  * Single-core single-thread processors.
1920                  */
1921                 cpi->cpi_coreid = cpu->cpu_id;
1922                 cpi->cpi_pkgcoreid = 0;
1923         }
1924         cpi->cpi_procnodeid = cpi->cpi_chipid;
1925         cpi->cpi_compunitid = cpi->cpi_coreid;
1926 }
1927 
1928 /*
1929  * Historically, AMD has had CMP chips with only a single thread per core.
1930  * However, starting in family 17h (Zen), this has changed and they now have
1931  * multiple threads. Our internal core id needs to be a unique value.
1932  *
1933  * To determine the core id of an AMD system, if we're from a family before 17h,
1934  * then we just use the cpu id, as that gives us a good value that will be
1935  * unique for each core. If instead, we're on family 17h or later, then we need
1936  * to do something more complicated. CPUID leaf 0x8000001e can tell us
 * how many threads are in the core. Based on that, we'll shift the APIC ID.
1938  * We can't use the normal core id in that leaf as it's only unique within the
1939  * socket, which is perfect for cpi_pkgcoreid, but not us.
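 *
 * For example, on a family 17h part with two threads per core, an APIC ID
 * of 5 yields a core id of 2 (5 >> 1); both threads of that core thus share
 * the same cpi_coreid.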
1940  */
1941 static id_t
1942 cpuid_amd_get_coreid(cpu_t *cpu)
1943 {
1944         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1945 
1946         if (cpi->cpi_family >= 0x17 &&
1947             is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1948             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1949                 uint_t nthreads = BITX(cpi->cpi_extd[0x1e].cp_ebx, 15, 8) + 1;
1950                 if (nthreads > 1) {
1951                         VERIFY3U(nthreads, ==, 2);
1952                         return (cpi->cpi_apicid >> 1);
1953                 }
1954         }
1955 
1956         return (cpu->cpu_id);
1957 }
1958 
1959 /*
 * Determining IDs on AMD is a more challenging task. This is notable because
 * of the following two facts:
1962  *
1963  *  1. Before family 0x17 (Zen), there was no support for SMT and there was
1964  *     also no way to get an actual unique core id from the system. As such, we
1965  *     synthesize this case by using cpu->cpu_id.  This scheme does not,
1966  *     however, guarantee that sibling cores of a chip will have sequential
1967  *     coreids starting at a multiple of the number of cores per chip - that is
1968  *     usually the case, but if the ACPI MADT table is presented in a different
1969  *     order then we need to perform a few more gymnastics for the pkgcoreid.
1970  *
 *  2. In families 0x15 and 0x16 (Bulldozer and co.) the cores came in groups
1972  *     called compute units. These compute units share the L1I cache, L2 cache,
1973  *     and the FPU. To deal with this, a new topology leaf was added in
1974  *     0x8000001e. However, parts of this leaf have different meanings
1975  *     once we get to family 0x17.
1976  */
1977 
1978 static void
1979 cpuid_amd_getids(cpu_t *cpu, uchar_t *features)
1980 {
1981         int i, first_half, coreidsz;
1982         uint32_t nb_caps_reg;
1983         uint_t node2_1;
1984         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1985         struct cpuid_regs *cp;
1986 
1987         /*
1988          * Calculate the core id (this comes from hardware in family 0x17 if it
1989          * hasn't been stripped by virtualization). We always set the compute
1990          * unit id to the same value. Also, initialize the default number of
1991          * cores per compute unit and nodes per package. This will be
1992          * overwritten when we know information about a particular family.
1993          */
1994         cpi->cpi_coreid = cpuid_amd_get_coreid(cpu);
1995         cpi->cpi_compunitid = cpi->cpi_coreid;
1996         cpi->cpi_cores_per_compunit = 1;
1997         cpi->cpi_procnodes_per_pkg = 1;
1998 
1999         /*
2000          * To construct the logical ID, we need to determine how many APIC IDs
2001          * are dedicated to the cores and threads. This is provided for us in
2002          * 0x80000008. However, if it's not present (say due to virtualization),
2003          * then we assume it's one. This should be present on all 64-bit AMD
2004          * processors.  It was added in family 0xf (Hammer).
2005          */
2006         if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
2007                 coreidsz = BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12);
2008 
2009                 /*
2010                  * In AMD parlance chip is really a node while illumos
2011                  * uses chip as equivalent to socket/package.
2012                  */
2013                 if (coreidsz == 0) {
2014                         /* Use legacy method */
2015                         for (i = 1; i < cpi->cpi_ncore_per_chip; i <<= 1)
2016                                 coreidsz++;
2017                         if (coreidsz == 0)
2018                                 coreidsz = 1;
2019                 }
2020         } else {
2021                 /* Assume single-core part */
2022                 coreidsz = 1;
2023         }
2024         cpi->cpi_clogid = cpi->cpi_apicid & ((1 << coreidsz) - 1);
2025 
2026         /*
2027          * The package core ID varies depending on the family. While it may be
2028          * tempting to use the CPUID_LEAF_EXT_1e %ebx core id, unfortunately,
2029          * this value is the core id in the given node. For non-virtualized
2030          * family 17h, we need to take the logical core id and shift off the
2031          * threads like we do when getting the core id.  Otherwise, we can use
         * the clogid as is. When family 17h is virtualized, the clogid is
         * still sufficient: if we don't have valid data in the leaf, then we
         * won't think we have SMT, in which case cpi_clogid is all we need.
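         *
         * For example, on a non-virtualized family 17h part with two threads
         * per core, a clogid of 5 yields a pkgcoreid of 2 (5 >> 1), mirroring
         * what is done for cpi_coreid above.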
2036          */
2037         if (cpi->cpi_family >= 0x17 &&
2038             is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
2039             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e &&
2040             cpi->cpi_extd[0x1e].cp_ebx != 0) {
2041                 uint_t nthreads = BITX(cpi->cpi_extd[0x1e].cp_ebx, 15, 8) + 1;
2042                 if (nthreads > 1) {
2043                         VERIFY3U(nthreads, ==, 2);
2044                         cpi->cpi_pkgcoreid = cpi->cpi_clogid >> 1;
2045                 } else {
2046                         cpi->cpi_pkgcoreid = cpi->cpi_clogid;
2047                 }
2048         } else {
2049                 cpi->cpi_pkgcoreid = cpi->cpi_clogid;
2050         }
2051 
2052         /*
2053          * Obtain the node ID and compute unit IDs. If we're on family 0x15
2054          * (bulldozer) or newer, then we can derive all of this from leaf
2055          * CPUID_LEAF_EXT_1e. Otherwise, the method varies by family.
2056          */
2057         if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
2058             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
2059                 cp = &cpi->cpi_extd[0x1e];
2060 
2061                 cpi->cpi_procnodes_per_pkg = BITX(cp->cp_ecx, 10, 8) + 1;
2062                 cpi->cpi_procnodeid = BITX(cp->cp_ecx, 7, 0);
2063 
2064                 /*
2065                  * For Bulldozer-era CPUs, recalculate the compute unit
2066                  * information.
2067                  */
2068                 if (cpi->cpi_family >= 0x15 && cpi->cpi_family < 0x17) {
2069                         cpi->cpi_cores_per_compunit =
2070                             BITX(cp->cp_ebx, 15, 8) + 1;
2071                         cpi->cpi_compunitid = BITX(cp->cp_ebx, 7, 0) +
2072                             (cpi->cpi_ncore_per_chip /
2073                             cpi->cpi_cores_per_compunit) *
2074                             (cpi->cpi_procnodeid /
2075                             cpi->cpi_procnodes_per_pkg);
2076                 }
2077         } else if (cpi->cpi_family == 0xf || cpi->cpi_family >= 0x11) {
2078                 cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7;
2079         } else if (cpi->cpi_family == 0x10) {
2080                 /*
2081                  * See if we are a multi-node processor.
2082                  * All processors in the system have the same number of nodes
2083                  */
2084                 nb_caps_reg =  pci_getl_func(0, 24, 3, 0xe8);
2085                 if ((cpi->cpi_model < 8) || BITX(nb_caps_reg, 29, 29) == 0) {
2086                         /* Single-node */
2087                         cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5,
2088                             coreidsz);
2089                 } else {
2090 
2091                         /*
2092                          * Multi-node revision D (2 nodes per package
2093                          * are supported)
2094                          */
2095                         cpi->cpi_procnodes_per_pkg = 2;
2096 
2097                         first_half = (cpi->cpi_pkgcoreid <=
2098                             (cpi->cpi_ncore_per_chip/2 - 1));
2099 
2100                         if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) {
2101                                 /* We are BSP */
2102                                 cpi->cpi_procnodeid = (first_half ? 0 : 1);
2103                         } else {
2104 
2105                                 /* We are AP */
2106                                 /* NodeId[2:1] bits to use for reading F3xe8 */
2107                                 node2_1 = BITX(cpi->cpi_apicid, 5, 4) << 1;
2108 
2109                                 nb_caps_reg =
2110                                     pci_getl_func(0, 24 + node2_1, 3, 0xe8);
2111 
2112                                 /*
2113                                  * Check IntNodeNum bit (31:30, but bit 31 is
2114                                  * always 0 on dual-node processors)
2115                                  */
2116                                 if (BITX(nb_caps_reg, 30, 30) == 0)
2117                                         cpi->cpi_procnodeid = node2_1 +
2118                                             !first_half;
2119                                 else
2120                                         cpi->cpi_procnodeid = node2_1 +
2121                                             first_half;
2122                         }
2123                 }
2124         } else {
2125                 cpi->cpi_procnodeid = 0;
2126         }
2127 
2128         cpi->cpi_chipid =
2129             cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg;
2130 
2131         cpi->cpi_ncore_bits = coreidsz;
2132         cpi->cpi_nthread_bits = ddi_fls(cpi->cpi_ncpu_per_chip /
2133             cpi->cpi_ncore_per_chip);
2134 }
2135 
2136 static void
2137 spec_uarch_flush_noop(void)
2138 {
2139 }
2140 
2141 /*
 * When microcode that mitigates MDS is present, this wrmsr will also flush
 * the MDS-related micro-architectural state that would normally be flushed
 * by calling x86_md_clear().
2145  */
2146 static void
2147 spec_uarch_flush_msr(void)
2148 {
2149         wrmsr(MSR_IA32_FLUSH_CMD, IA32_FLUSH_CMD_L1D);
2150 }
2151 
2152 /*
2153  * This function points to a function that will flush certain
2154  * micro-architectural state on the processor. This flush is used to mitigate
2155  * two different classes of Intel CPU vulnerabilities: L1TF and MDS. This
2156  * function can point to one of three functions:
2157  *
 * - A noop, used either because we are vulnerable but have no microcode
 *   available to help mitigate the issue, or because we are not vulnerable
 *   at all.
2161  *
2162  * - spec_uarch_flush_msr which will issue an L1D flush and if microcode to
2163  *   mitigate MDS is present, also perform the equivalent of the MDS flush;
 *   however, it only flushes the MDS-related micro-architectural state on the
 *   current hyperthread; it does not do anything for the twin.
2166  *
2167  * - x86_md_clear which will flush the MDS related state. This is done when we
2168  *   have a processor that is vulnerable to MDS, but is not vulnerable to L1TF
2169  *   (RDCL_NO is set).
2170  */
2171 void (*spec_uarch_flush)(void) = spec_uarch_flush_noop;
2172 
2173 void (*x86_md_clear)(void) = x86_md_clear_noop;
2174 
2175 static void
2176 cpuid_update_md_clear(cpu_t *cpu, uchar_t *featureset)
2177 {
2178         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2179 
2180         /*
2181          * While RDCL_NO indicates that one of the MDS vulnerabilities (MSBDS)
2182          * has been fixed in hardware, it doesn't cover everything related to
2183          * MDS. Therefore we can only rely on MDS_NO to determine that we don't
2184          * need to mitigate this.
2185          */
2186         if (cpi->cpi_vendor != X86_VENDOR_Intel ||
2187             is_x86_feature(featureset, X86FSET_MDS_NO)) {
2188                 x86_md_clear = x86_md_clear_noop;
2189                 membar_producer();
2190                 return;
2191         }
2192 
2193         if (is_x86_feature(featureset, X86FSET_MD_CLEAR)) {
2194                 x86_md_clear = x86_md_clear_verw;
2195         }
2196 
2197         membar_producer();
2198 }
2199 
2200 static void
2201 cpuid_update_l1d_flush(cpu_t *cpu, uchar_t *featureset)
2202 {
2203         boolean_t need_l1d, need_mds;
2204         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2205 
2206         /*
2207          * If we're not on Intel or we've mitigated both RDCL and MDS in
2208          * hardware, then there's nothing left for us to do for enabling the
2209          * flush. We can also go ahead and say that SMT exclusion is
2210          * unnecessary.
2211          */
2212         if (cpi->cpi_vendor != X86_VENDOR_Intel ||
2213             (is_x86_feature(featureset, X86FSET_RDCL_NO) &&
2214             is_x86_feature(featureset, X86FSET_MDS_NO))) {
2215                 extern int smt_exclusion;
2216                 smt_exclusion = 0;
2217                 spec_uarch_flush = spec_uarch_flush_noop;
2218                 membar_producer();
2219                 return;
2220         }
2221 
2222         /*
2223          * The locations where we need to perform an L1D flush are required both
2224          * for mitigating L1TF and MDS. When verw support is present in
2225          * microcode, then the L1D flush will take care of doing that as well.
2226          * However, if we have a system where RDCL_NO is present, but we don't
2227          * have MDS_NO, then we need to do a verw (x86_md_clear) and not a full
2228          * L1D flush.
2229          */
2230         if (!is_x86_feature(featureset, X86FSET_RDCL_NO) &&
2231             is_x86_feature(featureset, X86FSET_FLUSH_CMD) &&
2232             !is_x86_feature(featureset, X86FSET_L1D_VM_NO)) {
2233                 need_l1d = B_TRUE;
2234         } else {
2235                 need_l1d = B_FALSE;
2236         }
2237 
2238         if (!is_x86_feature(featureset, X86FSET_MDS_NO) &&
2239             is_x86_feature(featureset, X86FSET_MD_CLEAR)) {
2240                 need_mds = B_TRUE;
2241         } else {
2242                 need_mds = B_FALSE;
2243         }
2244 
2245         if (need_l1d) {
2246                 spec_uarch_flush = spec_uarch_flush_msr;
2247         } else if (need_mds) {
2248                 spec_uarch_flush = x86_md_clear;
2249         } else {
2250                 /*
2251                  * We have no hardware mitigations available to us.
2252                  */
2253                 spec_uarch_flush = spec_uarch_flush_noop;
2254         }
2255         membar_producer();
2256 }
2257 
2258 static void
2259 cpuid_scan_security(cpu_t *cpu, uchar_t *featureset)
2260 {
2261         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2262 
2263         if (cpi->cpi_vendor == X86_VENDOR_AMD &&
2264             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
2265                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBPB)
2266                         add_x86_feature(featureset, X86FSET_IBPB);
2267                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBRS)
2268                         add_x86_feature(featureset, X86FSET_IBRS);
2269                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_STIBP)
2270                         add_x86_feature(featureset, X86FSET_STIBP);
2271                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBRS_ALL)
2272                         add_x86_feature(featureset, X86FSET_IBRS_ALL);
2273                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_STIBP_ALL)
2274                         add_x86_feature(featureset, X86FSET_STIBP_ALL);
2275                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_PREFER_IBRS)
2276                         add_x86_feature(featureset, X86FSET_RSBA);
2277                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_SSBD)
2278                         add_x86_feature(featureset, X86FSET_SSBD);
2279                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_VIRT_SSBD)
2280                         add_x86_feature(featureset, X86FSET_SSBD_VIRT);
2281                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_SSB_NO)
2282                         add_x86_feature(featureset, X86FSET_SSB_NO);
2283         } else if (cpi->cpi_vendor == X86_VENDOR_Intel &&
2284             cpi->cpi_maxeax >= 7) {
2285                 struct cpuid_regs *ecp;
2286                 ecp = &cpi->cpi_std[7];
2287 
2288                 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_MD_CLEAR) {
2289                         add_x86_feature(featureset, X86FSET_MD_CLEAR);
2290                 }
2291 
2292                 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_SPEC_CTRL) {
2293                         add_x86_feature(featureset, X86FSET_IBRS);
2294                         add_x86_feature(featureset, X86FSET_IBPB);
2295                 }
2296 
2297                 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_STIBP) {
2298                         add_x86_feature(featureset, X86FSET_STIBP);
2299                 }
2300 
2301                 /*
2302                  * Don't read the arch caps MSR on xpv where we lack the
2303                  * on_trap().
2304                  */
2305 #ifndef __xpv
2306                 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_ARCH_CAPS) {
2307                         on_trap_data_t otd;
2308 
2309                         /*
2310                          * Be paranoid and assume we'll get a #GP.
2311                          */
2312                         if (!on_trap(&otd, OT_DATA_ACCESS)) {
2313                                 uint64_t reg;
2314 
2315                                 reg = rdmsr(MSR_IA32_ARCH_CAPABILITIES);
2316                                 if (reg & IA32_ARCH_CAP_RDCL_NO) {
2317                                         add_x86_feature(featureset,
2318                                             X86FSET_RDCL_NO);
2319                                 }
2320                                 if (reg & IA32_ARCH_CAP_IBRS_ALL) {
2321                                         add_x86_feature(featureset,
2322                                             X86FSET_IBRS_ALL);
2323                                 }
2324                                 if (reg & IA32_ARCH_CAP_RSBA) {
2325                                         add_x86_feature(featureset,
2326                                             X86FSET_RSBA);
2327                                 }
2328                                 if (reg & IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY) {
2329                                         add_x86_feature(featureset,
2330                                             X86FSET_L1D_VM_NO);
2331                                 }
2332                                 if (reg & IA32_ARCH_CAP_SSB_NO) {
2333                                         add_x86_feature(featureset,
2334                                             X86FSET_SSB_NO);
2335                                 }
2336                                 if (reg & IA32_ARCH_CAP_MDS_NO) {
2337                                         add_x86_feature(featureset,
2338                                             X86FSET_MDS_NO);
2339                                 }
2340                         }
2341                         no_trap();
2342                 }
2343 #endif  /* !__xpv */
2344 
2345                 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_SSBD)
2346                         add_x86_feature(featureset, X86FSET_SSBD);
2347 
2348                 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_FLUSH_CMD)
2349                         add_x86_feature(featureset, X86FSET_FLUSH_CMD);
2350         }
2351 
2352         if (cpu->cpu_id != 0)
2353                 return;
2354 
2355         /*
2356          * We need to determine what changes are required for mitigating L1TF
2357          * and MDS. If the CPU suffers from either of them, then SMT exclusion
2358          * is required.
2359          *
2360          * If any of these are present, then we need to flush u-arch state at
2361          * various points. For MDS, we need to do so whenever we change to a
2362          * lesser privilege level or we are halting the CPU. For L1TF we need to
2363          * flush the L1D cache at VM entry. When we have microcode that handles
2364          * MDS, the L1D flush also clears the same u-arch state that
2365          * md_clear does.
2366          */
2367 
2368         /*
2369          * Update whether or not we need to be taking explicit action against
2370          * MDS.
2371          */
2372         cpuid_update_md_clear(cpu, featureset);
2373 
2374         /*
2375          * Determine whether SMT exclusion is required and whether or not we
2376          * need to perform an l1d flush.
2377          */
2378         cpuid_update_l1d_flush(cpu, featureset);
2379 }
2380 
2381 /*
2382  * Setup XFeature_Enabled_Mask register. Required by xsave feature.
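      *
      * As an illustrative example (assuming the architectural XCR0 bit
      * layout): on a CPU with SSE and AVX but no AVX-512, the mask written
      * below ends up as XFEATURE_LEGACY_FP | XFEATURE_SSE | XFEATURE_AVX,
      * i.e. XCR0 bits 0 (x87), 1 (SSE) and 2 (AVX), for a value of 0x7.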
2383  */
2384 void
2385 setup_xfem(void)
2386 {
2387         uint64_t flags = XFEATURE_LEGACY_FP;
2388 
2389         ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
2390 
2391         if (is_x86_feature(x86_featureset, X86FSET_SSE))
2392                 flags |= XFEATURE_SSE;
2393 
2394         if (is_x86_feature(x86_featureset, X86FSET_AVX))
2395                 flags |= XFEATURE_AVX;
2396 
2397         if (is_x86_feature(x86_featureset, X86FSET_AVX512F))
2398                 flags |= XFEATURE_AVX512;
2399 
2400         set_xcr(XFEATURE_ENABLED_MASK, flags);
2401 
2402         xsave_bv_all = flags;
2403 }
2404 
2405 static void
2406 cpuid_pass1_topology(cpu_t *cpu, uchar_t *featureset)
2407 {
2408         struct cpuid_info *cpi;
2409 
2410         cpi = cpu->cpu_m.mcpu_cpi;
2411 
2412         if (cpi->cpi_vendor == X86_VENDOR_AMD) {
2413                 cpuid_gather_amd_topology_leaves(cpu);
2414         }
2415 
2416         cpi->cpi_apicid = cpuid_gather_apicid(cpi);
2417 
2418         /*
2419          * Before we can calculate the IDs that we should assign to this
2420          * processor, we need to understand how many cores and threads it has.
2421          */
2422         switch (cpi->cpi_vendor) {
2423         case X86_VENDOR_Intel:
2424                 cpuid_intel_ncores(cpi, &cpi->cpi_ncpu_per_chip,
2425                     &cpi->cpi_ncore_per_chip);
2426                 break;
2427         case X86_VENDOR_AMD:
2428                 cpuid_amd_ncores(cpi, &cpi->cpi_ncpu_per_chip,
2429                     &cpi->cpi_ncore_per_chip);
2430                 break;
2431         default:
2432                 /*
2433                  * If we have some other x86-compatible chip, it's not clear how
2434                  * it would behave. The most common case today is virtualization,
2435                  * though there are also 64-bit VIA chips. Assume that
2436                  * all we can get is the basic Leaf 1 HTT information.
2437                  */
2438                 if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) {
2439                         cpi->cpi_ncore_per_chip = 1;
2440                         cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi);
2441                 }
2442                 break;
2443         }
2444 
2445         /*
2446          * Based on the calculated number of threads and cores, potentially
2447          * assign the HTT and CMT features.
2448          */
2449         if (cpi->cpi_ncore_per_chip > 1) {
2450                 add_x86_feature(featureset, X86FSET_CMP);
2451         }
2452 
2453         if (cpi->cpi_ncpu_per_chip > 1 &&
2454             cpi->cpi_ncpu_per_chip != cpi->cpi_ncore_per_chip) {
2455                 add_x86_feature(featureset, X86FSET_HTT);
2456         }
2457 
2458         /*
2459          * Now that the thread and core counts are set up, go through and
2460          * calculate the rest of the topology parameters. If we think the CPU
2461          * has neither SMT (HTT) nor CMP, then we basically fake up the
2462          * information in some way. The most likely case for this is
2463          * virtualization, where we have a lot of partial topology information.
2464          */
2465         if (!is_x86_feature(featureset, X86FSET_HTT) &&
2466             !is_x86_feature(featureset, X86FSET_CMP)) {
2467                 /*
2468                  * This is a single core, single-threaded processor.
2469                  */
2470                 cpi->cpi_procnodes_per_pkg = 1;
2471                 cpi->cpi_cores_per_compunit = 1;
2472                 cpi->cpi_compunitid = 0;
2473                 cpi->cpi_chipid = -1;
2474                 cpi->cpi_clogid = 0;
2475                 cpi->cpi_coreid = cpu->cpu_id;
2476                 cpi->cpi_pkgcoreid = 0;
2477                 if (cpi->cpi_vendor == X86_VENDOR_AMD) {
2478                         cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 3, 0);
2479                 } else {
2480                         cpi->cpi_procnodeid = cpi->cpi_chipid;
2481                 }
2482         } else {
2483                 switch (cpi->cpi_vendor) {
2484                 case X86_VENDOR_Intel:
2485                         cpuid_intel_getids(cpu, featureset);
2486                         break;
2487                 case X86_VENDOR_AMD:
2488                         cpuid_amd_getids(cpu, featureset);
2489                         break;
2490                 default:
2491                         /*
2492                          * In this case, it's hard to say what we should do.
2493                          * We're going to model them to the OS as single-core
2494                          * threads. We don't have a good identifier for them, so
2495                          * we're just going to use the cpu id, with everything on
2496                          * a single chip.
2497                          *
2498                          * This case has historically been different from the
2499                          * case above where we don't have HTT or CMP. While they
2500                          * could be combined, we've opted to keep it separate to
2501                          * minimize the risk of topology changes in weird cases.
2502                          */
2503                         cpi->cpi_procnodes_per_pkg = 1;
2504                         cpi->cpi_cores_per_compunit = 1;
2505                         cpi->cpi_chipid = 0;
2506                         cpi->cpi_coreid = cpu->cpu_id;
2507                         cpi->cpi_clogid = cpu->cpu_id;
2508                         cpi->cpi_pkgcoreid = cpu->cpu_id;
2509                         cpi->cpi_procnodeid = cpi->cpi_chipid;
2510                         cpi->cpi_compunitid = cpi->cpi_coreid;
2511                         break;
2512                 }
2513         }
2514 }
2515 
2516 /*
2517  * Gather relevant CPU features from leaf 6, which covers thermal information. We
2518  * always gather leaf 6 if it's supported; however, we only look for features on
2519  * Intel systems as AMD does not currently define any of the features we look
2520  * for below.
2521  */
2522 static void
2523 cpuid_pass1_thermal(cpu_t *cpu, uchar_t *featureset)
2524 {
2525         struct cpuid_regs *cp;
2526         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2527 
2528         if (cpi->cpi_maxeax < 6) {
2529                 return;
2530         }
2531 
2532         cp = &cpi->cpi_std[6];
2533         cp->cp_eax = 6;
2534         cp->cp_ebx = cp->cp_ecx = cp->cp_edx = 0;
2535         (void) __cpuid_insn(cp);
2536         platform_cpuid_mangle(cpi->cpi_vendor, 6, cp);
2537 
2538         if (cpi->cpi_vendor != X86_VENDOR_Intel) {
2539                 return;
2540         }
2541 
2542         if ((cp->cp_eax & CPUID_INTC_EAX_DTS) != 0) {
2543                 add_x86_feature(featureset, X86FSET_CORE_THERMAL);
2544         }
2545 
2546         if ((cp->cp_eax & CPUID_INTC_EAX_PTM) != 0) {
2547                 add_x86_feature(featureset, X86FSET_PKG_THERMAL);
2548         }
2549 }
2550 
2551 void
2552 cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
2553 {
2554         uint32_t mask_ecx, mask_edx;
2555         struct cpuid_info *cpi;
2556         struct cpuid_regs *cp;
2557         int xcpuid;
2558 #if !defined(__xpv)
2559         extern int idle_cpu_prefer_mwait;
2560 #endif
2561 
2562         /*
2563          * Space statically allocated for BSP, ensure pointer is set
2564          */
2565         if (cpu->cpu_id == 0) {
2566                 if (cpu->cpu_m.mcpu_cpi == NULL)
2567                         cpu->cpu_m.mcpu_cpi = &cpuid_info0;
2568         }
2569 
2570         add_x86_feature(featureset, X86FSET_CPUID);
2571 
2572         cpi = cpu->cpu_m.mcpu_cpi;
2573         ASSERT(cpi != NULL);
2574         cp = &cpi->cpi_std[0];
2575         cp->cp_eax = 0;
2576         cpi->cpi_maxeax = __cpuid_insn(cp);
2577         {
2578                 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr;
2579                 *iptr++ = cp->cp_ebx;
2580                 *iptr++ = cp->cp_edx;
2581                 *iptr++ = cp->cp_ecx;
2582                 *(char *)&cpi->cpi_vendorstr[12] = '\0';
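                     /*
                      * For example, genuine Intel parts return "Genu" in
                      * %ebx, "ineI" in %edx and "ntel" in %ecx, so the
                      * copies above reassemble the 12-byte vendor string
                      * "GenuineIntel"; AMD parts yield "AuthenticAMD" the
                      * same way.
                      */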
2583         }
2584 
2585         cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr);
2586         x86_vendor = cpi->cpi_vendor; /* for compatibility */
2587 
2588         /*
2589          * Limit the range in case of weird hardware
2590          */
2591         if (cpi->cpi_maxeax > CPI_MAXEAX_MAX)
2592                 cpi->cpi_maxeax = CPI_MAXEAX_MAX;
2593         if (cpi->cpi_maxeax < 1)
2594                 goto pass1_done;
2595 
2596         cp = &cpi->cpi_std[1];
2597         cp->cp_eax = 1;
2598         (void) __cpuid_insn(cp);
2599 
2600         /*
2601          * Extract identifying constants for easy access.
2602          */
2603         cpi->cpi_model = CPI_MODEL(cpi);
2604         cpi->cpi_family = CPI_FAMILY(cpi);
2605 
2606         if (cpi->cpi_family == 0xf)
2607                 cpi->cpi_family += CPI_FAMILY_XTD(cpi);
2608 
2609         /*
2610          * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf.
2611          * Intel, and presumably everyone else, uses model == 0xf, as
2612          * one would expect (max value means possible overflow).  Sigh.
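              *
              * As an illustrative worked example: an Intel leaf 1 %eax value
              * of 0x000506e3 has stepping 0x3, base model 0xe, base family
              * 0x6 and extended model 0x5; since the family is 6, the
              * extended model is folded in below, giving cpi_family = 6,
              * cpi_model = (0x5 << 4) + 0xe = 0x5e and cpi_step = 3.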
2613          */
2614 
2615         switch (cpi->cpi_vendor) {
2616         case X86_VENDOR_Intel:
2617                 if (IS_EXTENDED_MODEL_INTEL(cpi))
2618                         cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
2619                 break;
2620         case X86_VENDOR_AMD:
2621                 if (CPI_FAMILY(cpi) == 0xf)
2622                         cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
2623                 break;
2624         default:
2625                 if (cpi->cpi_model == 0xf)
2626                         cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
2627                 break;
2628         }
2629 
2630         cpi->cpi_step = CPI_STEP(cpi);
2631         cpi->cpi_brandid = CPI_BRANDID(cpi);
2632 
2633         /*
2634          * *default* assumptions:
2635          * - believe %edx feature word
2636          * - ignore %ecx feature word
2637          * - 32-bit virtual and physical addressing
2638          */
2639         mask_edx = 0xffffffff;
2640         mask_ecx = 0;
2641 
2642         cpi->cpi_pabits = cpi->cpi_vabits = 32;
2643 
2644         switch (cpi->cpi_vendor) {
2645         case X86_VENDOR_Intel:
2646                 if (cpi->cpi_family == 5)
2647                         x86_type = X86_TYPE_P5;
2648                 else if (IS_LEGACY_P6(cpi)) {
2649                         x86_type = X86_TYPE_P6;
2650                         pentiumpro_bug4046376 = 1;
2651                         /*
2652                          * Clear the SEP bit when it was set erroneously
2653                          */
2654                         if (cpi->cpi_model < 3 && cpi->cpi_step < 3)
2655                                 cp->cp_edx &= ~CPUID_INTC_EDX_SEP;
2656                 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) {
2657                         x86_type = X86_TYPE_P4;
2658                         /*
2659                          * We don't currently depend on any of the %ecx
2660                          * features until Prescott, so we'll only check
2661                          * this from P4 onwards.  We might want to revisit
2662                          * that idea later.
2663                          */
2664                         mask_ecx = 0xffffffff;
2665                 } else if (cpi->cpi_family > 0xf)
2666                         mask_ecx = 0xffffffff;
2667                 /*
2668                  * We don't support MONITOR/MWAIT if leaf 5 is not available
2669                  * to obtain the monitor linesize.
2670                  */
2671                 if (cpi->cpi_maxeax < 5)
2672                         mask_ecx &= ~CPUID_INTC_ECX_MON;
2673                 break;
2674         case X86_VENDOR_IntelClone:
2675         default:
2676                 break;
2677         case X86_VENDOR_AMD:
2678 #if defined(OPTERON_ERRATUM_108)
2679                 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) {
2680                         cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0;
2681                         cpi->cpi_model = 0xc;
2682                 } else
2683 #endif
2684                 if (cpi->cpi_family == 5) {
2685                         /*
2686                          * AMD K5 and K6
2687                          *
2688                          * These CPUs have an incomplete implementation
2689                          * of MCA/MCE which we mask away.
2690                          */
2691                         mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA);
2692 
2693                         /*
2694                          * Model 0 uses the wrong (APIC) bit
2695                          * to indicate PGE.  Fix it here.
2696                          */
2697                         if (cpi->cpi_model == 0) {
2698                                 if (cp->cp_edx & 0x200) {
2699                                         cp->cp_edx &= ~0x200;
2700                                         cp->cp_edx |= CPUID_INTC_EDX_PGE;
2701                                 }
2702                         }
2703 
2704                         /*
2705                          * Early models had problems w/ MMX; disable.
2706                          */
2707                         if (cpi->cpi_model < 6)
2708                                 mask_edx &= ~CPUID_INTC_EDX_MMX;
2709                 }
2710 
2711                 /*
2712                  * For newer families, SSE3 and CX16, at least, are valid;
2713                  * enable all
2714                  */
2715                 if (cpi->cpi_family >= 0xf)
2716                         mask_ecx = 0xffffffff;
2717                 /*
2718                  * We don't support MONITOR/MWAIT if leaf 5 is not available
2719                  * to obtain the monitor linesize.
2720                  */
2721                 if (cpi->cpi_maxeax < 5)
2722                         mask_ecx &= ~CPUID_INTC_ECX_MON;
2723 
2724 #if !defined(__xpv)
2725                 /*
2726                  * AMD has not historically used MWAIT in the CPU's idle loop.
2727                  * Pre-family-10h Opterons do not have the MWAIT instruction. We
2728                  * know for certain that in at least family 17h, per AMD, mwait
2729                  * is preferred. For the families in between, it is less certain.
2730                  */
2731                 if (cpi->cpi_family < 0x17) {
2732                         idle_cpu_prefer_mwait = 0;
2733                 }
2734 #endif
2735 
2736                 break;
2737         case X86_VENDOR_TM:
2738                 /*
2739                  * workaround the NT workaround in CMS 4.1
2740                  */
2741                 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 &&
2742                     (cpi->cpi_step == 2 || cpi->cpi_step == 3))
2743                         cp->cp_edx |= CPUID_INTC_EDX_CX8;
2744                 break;
2745         case X86_VENDOR_Centaur:
2746                 /*
2747                  * workaround the NT workarounds again
2748                  */
2749                 if (cpi->cpi_family == 6)
2750                         cp->cp_edx |= CPUID_INTC_EDX_CX8;
2751                 break;
2752         case X86_VENDOR_Cyrix:
2753                 /*
2754                  * We rely heavily on the probing in locore
2755                  * to actually figure out what parts, if any,
2756                  * of the Cyrix cpuid instruction to believe.
2757                  */
2758                 switch (x86_type) {
2759                 case X86_TYPE_CYRIX_486:
2760                         mask_edx = 0;
2761                         break;
2762                 case X86_TYPE_CYRIX_6x86:
2763                         mask_edx = 0;
2764                         break;
2765                 case X86_TYPE_CYRIX_6x86L:
2766                         mask_edx =
2767                             CPUID_INTC_EDX_DE |
2768                             CPUID_INTC_EDX_CX8;
2769                         break;
2770                 case X86_TYPE_CYRIX_6x86MX:
2771                         mask_edx =
2772                             CPUID_INTC_EDX_DE |
2773                             CPUID_INTC_EDX_MSR |
2774                             CPUID_INTC_EDX_CX8 |
2775                             CPUID_INTC_EDX_PGE |
2776                             CPUID_INTC_EDX_CMOV |
2777                             CPUID_INTC_EDX_MMX;
2778                         break;
2779                 case X86_TYPE_CYRIX_GXm:
2780                         mask_edx =
2781                             CPUID_INTC_EDX_MSR |
2782                             CPUID_INTC_EDX_CX8 |
2783                             CPUID_INTC_EDX_CMOV |
2784                             CPUID_INTC_EDX_MMX;
2785                         break;
2786                 case X86_TYPE_CYRIX_MediaGX:
2787                         break;
2788                 case X86_TYPE_CYRIX_MII:
2789                 case X86_TYPE_VIA_CYRIX_III:
2790                         mask_edx =
2791                             CPUID_INTC_EDX_DE |
2792                             CPUID_INTC_EDX_TSC |
2793                             CPUID_INTC_EDX_MSR |
2794                             CPUID_INTC_EDX_CX8 |
2795                             CPUID_INTC_EDX_PGE |
2796                             CPUID_INTC_EDX_CMOV |
2797                             CPUID_INTC_EDX_MMX;
2798                         break;
2799                 default:
2800                         break;
2801                 }
2802                 break;
2803         }
2804 
2805 #if defined(__xpv)
2806         /*
2807          * Do not support MONITOR/MWAIT under a hypervisor
2808          */
2809         mask_ecx &= ~CPUID_INTC_ECX_MON;
2810         /*
2811          * Do not support XSAVE under a hypervisor for now
2812          */
2813         xsave_force_disable = B_TRUE;
2814 
2815 #endif  /* __xpv */
2816 
2817         if (xsave_force_disable) {
2818                 mask_ecx &= ~CPUID_INTC_ECX_XSAVE;
2819                 mask_ecx &= ~CPUID_INTC_ECX_AVX;
2820                 mask_ecx &= ~CPUID_INTC_ECX_F16C;
2821                 mask_ecx &= ~CPUID_INTC_ECX_FMA;
2822         }
2823 
2824         /*
2825          * Now that we've figured out the masks that determine
2826          * which bits we choose to believe, apply the masks
2827          * to the feature words, then map the kernel's view
2828          * of these feature words into its feature word.
2829          */
2830         cp->cp_edx &= mask_edx;
2831         cp->cp_ecx &= mask_ecx;
2832 
2833         /*
2834          * apply any platform restrictions (we don't call this
2835          * immediately after __cpuid_insn here, because we need the
2836          * workarounds applied above first)
2837          */
2838         platform_cpuid_mangle(cpi->cpi_vendor, 1, cp);
2839 
2840         /*
2841          * In addition to ecx and edx, Intel and AMD are storing a bunch of
2842          * instruction set extensions in leaf 7's ebx, ecx, and edx.
2843          */
2844         if (cpi->cpi_maxeax >= 7) {
2845                 struct cpuid_regs *ecp;
2846                 ecp = &cpi->cpi_std[7];
2847                 ecp->cp_eax = 7;
2848                 ecp->cp_ecx = 0;
2849                 (void) __cpuid_insn(ecp);
2850 
2851                 /*
2852                  * If XSAVE has been disabled, just ignore all of the
2853                  * extended-save-area dependent flags here.
2854                  */
2855                 if (xsave_force_disable) {
2856                         ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_BMI1;
2857                         ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_BMI2;
2858                         ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_AVX2;
2859                         ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_MPX;
2860                         ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_ALL_AVX512;
2861                         ecp->cp_ecx &= ~CPUID_INTC_ECX_7_0_ALL_AVX512;
2862                         ecp->cp_edx &= ~CPUID_INTC_EDX_7_0_ALL_AVX512;
2863                 }
2864 
2865                 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_SMEP)
2866                         add_x86_feature(featureset, X86FSET_SMEP);
2867 
2868                 /*
2869                  * We check disable_smap here in addition to in startup_smap()
2870                  * to ensure CPUs that aren't the boot CPU don't accidentally
2871                  * include it in the feature set and thus generate a mismatched
2872                  * x86 feature set across CPUs.
2873                  */
2874                 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_SMAP &&
2875                     disable_smap == 0)
2876                         add_x86_feature(featureset, X86FSET_SMAP);
2877 
2878                 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_RDSEED)
2879                         add_x86_feature(featureset, X86FSET_RDSEED);
2880 
2881                 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_ADX)
2882                         add_x86_feature(featureset, X86FSET_ADX);
2883 
2884                 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_FSGSBASE)
2885                         add_x86_feature(featureset, X86FSET_FSGSBASE);
2886 
2887                 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_CLFLUSHOPT)
2888                         add_x86_feature(featureset, X86FSET_CLFLUSHOPT);
2889 
2890                 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
2891                         if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_INVPCID)
2892                                 add_x86_feature(featureset, X86FSET_INVPCID);
2893 
2894                         if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_MPX)
2895                                 add_x86_feature(featureset, X86FSET_MPX);
2896 
2897                         if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_CLWB)
2898                                 add_x86_feature(featureset, X86FSET_CLWB);
2899                 }
2900         }
2901 
2902         /*
2903          * fold in overrides from the "eeprom" mechanism
2904          */
2905         cp->cp_edx |= cpuid_feature_edx_include;
2906         cp->cp_edx &= ~cpuid_feature_edx_exclude;
2907 
2908         cp->cp_ecx |= cpuid_feature_ecx_include;
2909         cp->cp_ecx &= ~cpuid_feature_ecx_exclude;
2910 
2911         if (cp->cp_edx & CPUID_INTC_EDX_PSE) {
2912                 add_x86_feature(featureset, X86FSET_LARGEPAGE);
2913         }
2914         if (cp->cp_edx & CPUID_INTC_EDX_TSC) {
2915                 add_x86_feature(featureset, X86FSET_TSC);
2916         }
2917         if (cp->cp_edx & CPUID_INTC_EDX_MSR) {
2918                 add_x86_feature(featureset, X86FSET_MSR);
2919         }
2920         if (cp->cp_edx & CPUID_INTC_EDX_MTRR) {
2921                 add_x86_feature(featureset, X86FSET_MTRR);
2922         }
2923         if (cp->cp_edx & CPUID_INTC_EDX_PGE) {
2924                 add_x86_feature(featureset, X86FSET_PGE);
2925         }
2926         if (cp->cp_edx & CPUID_INTC_EDX_CMOV) {
2927                 add_x86_feature(featureset, X86FSET_CMOV);
2928         }
2929         if (cp->cp_edx & CPUID_INTC_EDX_MMX) {
2930                 add_x86_feature(featureset, X86FSET_MMX);
2931         }
2932         if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 &&
2933             (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) {
2934                 add_x86_feature(featureset, X86FSET_MCA);
2935         }
2936         if (cp->cp_edx & CPUID_INTC_EDX_PAE) {
2937                 add_x86_feature(featureset, X86FSET_PAE);
2938         }
2939         if (cp->cp_edx & CPUID_INTC_EDX_CX8) {
2940                 add_x86_feature(featureset, X86FSET_CX8);
2941         }
2942         if (cp->cp_ecx & CPUID_INTC_ECX_CX16) {
2943                 add_x86_feature(featureset, X86FSET_CX16);
2944         }
2945         if (cp->cp_edx & CPUID_INTC_EDX_PAT) {
2946                 add_x86_feature(featureset, X86FSET_PAT);
2947         }
2948         if (cp->cp_edx & CPUID_INTC_EDX_SEP) {
2949                 add_x86_feature(featureset, X86FSET_SEP);
2950         }
2951         if (cp->cp_edx & CPUID_INTC_EDX_FXSR) {
2952                 /*
2953                  * In our implementation, fxsave/fxrstor
2954                  * are prerequisites before we'll even
2955                  * try to do SSE things.
2956                  */
2957                 if (cp->cp_edx & CPUID_INTC_EDX_SSE) {
2958                         add_x86_feature(featureset, X86FSET_SSE);
2959                 }
2960                 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) {
2961                         add_x86_feature(featureset, X86FSET_SSE2);
2962                 }
2963                 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) {
2964                         add_x86_feature(featureset, X86FSET_SSE3);
2965                 }
2966                 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) {
2967                         add_x86_feature(featureset, X86FSET_SSSE3);
2968                 }
2969                 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) {
2970                         add_x86_feature(featureset, X86FSET_SSE4_1);
2971                 }
2972                 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) {
2973                         add_x86_feature(featureset, X86FSET_SSE4_2);
2974                 }
2975                 if (cp->cp_ecx & CPUID_INTC_ECX_AES) {
2976                         add_x86_feature(featureset, X86FSET_AES);
2977                 }
2978                 if (cp->cp_ecx & CPUID_INTC_ECX_PCLMULQDQ) {
2979                         add_x86_feature(featureset, X86FSET_PCLMULQDQ);
2980                 }
2981 
2982                 if (cpi->cpi_std[7].cp_ebx & CPUID_INTC_EBX_7_0_SHA)
2983                         add_x86_feature(featureset, X86FSET_SHA);
2984 
2985                 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_UMIP)
2986                         add_x86_feature(featureset, X86FSET_UMIP);
2987                 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_PKU)
2988                         add_x86_feature(featureset, X86FSET_PKU);
2989                 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_OSPKE)
2990                         add_x86_feature(featureset, X86FSET_OSPKE);
2991 
2992                 if (cp->cp_ecx & CPUID_INTC_ECX_XSAVE) {
2993                         add_x86_feature(featureset, X86FSET_XSAVE);
2994 
2995                         /* We only test AVX & AVX512 when there is XSAVE */
2996 
2997                         if (cp->cp_ecx & CPUID_INTC_ECX_AVX) {
2998                                 add_x86_feature(featureset,
2999                                     X86FSET_AVX);
3000 
3001                                 /*
3002                                  * Intel says we can't check these without also
3003                                  * checking AVX.
3004                                  */
3005                                 if (cp->cp_ecx & CPUID_INTC_ECX_F16C)
3006                                         add_x86_feature(featureset,
3007                                             X86FSET_F16C);
3008 
3009                                 if (cp->cp_ecx & CPUID_INTC_ECX_FMA)
3010                                         add_x86_feature(featureset,
3011                                             X86FSET_FMA);
3012 
3013                                 if (cpi->cpi_std[7].cp_ebx &
3014                                     CPUID_INTC_EBX_7_0_BMI1)
3015                                         add_x86_feature(featureset,
3016                                             X86FSET_BMI1);
3017 
3018                                 if (cpi->cpi_std[7].cp_ebx &
3019                                     CPUID_INTC_EBX_7_0_BMI2)
3020                                         add_x86_feature(featureset,
3021                                             X86FSET_BMI2);
3022 
3023                                 if (cpi->cpi_std[7].cp_ebx &
3024                                     CPUID_INTC_EBX_7_0_AVX2)
3025                                         add_x86_feature(featureset,
3026                                             X86FSET_AVX2);
3027                         }
3028 
3029                         if (cpi->cpi_vendor == X86_VENDOR_Intel &&
3030                             (cpi->cpi_std[7].cp_ebx &
3031                             CPUID_INTC_EBX_7_0_AVX512F) != 0) {
3032                                 add_x86_feature(featureset, X86FSET_AVX512F);
3033 
3034                                 if (cpi->cpi_std[7].cp_ebx &
3035                                     CPUID_INTC_EBX_7_0_AVX512DQ)
3036                                         add_x86_feature(featureset,
3037                                             X86FSET_AVX512DQ);
3038                                 if (cpi->cpi_std[7].cp_ebx &
3039                                     CPUID_INTC_EBX_7_0_AVX512IFMA)
3040                                         add_x86_feature(featureset,
3041                                             X86FSET_AVX512FMA);
3042                                 if (cpi->cpi_std[7].cp_ebx &
3043                                     CPUID_INTC_EBX_7_0_AVX512PF)
3044                                         add_x86_feature(featureset,
3045                                             X86FSET_AVX512PF);
3046                                 if (cpi->cpi_std[7].cp_ebx &
3047                                     CPUID_INTC_EBX_7_0_AVX512ER)
3048                                         add_x86_feature(featureset,
3049                                             X86FSET_AVX512ER);
3050                                 if (cpi->cpi_std[7].cp_ebx &
3051                                     CPUID_INTC_EBX_7_0_AVX512CD)
3052                                         add_x86_feature(featureset,
3053                                             X86FSET_AVX512CD);
3054                                 if (cpi->cpi_std[7].cp_ebx &
3055                                     CPUID_INTC_EBX_7_0_AVX512BW)
3056                                         add_x86_feature(featureset,
3057                                             X86FSET_AVX512BW);
3058                                 if (cpi->cpi_std[7].cp_ebx &
3059                                     CPUID_INTC_EBX_7_0_AVX512VL)
3060                                         add_x86_feature(featureset,
3061                                             X86FSET_AVX512VL);
3062 
3063                                 if (cpi->cpi_std[7].cp_ecx &
3064                                     CPUID_INTC_ECX_7_0_AVX512VBMI)
3065                                         add_x86_feature(featureset,
3066                                             X86FSET_AVX512VBMI);
3067                                 if (cpi->cpi_std[7].cp_ecx &
3068                                     CPUID_INTC_ECX_7_0_AVX512VNNI)
3069                                         add_x86_feature(featureset,
3070                                             X86FSET_AVX512VNNI);
3071                                 if (cpi->cpi_std[7].cp_ecx &
3072                                     CPUID_INTC_ECX_7_0_AVX512VPOPCDQ)
3073                                         add_x86_feature(featureset,
3074                                             X86FSET_AVX512VPOPCDQ);
3075 
3076                                 if (cpi->cpi_std[7].cp_edx &
3077                                     CPUID_INTC_EDX_7_0_AVX5124NNIW)
3078                                         add_x86_feature(featureset,
3079                                             X86FSET_AVX512NNIW);
3080                                 if (cpi->cpi_std[7].cp_edx &
3081                                     CPUID_INTC_EDX_7_0_AVX5124FMAPS)
3082                                         add_x86_feature(featureset,
3083                                             X86FSET_AVX512FMAPS);
3084                         }
3085                 }
3086         }
3087 
3088         if (cpi->cpi_vendor == X86_VENDOR_Intel) {
3089                 if (cp->cp_ecx & CPUID_INTC_ECX_PCID) {
3090                         add_x86_feature(featureset, X86FSET_PCID);
3091                 }
3092         }
3093 
3094         if (cp->cp_ecx & CPUID_INTC_ECX_X2APIC) {
3095                 add_x86_feature(featureset, X86FSET_X2APIC);
3096         }
3097         if (cp->cp_edx & CPUID_INTC_EDX_DE) {
3098                 add_x86_feature(featureset, X86FSET_DE);
3099         }
3100 #if !defined(__xpv)
3101         if (cp->cp_ecx & CPUID_INTC_ECX_MON) {
3102 
3103                 /*
3104                  * We require the CLFLUSH instruction for the erratum workaround
3105                  * needed to use MONITOR/MWAIT.
3106                  */
3107                 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
3108                         cpi->cpi_mwait.support |= MWAIT_SUPPORT;
3109                         add_x86_feature(featureset, X86FSET_MWAIT);
3110                 } else {
3111                         extern int idle_cpu_assert_cflush_monitor;
3112 
3113                         /*
3114                          * All processors we are aware of which have
3115                          * MONITOR/MWAIT also have CLFLUSH.
3116                          */
3117                         if (idle_cpu_assert_cflush_monitor) {
3118                                 ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) &&
3119                                     (cp->cp_edx & CPUID_INTC_EDX_CLFSH));
3120                         }
3121                 }
3122         }
3123 #endif  /* __xpv */
3124 
3125         if (cp->cp_ecx & CPUID_INTC_ECX_VMX) {
3126                 add_x86_feature(featureset, X86FSET_VMX);
3127         }
3128 
3129         if (cp->cp_ecx & CPUID_INTC_ECX_RDRAND)
3130                 add_x86_feature(featureset, X86FSET_RDRAND);
3131 
3132         /*
3133          * We only need this the first time; the rest of the CPUs will follow
3134          * suit. We only capture this for the boot CPU.
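              *
              * For reference, leaf 1 %ebx bits 15:8 encode the CLFLUSH line
              * size in 8-byte quantities, so the common value of 0x08 decodes
              * to 8 * 8 = 64 bytes in the computation below.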
3135          */
3136         if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
3137                 add_x86_feature(featureset, X86FSET_CLFSH);
3138                 x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8);
3139         }
3140         if (is_x86_feature(featureset, X86FSET_PAE))
3141                 cpi->cpi_pabits = 36;
3142 
3143         if (cpi->cpi_maxeax >= 0xD && !xsave_force_disable) {
3144                 struct cpuid_regs r, *ecp;
3145 
3146                 ecp = &r;
3147                 ecp->cp_eax = 0xD;
3148                 ecp->cp_ecx = 1;
3149                 ecp->cp_edx = ecp->cp_ebx = 0;
3150                 (void) __cpuid_insn(ecp);
3151 
3152                 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVEOPT)
3153                         add_x86_feature(featureset, X86FSET_XSAVEOPT);
3154                 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVEC)
3155                         add_x86_feature(featureset, X86FSET_XSAVEC);
3156                 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVES)
3157                         add_x86_feature(featureset, X86FSET_XSAVES);
3158         }
3159 
3160         /*
3161          * Work on the "extended" feature information, doing
3162          * some basic initialization for cpuid_pass2()
3163          */
3164         xcpuid = 0;
3165         switch (cpi->cpi_vendor) {
3166         case X86_VENDOR_Intel:
3167                 /*
3168                  * On KVM we know we will have proper support for extended
3169                  * cpuid.
3170                  */
3171                 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf ||
3172                     (get_hwenv() == HW_KVM && cpi->cpi_family == 6 &&
3173                     (cpi->cpi_model == 6 || cpi->cpi_model == 2)))
3174                         xcpuid++;
3175                 break;
3176         case X86_VENDOR_AMD:
3177                 if (cpi->cpi_family > 5 ||
3178                     (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
3179                         xcpuid++;
3180                 break;
3181         case X86_VENDOR_Cyrix:
3182                 /*
3183                  * Only these Cyrix CPUs are -known- to support
3184                  * extended cpuid operations.
3185                  */
3186                 if (x86_type == X86_TYPE_VIA_CYRIX_III ||
3187                     x86_type == X86_TYPE_CYRIX_GXm)
3188                         xcpuid++;
3189                 break;
3190         case X86_VENDOR_Centaur:
3191         case X86_VENDOR_TM:
3192         default:
3193                 xcpuid++;
3194                 break;
3195         }
3196 
3197         if (xcpuid) {
3198                 cp = &cpi->cpi_extd[0];
3199                 cp->cp_eax = CPUID_LEAF_EXT_0;
3200                 cpi->cpi_xmaxeax = __cpuid_insn(cp);
3201         }
3202 
3203         if (cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) {
3204 
3205                 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX)
3206                         cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX;
3207 
3208                 switch (cpi->cpi_vendor) {
3209                 case X86_VENDOR_Intel:
3210                 case X86_VENDOR_AMD:
3211                         if (cpi->cpi_xmaxeax < 0x80000001)
3212                                 break;
3213                         cp = &cpi->cpi_extd[1];
3214                         cp->cp_eax = 0x80000001;
3215                         (void) __cpuid_insn(cp);
3216 
3217                         if (cpi->cpi_vendor == X86_VENDOR_AMD &&
3218                             cpi->cpi_family == 5 &&
3219                             cpi->cpi_model == 6 &&
3220                             cpi->cpi_step == 6) {
3221                                 /*
3222                                  * K6 model 6 uses bit 10 to indicate SYSC.
3223                                  * Later models use bit 11. Fix it here.
3224                                  */
3225                                 if (cp->cp_edx & 0x400) {
3226                                         cp->cp_edx &= ~0x400;
3227                                         cp->cp_edx |= CPUID_AMD_EDX_SYSC;
3228                                 }
3229                         }
3230 
3231                         platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp);
3232 
3233                         /*
3234                          * Compute the additions to the kernel's feature word.
3235                          */
3236                         if (cp->cp_edx & CPUID_AMD_EDX_NX) {
3237                                 add_x86_feature(featureset, X86FSET_NX);
3238                         }
3239 
3240                         /*
3241                          * Regardless of whether or not we boot 64-bit,
3242                          * we should have a way to identify whether
3243                          * the CPU is capable of running 64-bit.
3244                          */
3245                         if (cp->cp_edx & CPUID_AMD_EDX_LM) {
3246                                 add_x86_feature(featureset, X86FSET_64);
3247                         }
3248 
3249                         /* 1 GB large page - enable only for 64 bit kernel */
3250                         if (cp->cp_edx & CPUID_AMD_EDX_1GPG) {
3251                                 add_x86_feature(featureset, X86FSET_1GPG);
3252                         }
3253 
3254                         if ((cpi->cpi_vendor == X86_VENDOR_AMD) &&
3255                             (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) &&
3256                             (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) {
3257                                 add_x86_feature(featureset, X86FSET_SSE4A);
3258                         }
3259 
3260                         /*
3261                          * It's really tricky to support syscall/sysret in
3262                          * the i386 kernel; we rely on sysenter/sysexit
3263                          * instead.  In the amd64 kernel, things are -way-
3264                          * better.
3265                          */
3266                         if (cp->cp_edx & CPUID_AMD_EDX_SYSC) {
3267                                 add_x86_feature(featureset, X86FSET_ASYSC);
3268                         }
3269 
3270                         /*
3271                          * While we're thinking about system calls, note
3272                          * that AMD processors don't support sysenter
3273                          * in long mode at all, so don't try to program them.
3274                          */
3275                         if (x86_vendor == X86_VENDOR_AMD) {
3276                                 remove_x86_feature(featureset, X86FSET_SEP);
3277                         }
3278 
3279                         if (cp->cp_edx & CPUID_AMD_EDX_TSCP) {
3280                                 add_x86_feature(featureset, X86FSET_TSCP);
3281                         }
3282 
3283                         if (cp->cp_ecx & CPUID_AMD_ECX_SVM) {
3284                                 add_x86_feature(featureset, X86FSET_SVM);
3285                         }
3286 
3287                         if (cp->cp_ecx & CPUID_AMD_ECX_TOPOEXT) {
3288                                 add_x86_feature(featureset, X86FSET_TOPOEXT);
3289                         }
3290 
3291                         if (cp->cp_ecx & CPUID_AMD_ECX_PCEC) {
3292                                 add_x86_feature(featureset, X86FSET_AMD_PCEC);
3293                         }
3294 
3295                         if (cp->cp_ecx & CPUID_AMD_ECX_XOP) {
3296                                 add_x86_feature(featureset, X86FSET_XOP);
3297                         }
3298 
3299                         if (cp->cp_ecx & CPUID_AMD_ECX_FMA4) {
3300                                 add_x86_feature(featureset, X86FSET_FMA4);
3301                         }
3302 
3303                         if (cp->cp_ecx & CPUID_AMD_ECX_TBM) {
3304                                 add_x86_feature(featureset, X86FSET_TBM);
3305                         }
3306 
3307                         if (cp->cp_ecx & CPUID_AMD_ECX_MONITORX) {
3308                                 add_x86_feature(featureset, X86FSET_MONITORX);
3309                         }
3310                         break;
3311                 default:
3312                         break;
3313                 }
3314 
3315                 /*
3316                  * Get CPUID data about processor cores and hyperthreads.
3317                  */
3318                 switch (cpi->cpi_vendor) {
3319                 case X86_VENDOR_Intel:
3320                         if (cpi->cpi_maxeax >= 4) {
3321                                 cp = &cpi->cpi_std[4];
3322                                 cp->cp_eax = 4;
3323                                 cp->cp_ecx = 0;
3324                                 (void) __cpuid_insn(cp);
3325                                 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp);
3326                         }
3327                         /*FALLTHROUGH*/
3328                 case X86_VENDOR_AMD:
3329                         if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8)
3330                                 break;
3331                         cp = &cpi->cpi_extd[8];
3332                         cp->cp_eax = CPUID_LEAF_EXT_8;
3333                         (void) __cpuid_insn(cp);
3334                         platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8,
3335                             cp);
3336 
3337                         /*
3338                          * AMD uses ebx for some extended functions.
3339                          */
3340                         if (cpi->cpi_vendor == X86_VENDOR_AMD) {
3341                                 /*
3342                                  * While we're here, check for the AMD "Error
3343                                  * Pointer Zero/Restore" feature. This can be
3344                                  * used to set up the FP save handlers
3345                                  * appropriately.
3346                                  */
3347                                 if (cp->cp_ebx & CPUID_AMD_EBX_ERR_PTR_ZERO) {
3348                                         cpi->cpi_fp_amd_save = 0;
3349                                 } else {
3350                                         cpi->cpi_fp_amd_save = 1;
3351                                 }
3352 
3353                                 if (cp->cp_ebx & CPUID_AMD_EBX_CLZERO) {
3354                                         add_x86_feature(featureset,
3355                                             X86FSET_CLZERO);
3356                                 }
3357                         }
3358 
3359                         /*
3360                          * Virtual and physical address limits from
3361                          * cpuid override previously guessed values.
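                              *
                              * For example, a leaf 0x80000008 %eax value of
                              * 0x3028 decodes as 40 physical address bits
                              * (bits 7:0 = 0x28) and 48 virtual address bits
                              * (bits 15:8 = 0x30).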
3362                          */
3363                         cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0);
3364                         cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8);
3365                         break;
3366                 default:
3367                         break;
3368                 }
3369 
3370                 /*
3371                  * Get CPUID data about TSC Invariance in Deep C-State.
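                      *
                      * (The invariant-TSC indication lives in %edx bit 8 of
                      * leaf 0x80000007; it is merely recorded here and
                      * examined by later consumers.)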
3372                  */
3373                 switch (cpi->cpi_vendor) {
3374                 case X86_VENDOR_Intel:
3375                 case X86_VENDOR_AMD:
3376                         if (cpi->cpi_maxeax >= 7) {
3377                                 cp = &cpi->cpi_extd[7];
3378                                 cp->cp_eax = 0x80000007;
3379                                 cp->cp_ecx = 0;
3380                                 (void) __cpuid_insn(cp);
3381                         }
3382                         break;
3383                 default:
3384                         break;
3385                 }
3386         }
3387 
3388         cpuid_pass1_topology(cpu, featureset);
3389         cpuid_pass1_thermal(cpu, featureset);
3390 
3391         /*
3392          * Synthesize chip "revision" and socket type
3393          */
3394         cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family,
3395             cpi->cpi_model, cpi->cpi_step);
3396         cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor,
3397             cpi->cpi_family, cpi->cpi_model, cpi->cpi_step);
3398         cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family,
3399             cpi->cpi_model, cpi->cpi_step);
3400 
3401         if (cpi->cpi_vendor == X86_VENDOR_AMD) {
3402                 if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8 &&
3403                     cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_ERR_PTR_ZERO) {
3404                         /* Special handling for AMD FP not necessary. */
3405                         cpi->cpi_fp_amd_save = 0;
3406                 } else {
3407                         cpi->cpi_fp_amd_save = 1;
3408                 }
3409         }
3410 
3411         /*
3412          * Check the processor leaves that are used for security features.
3413          */
3414         cpuid_scan_security(cpu, featureset);
3415 
3416 pass1_done:
3417         cpi->cpi_pass = 1;
3418 }
3419 
3420 /*
3421  * Make copies of the cpuid table entries we depend on, in
3422  * part for ease of parsing now, in part so that we have only
3423  * one place to correct any of it, in part for ease of
3424  * later export to userland, and in part so we can look at
3425  * this stuff in a crash dump.
3426  */
3427 
3428 /*ARGSUSED*/
3429 void
3430 cpuid_pass2(cpu_t *cpu)
3431 {
3432         uint_t n, nmax;
3433         int i;
3434         struct cpuid_regs *cp;
3435         uint8_t *dp;
3436         uint32_t *iptr;
3437         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
3438 
3439         ASSERT(cpi->cpi_pass == 1);
3440 
3441         if (cpi->cpi_maxeax < 1)
3442                 goto pass2_done;
3443 
3444         if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD)
3445                 nmax = NMAX_CPI_STD;
3446         /*
3447          * (We already handled n == 0 and n == 1 in pass 1)
3448          */
3449         for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) {
3450                 /*
3451                  * leaves 6 and 7 were handled in pass 1
3452                  */
3453                 if (n == 6 || n == 7)
3454                         continue;
3455 
3456                 cp->cp_eax = n;
3457 
3458                 /*
3459                  * CPUID function 4 expects %ecx to be initialized
3460                  * with an index which indicates which cache to return
3461                  * information about. The OS is expected to call function 4
3462                  * with %ecx set to 0, 1, 2, ... until it returns with
3463                  * EAX[4:0] set to 0, which indicates there are no more
3464                  * caches.
3465                  *
3466                  * Here, populate cpi_std[4] with the information returned by
3467                  * function 4 when %ecx == 0, and do the rest in cpuid_pass3()
3468                  * when dynamic memory allocation becomes available.
3469                  *
3470                  * Note: we need to explicitly initialize %ecx here, since
3471                  * function 4 may have been previously invoked.
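                      *
                      * As a rough illustrative sketch (not the actual pass 3
                      * code), that enumeration looks something like:
                      *
                      *     for (uint32_t idx = 0; ; idx++) {
                      *             struct cpuid_regs r = { 0 };
                      *
                      *             r.cp_eax = 4;
                      *             r.cp_ecx = idx;
                      *             (void) __cpuid_insn(&r);
                      *             if (BITX(r.cp_eax, 4, 0) == 0)
                      *                     break;
                      *             ... decode the cache's level, type and
                      *             size from the returned registers ...
                      *     }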
3472                  */
3473                 if (n == 4)
3474                         cp->cp_ecx = 0;
3475 
3476                 (void) __cpuid_insn(cp);
3477                 platform_cpuid_mangle(cpi->cpi_vendor, n, cp);
3478                 switch (n) {
3479                 case 2:
3480                         /*
3481                          * "the lower 8 bits of the %eax register
3482                          * contain a value that identifies the number
3483                          * of times the cpuid [instruction] has to be
3484                          * executed to obtain a complete image of the
3485                          * processor's caching systems."
3486                          *
3487                          * How *do* they make this stuff up?
3488                          */
3489                         cpi->cpi_ncache = sizeof (*cp) *
3490                             BITX(cp->cp_eax, 7, 0);
3491                         if (cpi->cpi_ncache == 0)
3492                                 break;
3493                         cpi->cpi_ncache--;   /* skip count byte */
3494 
3495                         /*
3496                          * Well, for now, rather than attempt to implement
3497                          * this slightly dubious algorithm, we just look
3498                          * at the first 15 descriptor bytes.
3499                          */
3500                         if (cpi->cpi_ncache > (sizeof (*cp) - 1))
3501                                 cpi->cpi_ncache = sizeof (*cp) - 1;
3502 
3503                         dp = cpi->cpi_cacheinfo;
3504                         if (BITX(cp->cp_eax, 31, 31) == 0) {
3505                                 uint8_t *p = (void *)&cp->cp_eax;
3506                                 for (i = 1; i < 4; i++)
3507                                         if (p[i] != 0)
3508                                                 *dp++ = p[i];
3509                         }
3510                         if (BITX(cp->cp_ebx, 31, 31) == 0) {
3511                                 uint8_t *p = (void *)&cp->cp_ebx;
3512                                 for (i = 0; i < 4; i++)
3513                                         if (p[i] != 0)
3514                                                 *dp++ = p[i];
3515                         }
3516                         if (BITX(cp->cp_ecx, 31, 31) == 0) {
3517                                 uint8_t *p = (void *)&cp->cp_ecx;
3518                                 for (i = 0; i < 4; i++)
3519                                         if (p[i] != 0)
3520                                                 *dp++ = p[i];
3521                         }
3522                         if (BITX(cp->cp_edx, 31, 31) == 0) {
3523                                 uint8_t *p = (void *)&cp->cp_edx;
3524                                 for (i = 0; i < 4; i++)
3525                                         if (p[i] != 0)
3526                                                 *dp++ = p[i];
3527                         }
3528                         break;
3529 
3530                 case 3: /* Processor serial number, if PSN supported */
3531                         break;
3532 
3533                 case 4: /* Deterministic cache parameters */
3534                         break;
3535 
3536                 case 5: /* Monitor/Mwait parameters */
3537                 {
3538                         size_t mwait_size;
3539 
3540                         /*
3541                          * Check cpi_mwait.support, which was set in cpuid_pass1().
3542                          */
3543                         if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT))
3544                                 break;
3545 
3546                         /*
3547                          * Protect ourselves from an insane mwait line size.
3548                          * Workaround for incomplete hardware emulator(s).
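                              *
                              * (On typical real hardware, leaf 5 reports a
                              * 64-byte monitor line in both %eax, the
                              * smallest size, and %ebx, the largest, so
                              * mwait_size below is simply 0x40.)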
3549                          */
3550                         mwait_size = (size_t)MWAIT_SIZE_MAX(cpi);
3551                         if (mwait_size < sizeof (uint32_t) ||
3552                             !ISP2(mwait_size)) {
3553 #if DEBUG
3554                                 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait "
3555                                     "size %ld", cpu->cpu_id, (long)mwait_size);
3556 #endif
3557                                 break;
3558                         }
3559 
3560                         cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi);
3561                         cpi->cpi_mwait.mon_max = mwait_size;
3562                         if (MWAIT_EXTENSION(cpi)) {
3563                                 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS;
3564                                 if (MWAIT_INT_ENABLE(cpi))
3565                                         cpi->cpi_mwait.support |=
3566                                             MWAIT_ECX_INT_ENABLE;
3567                         }
3568                         break;
3569                 }
3570                 default:
3571                         break;
3572                 }
3573         }
3574 
3575         /*
3576          * XSAVE enumeration
3577          */
3578         if (cpi->cpi_maxeax >= 0xD) {
3579                 struct cpuid_regs regs;
3580                 boolean_t cpuid_d_valid = B_TRUE;
3581 
3582                 cp = &regs;
3583                 cp->cp_eax = 0xD;
3584                 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
3585 
3586                 (void) __cpuid_insn(cp);
3587 
3588                 /*
3589                  * Sanity checks for debug
3590                  */
3591                 if ((cp->cp_eax & XFEATURE_LEGACY_FP) == 0 ||
3592                     (cp->cp_eax & XFEATURE_SSE) == 0) {
3593                         cpuid_d_valid = B_FALSE;
3594                 }
3595 
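                     /*
                      * %eax and %edx report the valid XCR0 bits (low and high
                      * words), and %ecx the maximum save area size needed for
                      * all of the state components the hardware supports.
                      */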
3596                 cpi->cpi_xsave.xsav_hw_features_low = cp->cp_eax;
3597                 cpi->cpi_xsave.xsav_hw_features_high = cp->cp_edx;
3598                 cpi->cpi_xsave.xsav_max_size = cp->cp_ecx;
3599 
3600                 /*
3601                  * If the hw supports AVX, get the size and offset in the save
3602                  * area for the ymm state.
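                      *
                      * Sub-leaf n of leaf 0xD describes the component for
                      * XCR0 bit n: %eax gives its size and %ebx its offset in
                      * the (non-compacted) save area. The same pattern is
                      * used below for the MPX and AVX-512 components.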
3603                  */
3604                 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX) {
3605                         cp->cp_eax = 0xD;
3606                         cp->cp_ecx = 2;
3607                         cp->cp_edx = cp->cp_ebx = 0;
3608 
3609                         (void) __cpuid_insn(cp);
3610 
3611                         if (cp->cp_ebx != CPUID_LEAFD_2_YMM_OFFSET ||
3612                             cp->cp_eax != CPUID_LEAFD_2_YMM_SIZE) {
3613                                 cpuid_d_valid = B_FALSE;
3614                         }
3615 
3616                         cpi->cpi_xsave.ymm_size = cp->cp_eax;
3617                         cpi->cpi_xsave.ymm_offset = cp->cp_ebx;
3618                 }
3619 
3620                 /*
3621                  * If the hw supports MPX, get the size and offset in the
3622                  * save area for BNDREGS and BNDCSR.
3623                  */
3624                 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_MPX) {
3625                         cp->cp_eax = 0xD;
3626                         cp->cp_ecx = 3;
3627                         cp->cp_edx = cp->cp_ebx = 0;
3628 
3629                         (void) __cpuid_insn(cp);
3630 
3631                         cpi->cpi_xsave.bndregs_size = cp->cp_eax;
3632                         cpi->cpi_xsave.bndregs_offset = cp->cp_ebx;
3633 
3634                         cp->cp_eax = 0xD;
3635                         cp->cp_ecx = 4;
3636                         cp->cp_edx = cp->cp_ebx = 0;
3637 
3638                         (void) __cpuid_insn(cp);
3639 
3640                         cpi->cpi_xsave.bndcsr_size = cp->cp_eax;
3641                         cpi->cpi_xsave.bndcsr_offset = cp->cp_ebx;
3642                 }
3643 
3644                 /*
3645                  * If the hw supports AVX512, get the size and offset in the
3646                  * save area for the opmask registers and zmm state.
3647                  */
3648                 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX512) {
3649                         cp->cp_eax = 0xD;
3650                         cp->cp_ecx = 5;
3651                         cp->cp_edx = cp->cp_ebx = 0;
3652 
3653                         (void) __cpuid_insn(cp);
3654 
3655                         cpi->cpi_xsave.opmask_size = cp->cp_eax;
3656                         cpi->cpi_xsave.opmask_offset = cp->cp_ebx;
3657 
3658                         cp->cp_eax = 0xD;
3659                         cp->cp_ecx = 6;
3660                         cp->cp_edx = cp->cp_ebx = 0;
3661 
3662                         (void) __cpuid_insn(cp);
3663 
3664                         cpi->cpi_xsave.zmmlo_size = cp->cp_eax;
3665                         cpi->cpi_xsave.zmmlo_offset = cp->cp_ebx;
3666 
3667                         cp->cp_eax = 0xD;
3668                         cp->cp_ecx = 7;
3669                         cp->cp_edx = cp->cp_ebx = 0;
3670 
3671                         (void) __cpuid_insn(cp);
3672 
3673                         cpi->cpi_xsave.zmmhi_size = cp->cp_eax;
3674                         cpi->cpi_xsave.zmmhi_offset = cp->cp_ebx;
3675                 }
3676 
3677                 if (!is_x86_feature(x86_featureset, X86FSET_XSAVE)) {
3678                         xsave_state_size = 0;
3679                 } else if (cpuid_d_valid) {
3680                         xsave_state_size = cpi->cpi_xsave.xsav_max_size;
3681                 } else {
3682                         /* Broken CPUID 0xD, probably in HVM */
3683                         cmn_err(CE_WARN, "cpu%d: CPUID.0xD returns invalid "
3684                             "value: hw_low = %d, hw_high = %d, xsave_size = %d"
3685                             ", ymm_size = %d, ymm_offset = %d\n",
3686                             cpu->cpu_id, cpi->cpi_xsave.xsav_hw_features_low,
3687                             cpi->cpi_xsave.xsav_hw_features_high,
3688                             (int)cpi->cpi_xsave.xsav_max_size,
3689                             (int)cpi->cpi_xsave.ymm_size,
3690                             (int)cpi->cpi_xsave.ymm_offset);
3691 
3692                         if (xsave_state_size != 0) {
3693                                 /*
3694                                  * This must be a non-boot CPU. We cannot
3695                                  * continue, because boot cpu has already
3696                                  * enabled XSAVE.
3697                                  */
3698                                 ASSERT(cpu->cpu_id != 0);
3699                                 cmn_err(CE_PANIC, "cpu%d: we have already "
3700                                     "enabled XSAVE on boot cpu, cannot "
3701                                     "continue.", cpu->cpu_id);
3702                         } else {
3703                                 /*
3704                                  * If we reached here on the boot CPU, it's also
3705                                  * almost certain that we'll reach here on the
3706                                  * non-boot CPUs. When we're here on a boot CPU
3707                                  * we should disable the feature, on a non-boot
3708                                  * CPU we need to confirm that we have.
3709                                  */
3710                                 if (cpu->cpu_id == 0) {
3711                                         remove_x86_feature(x86_featureset,
3712                                             X86FSET_XSAVE);
3713                                         remove_x86_feature(x86_featureset,
3714                                             X86FSET_AVX);
3715                                         remove_x86_feature(x86_featureset,
3716                                             X86FSET_F16C);
3717                                         remove_x86_feature(x86_featureset,
3718                                             X86FSET_BMI1);
3719                                         remove_x86_feature(x86_featureset,
3720                                             X86FSET_BMI2);
3721                                         remove_x86_feature(x86_featureset,
3722                                             X86FSET_FMA);
3723                                         remove_x86_feature(x86_featureset,
3724                                             X86FSET_AVX2);
3725                                         remove_x86_feature(x86_featureset,
3726                                             X86FSET_MPX);
3727                                         remove_x86_feature(x86_featureset,
3728                                             X86FSET_AVX512F);
3729                                         remove_x86_feature(x86_featureset,
3730                                             X86FSET_AVX512DQ);
3731                                         remove_x86_feature(x86_featureset,
3732                                             X86FSET_AVX512PF);
3733                                         remove_x86_feature(x86_featureset,
3734                                             X86FSET_AVX512ER);
3735                                         remove_x86_feature(x86_featureset,
3736                                             X86FSET_AVX512CD);
3737                                         remove_x86_feature(x86_featureset,
3738                                             X86FSET_AVX512BW);
3739                                         remove_x86_feature(x86_featureset,
3740                                             X86FSET_AVX512VL);
3741                                         remove_x86_feature(x86_featureset,
3742                                             X86FSET_AVX512FMA);
3743                                         remove_x86_feature(x86_featureset,
3744                                             X86FSET_AVX512VBMI);
3745                                         remove_x86_feature(x86_featureset,
3746                                             X86FSET_AVX512VNNI);
3747                                         remove_x86_feature(x86_featureset,
3748                                             X86FSET_AVX512VPOPCDQ);
3749                                         remove_x86_feature(x86_featureset,
3750                                             X86FSET_AVX512NNIW);
3751                                         remove_x86_feature(x86_featureset,
3752                                             X86FSET_AVX512FMAPS);
3753 
3754                                         CPI_FEATURES_ECX(cpi) &=
3755                                             ~CPUID_INTC_ECX_XSAVE;
3756                                         CPI_FEATURES_ECX(cpi) &=
3757                                             ~CPUID_INTC_ECX_AVX;
3758                                         CPI_FEATURES_ECX(cpi) &=
3759                                             ~CPUID_INTC_ECX_F16C;
3760                                         CPI_FEATURES_ECX(cpi) &=
3761                                             ~CPUID_INTC_ECX_FMA;
3762                                         CPI_FEATURES_7_0_EBX(cpi) &=
3763                                             ~CPUID_INTC_EBX_7_0_BMI1;
3764                                         CPI_FEATURES_7_0_EBX(cpi) &=
3765                                             ~CPUID_INTC_EBX_7_0_BMI2;
3766                                         CPI_FEATURES_7_0_EBX(cpi) &=
3767                                             ~CPUID_INTC_EBX_7_0_AVX2;
3768                                         CPI_FEATURES_7_0_EBX(cpi) &=
3769                                             ~CPUID_INTC_EBX_7_0_MPX;
3770                                         CPI_FEATURES_7_0_EBX(cpi) &=
3771                                             ~CPUID_INTC_EBX_7_0_ALL_AVX512;
3772 
3773                                         CPI_FEATURES_7_0_ECX(cpi) &=
3774                                             ~CPUID_INTC_ECX_7_0_ALL_AVX512;
3775 
3776                                         CPI_FEATURES_7_0_EDX(cpi) &=
3777                                             ~CPUID_INTC_EDX_7_0_ALL_AVX512;
3778 
3779                                         xsave_force_disable = B_TRUE;
3780                                 } else {
3781                                         VERIFY(is_x86_feature(x86_featureset,
3782                                             X86FSET_XSAVE) == B_FALSE);
3783                                 }
3784                         }
3785                 }
3786         }
3787 
3788 
3789         if ((cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) == 0)
3790                 goto pass2_done;
3791 
3792         if ((nmax = cpi->cpi_xmaxeax - CPUID_LEAF_EXT_0 + 1) > NMAX_CPI_EXTD)
3793                 nmax = NMAX_CPI_EXTD;
3794         /*
3795          * Copy the extended properties, fixing them as we go.
3796          * (We already handled n == 0 and n == 1 in pass 1)
3797          */
3798         iptr = (void *)cpi->cpi_brandstr;
3799         for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) {
3800                 cp->cp_eax = CPUID_LEAF_EXT_0 + n;
3801                 (void) __cpuid_insn(cp);
3802                 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_0 + n,
3803                     cp);
3804                 switch (n) {
3805                 case 2:
3806                 case 3:
3807                 case 4:
3808                         /*
3809                          * Extract the brand string
3810                          */
3811                         *iptr++ = cp->cp_eax;
3812                         *iptr++ = cp->cp_ebx;
3813                         *iptr++ = cp->cp_ecx;
3814                         *iptr++ = cp->cp_edx;
3815                         break;
3816                 case 5:
3817                         switch (cpi->cpi_vendor) {
3818                         case X86_VENDOR_AMD:
3819                                 /*
3820                                  * The Athlon and Duron were the first
3821                                  * parts to report the sizes of the
3822                                  * TLB for large pages. Before then,
3823                                  * we don't trust the data.
3824                                  */
3825                                 if (cpi->cpi_family < 6 ||
3826                                     (cpi->cpi_family == 6 &&
3827                                     cpi->cpi_model < 1))
3828                                         cp->cp_eax = 0;
3829                                 break;
3830                         default:
3831                                 break;
3832                         }
3833                         break;
3834                 case 6:
3835                         switch (cpi->cpi_vendor) {
3836                         case X86_VENDOR_AMD:
3837                                 /*
3838                                  * The Athlon and Duron were the first
3839                                  * AMD parts with L2 TLB's.
3840                                  * Before then, don't trust the data.
3841                                  */
3842                                 if (cpi->cpi_family < 6 ||
3843                                     (cpi->cpi_family == 6 &&
3844                                     cpi->cpi_model < 1))
3845                                         cp->cp_eax = cp->cp_ebx = 0;
3846                                 /*
3847                                  * AMD Duron rev A0 reports L2
3848                                  * cache size incorrectly as 1K
3849                                  * when it is really 64K
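                                      * (i.e. force %ecx[31:16], the L2
                                      * size in KB, to 64)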
3850                                  */
3851                                 if (cpi->cpi_family == 6 &&
3852                                     cpi->cpi_model == 3 &&
3853                                     cpi->cpi_step == 0) {
3854                                         cp->cp_ecx &= 0xffff;
3855                                         cp->cp_ecx |= 0x400000;
3856                                 }
3857                                 break;
3858                         case X86_VENDOR_Cyrix:  /* VIA C3 */
3859                                 /*
3860                                  * VIA C3 processors are a bit messed
3861                                  * up w.r.t. encoding cache sizes in %ecx
3862                                  */
3863                                 if (cpi->cpi_family != 6)
3864                                         break;
3865                                 /*
3866                                  * model 7 and 8 were incorrectly encoded
3867                                  *
3868                                  * xxx is model 8 really broken?
3869                                  */
3870                                 if (cpi->cpi_model == 7 ||
3871                                     cpi->cpi_model == 8)
3872                                         cp->cp_ecx =
3873                                             BITX(cp->cp_ecx, 31, 24) << 16 |
3874                                             BITX(cp->cp_ecx, 23, 16) << 12 |
3875                                             BITX(cp->cp_ecx, 15, 8) << 8 |
3876                                             BITX(cp->cp_ecx, 7, 0);
3877                                 /*
3878                                  * model 9 stepping 1 has wrong associativity
3879                                  */
3880                                 if (cpi->cpi_model == 9 && cpi->cpi_step == 1)
3881                                         cp->cp_ecx |= 8 << 12;
3882                                 break;
3883                         case X86_VENDOR_Intel:
3884                                 /*
3885                                  * Extended L2 Cache features function.
3886                                  * First appeared on Prescott.
3887                                  */
3888                         default:
3889                                 break;
3890                         }
3891                         break;
3892                 default:
3893                         break;
3894                 }
3895         }
3896 
3897 pass2_done:
3898         cpi->cpi_pass = 2;
3899 }
3900 
3901 static const char *
3902 intel_cpubrand(const struct cpuid_info *cpi)
3903 {
3904         int i;
3905 
3906         if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
3907             cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
3908                 return ("i486");
3909 
3910         switch (cpi->cpi_family) {
3911         case 5:
3912                 return ("Intel Pentium(r)");
3913         case 6:
3914                 switch (cpi->cpi_model) {
3915                         uint_t celeron, xeon;
3916                         const struct cpuid_regs *cp;
3917                 case 0:
3918                 case 1:
3919                 case 2:
3920                         return ("Intel Pentium(r) Pro");
3921                 case 3:
3922                 case 4:
3923                         return ("Intel Pentium(r) II");
3924                 case 6:
3925                         return ("Intel Celeron(r)");
3926                 case 5:
3927                 case 7:
3928                         celeron = xeon = 0;
3929                         cp = &cpi->cpi_std[2];   /* cache info */
3930 
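                             /*
                              * Leaf 2 cache descriptors: byte 0 of %eax is an
                              * iteration count rather than a descriptor (hence
                              * the first loop starts at 1); descriptor 0x40
                              * means no L2 cache (Celeron), while 0x44-0x45
                              * denote the 1MB/2MB L2 caches of Xeon-class
                              * parts.
                              */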
3931                         for (i = 1; i < 4; i++) {
3932                                 uint_t tmp;
3933 
3934                                 tmp = (cp->cp_eax >> (8 * i)) & 0xff;
3935                                 if (tmp == 0x40)
3936                                         celeron++;
3937                                 if (tmp >= 0x44 && tmp <= 0x45)
3938                                         xeon++;
3939                         }
3940 
3941                         for (i = 0; i < 2; i++) {
3942                                 uint_t tmp;
3943 
3944                                 tmp = (cp->cp_ebx >> (8 * i)) & 0xff;
3945                                 if (tmp == 0x40)
3946                                         celeron++;
3947                                 else if (tmp >= 0x44 && tmp <= 0x45)
3948                                         xeon++;
3949                         }
3950 
3951                         for (i = 0; i < 4; i++) {
3952                                 uint_t tmp;
3953 
3954                                 tmp = (cp->cp_ecx >> (8 * i)) & 0xff;
3955                                 if (tmp == 0x40)
3956                                         celeron++;
3957                                 else if (tmp >= 0x44 && tmp <= 0x45)
3958                                         xeon++;
3959                         }
3960 
3961                         for (i = 0; i < 4; i++) {
3962                                 uint_t tmp;
3963 
3964                                 tmp = (cp->cp_edx >> (8 * i)) & 0xff;
3965                                 if (tmp == 0x40)
3966                                         celeron++;
3967                                 else if (tmp >= 0x44 && tmp <= 0x45)
3968                                         xeon++;
3969                         }
3970 
3971                         if (celeron)
3972                                 return ("Intel Celeron(r)");
3973                         if (xeon)
3974                                 return (cpi->cpi_model == 5 ?
3975                                     "Intel Pentium(r) II Xeon(tm)" :
3976                                     "Intel Pentium(r) III Xeon(tm)");
3977                         return (cpi->cpi_model == 5 ?
3978                             "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" :
3979                             "Intel Pentium(r) III or Pentium(r) III Xeon(tm)");
3980                 default:
3981                         break;
3982                 }
3983         default:
3984                 break;
3985         }
3986 
3987         /* BrandID is present if the field is nonzero */
3988         if (cpi->cpi_brandid != 0) {
3989                 static const struct {
3990                         uint_t bt_bid;
3991                         const char *bt_str;
3992                 } brand_tbl[] = {
3993                         { 0x1,  "Intel(r) Celeron(r)" },
3994                         { 0x2,  "Intel(r) Pentium(r) III" },
3995                         { 0x3,  "Intel(r) Pentium(r) III Xeon(tm)" },
3996                         { 0x4,  "Intel(r) Pentium(r) III" },
3997                         { 0x6,  "Mobile Intel(r) Pentium(r) III" },
3998                         { 0x7,  "Mobile Intel(r) Celeron(r)" },
3999                         { 0x8,  "Intel(r) Pentium(r) 4" },
4000                         { 0x9,  "Intel(r) Pentium(r) 4" },
4001                         { 0xa,  "Intel(r) Celeron(r)" },
4002                         { 0xb,  "Intel(r) Xeon(tm)" },
4003                         { 0xc,  "Intel(r) Xeon(tm) MP" },
4004                         { 0xe,  "Mobile Intel(r) Pentium(r) 4" },
4005                         { 0xf,  "Mobile Intel(r) Celeron(r)" },
4006                         { 0x11, "Mobile Genuine Intel(r)" },
4007                         { 0x12, "Intel(r) Celeron(r) M" },
4008                         { 0x13, "Mobile Intel(r) Celeron(r)" },
4009                         { 0x14, "Intel(r) Celeron(r)" },
4010                         { 0x15, "Mobile Genuine Intel(r)" },
4011                         { 0x16, "Intel(r) Pentium(r) M" },
4012                         { 0x17, "Mobile Intel(r) Celeron(r)" }
4013                 };
4014                 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]);
4015                 uint_t sgn;
4016 
4017                 sgn = (cpi->cpi_family << 8) |
4018                     (cpi->cpi_model << 4) | cpi->cpi_step;
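                     /*
                      * For example, family 6, model 0xb, stepping 1 encodes
                      * as sgn == 0x6b1, the first special case checked below.
                      */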
4019 
4020                 for (i = 0; i < btblmax; i++)
4021                         if (brand_tbl[i].bt_bid == cpi->cpi_brandid)
4022                                 break;
4023                 if (i < btblmax) {
4024                         if (sgn == 0x6b1 && cpi->cpi_brandid == 3)
4025                                 return ("Intel(r) Celeron(r)");
4026                         if (sgn < 0xf13 && cpi->cpi_brandid == 0xb)
4027                                 return ("Intel(r) Xeon(tm) MP");
4028                         if (sgn < 0xf13 && cpi->cpi_brandid == 0xe)
4029                                 return ("Intel(r) Xeon(tm)");
4030                         return (brand_tbl[i].bt_str);
4031                 }
4032         }
4033 
4034         return (NULL);
4035 }
4036 
4037 static const char *
4038 amd_cpubrand(const struct cpuid_info *cpi)
4039 {
4040         if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
4041             cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
4042                 return ("i486 compatible");
4043 
4044         switch (cpi->cpi_family) {
4045         case 5:
4046                 switch (cpi->cpi_model) {
4047                 case 0:
4048                 case 1:
4049                 case 2:
4050                 case 3:
4051                 case 4:
4052                 case 5:
4053                         return ("AMD-K5(r)");
4054                 case 6:
4055                 case 7:
4056                         return ("AMD-K6(r)");
4057                 case 8:
4058                         return ("AMD-K6(r)-2");
4059                 case 9:
4060                         return ("AMD-K6(r)-III");
4061                 default:
4062                         return ("AMD (family 5)");
4063                 }
4064         case 6:
4065                 switch (cpi->cpi_model) {
4066                 case 1:
4067                         return ("AMD-K7(tm)");
4068                 case 0:
4069                 case 2:
4070                 case 4:
4071                         return ("AMD Athlon(tm)");
4072                 case 3:
4073                 case 7:
4074                         return ("AMD Duron(tm)");
4075                 case 6:
4076                 case 8:
4077                 case 10:
4078                         /*
4079                          * Use the L2 cache size to distinguish
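                              * (extended leaf 6 reports the L2 size in KB in
                              * %ecx[31:16]; Athlon-class parts have at least
                              * 256K where the Duron has only 64K)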
4080                          */
4081                         return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ?
4082                             "AMD Athlon(tm)" : "AMD Duron(tm)");
4083                 default:
4084                         return ("AMD (family 6)");
4085                 }
4086         default:
4087                 break;
4088         }
4089 
4090         if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 &&
4091             cpi->cpi_brandid != 0) {
4092                 switch (BITX(cpi->cpi_brandid, 7, 5)) {
4093                 case 3:
4094                         return ("AMD Opteron(tm) UP 1xx");
4095                 case 4:
4096                         return ("AMD Opteron(tm) DP 2xx");
4097                 case 5:
4098                         return ("AMD Opteron(tm) MP 8xx");
4099                 default:
4100                         return ("AMD Opteron(tm)");
4101                 }
4102         }
4103 
4104         return (NULL);
4105 }
4106 
4107 static const char *
4108 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type)
4109 {
4110         if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
4111             cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 ||
4112             type == X86_TYPE_CYRIX_486)
4113                 return ("i486 compatible");
4114 
4115         switch (type) {
4116         case X86_TYPE_CYRIX_6x86:
4117                 return ("Cyrix 6x86");
4118         case X86_TYPE_CYRIX_6x86L:
4119                 return ("Cyrix 6x86L");
4120         case X86_TYPE_CYRIX_6x86MX:
4121                 return ("Cyrix 6x86MX");
4122         case X86_TYPE_CYRIX_GXm:
4123                 return ("Cyrix GXm");
4124         case X86_TYPE_CYRIX_MediaGX:
4125                 return ("Cyrix MediaGX");
4126         case X86_TYPE_CYRIX_MII:
4127                 return ("Cyrix M2");
4128         case X86_TYPE_VIA_CYRIX_III:
4129                 return ("VIA Cyrix M3");
4130         default:
4131                 /*
4132                  * Have another wild guess ...
4133                  */
4134                 if (cpi->cpi_family == 4 && cpi->cpi_model == 9)
4135                         return ("Cyrix 5x86");
4136                 else if (cpi->cpi_family == 5) {
4137                         switch (cpi->cpi_model) {
4138                         case 2:
4139                                 return ("Cyrix 6x86");  /* Cyrix M1 */
4140                         case 4:
4141                                 return ("Cyrix MediaGX");
4142                         default:
4143                                 break;
4144                         }
4145                 } else if (cpi->cpi_family == 6) {
4146                         switch (cpi->cpi_model) {
4147                         case 0:
4148                                 return ("Cyrix 6x86MX"); /* Cyrix M2? */
4149                         case 5:
4150                         case 6:
4151                         case 7:
4152                         case 8:
4153                         case 9:
4154                                 return ("VIA C3");
4155                         default:
4156                                 break;
4157                         }
4158                 }
4159                 break;
4160         }
4161         return (NULL);
4162 }
4163 
4164 /*
4165  * This only gets called in the case that the CPU extended
4166  * feature brand string leaves (0x80000002, 0x80000003, 0x80000004)
4167  * aren't available, or contain null bytes for some reason.
4168  */
4169 static void
4170 fabricate_brandstr(struct cpuid_info *cpi)
4171 {
4172         const char *brand = NULL;
4173 
4174         switch (cpi->cpi_vendor) {
4175         case X86_VENDOR_Intel:
4176                 brand = intel_cpubrand(cpi);
4177                 break;
4178         case X86_VENDOR_AMD:
4179                 brand = amd_cpubrand(cpi);
4180                 break;
4181         case X86_VENDOR_Cyrix:
4182                 brand = cyrix_cpubrand(cpi, x86_type);
4183                 break;
4184         case X86_VENDOR_NexGen:
4185                 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
4186                         brand = "NexGen Nx586";
4187                 break;
4188         case X86_VENDOR_Centaur:
4189                 if (cpi->cpi_family == 5)
4190                         switch (cpi->cpi_model) {
4191                         case 4:
4192                                 brand = "Centaur C6";
4193                                 break;
4194                         case 8:
4195                                 brand = "Centaur C2";
4196                                 break;
4197                         case 9:
4198                                 brand = "Centaur C3";
4199                                 break;
4200                         default:
4201                                 break;
4202                         }
4203                 break;
4204         case X86_VENDOR_Rise:
4205                 if (cpi->cpi_family == 5 &&
4206                     (cpi->cpi_model == 0 || cpi->cpi_model == 2))
4207                         brand = "Rise mP6";
4208                 break;
4209         case X86_VENDOR_SiS:
4210                 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
4211                         brand = "SiS 55x";
4212                 break;
4213         case X86_VENDOR_TM:
4214                 if (cpi->cpi_family == 5 && cpi->cpi_model == 4)
4215                         brand = "Transmeta Crusoe TM3x00 or TM5x00";
4216                 break;
4217         case X86_VENDOR_NSC:
4218         case X86_VENDOR_UMC:
4219         default:
4220                 break;
4221         }
4222         if (brand) {
4223                 (void) strcpy((char *)cpi->cpi_brandstr, brand);
4224                 return;
4225         }
4226 
4227         /*
4228          * If all else fails ...
4229          */
4230         (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr),
4231             "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family,
4232             cpi->cpi_model, cpi->cpi_step);
4233 }
4234 
4235 /*
4236  * This routine is called just after kernel memory allocation
4237  * becomes available on cpu0, and as part of mp_startup() on
4238  * the other cpus.
4239  *
4240  * Fixup the brand string, and collect any information from cpuid
4241  * that requires dynamically allocated storage to represent.
4242  */
4243 /*ARGSUSED*/
4244 void
4245 cpuid_pass3(cpu_t *cpu)
4246 {
4247         int     i, max, shft, level, size;
4248         struct cpuid_regs regs;
4249         struct cpuid_regs *cp;
4250         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4251 
4252         ASSERT(cpi->cpi_pass == 2);
4253 
4254         /*
4255          * Deterministic cache parameters
4256          *
4257          * Intel uses leaf 0x4 for this, while AMD uses leaf 0x8000001d. The
4258          * values that are present are currently defined to be the same. This
4259          * means we can use the same logic to parse it as long as we use the
4260          * appropriate leaf to get the data. If you're updating this, make sure
4261          * you're careful about which vendor supports which aspect.
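              *
              * Each %ecx index into the leaf describes one cache; enumeration
              * stops when the returned cache-type field is zero.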
4262          *
4263          * Take this opportunity to detect the number of threads sharing the
4264          * last level cache, and construct a corresponding cache id. The
4265          * respective cpuid_info members are initialized to the default case of
4266          * "no last level cache sharing".
4267          */
4268         cpi->cpi_ncpu_shr_last_cache = 1;
4269         cpi->cpi_last_lvl_cacheid = cpu->cpu_id;
4270 
4271         if ((cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) ||
4272             (cpi->cpi_vendor == X86_VENDOR_AMD &&
4273             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1d &&
4274             is_x86_feature(x86_featureset, X86FSET_TOPOEXT))) {
4275                 uint32_t leaf;
4276 
4277                 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
4278                         leaf = 4;
4279                 } else {
4280                         leaf = CPUID_LEAF_EXT_1d;
4281                 }
4282 
4283                 /*
4284                  * Find the # of elements (size) returned by the leaf and along
4285                  * the way detect last level cache sharing details.
4286                  */
4287                 bzero(&regs, sizeof (regs));
4288                 cp = &regs;
4289                 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) {
4290                         cp->cp_eax = leaf;
4291                         cp->cp_ecx = i;
4292 
4293                         (void) __cpuid_insn(cp);
4294 
4295                         if (CPI_CACHE_TYPE(cp) == 0)
4296                                 break;
4297                         level = CPI_CACHE_LVL(cp);
4298                         if (level > max) {
4299                                 max = level;
4300                                 cpi->cpi_ncpu_shr_last_cache =
4301                                     CPI_NTHR_SHR_CACHE(cp) + 1;
4302                         }
4303                 }
4304                 cpi->cpi_cache_leaf_size = size = i;
4305 
4306                 /*
4307                  * Allocate the cpi_cache_leaves array. The first element
4308                  * references the regs for the corresponding leaf with %ecx set
4309                  * to 0. This was gathered in cpuid_pass2().
4310                  */
4311                 if (size > 0) {
4312                         cpi->cpi_cache_leaves =
4313                             kmem_alloc(size * sizeof (cp), KM_SLEEP);
4314                         if (cpi->cpi_vendor == X86_VENDOR_Intel) {
4315                                 cpi->cpi_cache_leaves[0] = &cpi->cpi_std[4];
4316                         } else {
4317                                 cpi->cpi_cache_leaves[0] = &cpi->cpi_extd[0x1d];
4318                         }
4319 
4320                         /*
4321                          * Allocate storage to hold the additional regs
4322                          * for the leaf, %ecx == 1 .. cpi_cache_leaf_size.
4323                          *
4324                          * The regs for the leaf, %ecx == 0 has already
4325                          * been allocated as indicated above.
4326                          */
4327                         for (i = 1; i < size; i++) {
4328                                 cp = cpi->cpi_cache_leaves[i] =
4329                                     kmem_zalloc(sizeof (regs), KM_SLEEP);
4330                                 cp->cp_eax = leaf;
4331                                 cp->cp_ecx = i;
4332 
4333                                 (void) __cpuid_insn(cp);
4334                         }
4335                 }
4336                 /*
4337                  * Determine the number of bits needed to represent
4338                  * the number of CPUs sharing the last level cache.
4339                  *
4340                  * Shift off that number of bits from the APIC id to
4341                  * derive the cache id.
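                      *
                      * For example, with 6 CPUs sharing the last level cache,
                      * shft becomes 3 and a CPU with APIC id 0x15 gets cache
                      * id 0x2.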
4342                  */
4343                 shft = 0;
4344                 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1)
4345                         shft++;
4346                 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft;
4347         }
4348 
4349         /*
4350          * Now fixup the brand string
4351          */
4352         if ((cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) == 0) {
4353                 fabricate_brandstr(cpi);
4354         } else {
4355 
4356                 /*
4357                  * If we successfully extracted a brand string from the cpuid
4358                  * instruction, clean it up by removing leading spaces and
4359                  * similar junk.
4360                  */
4361                 if (cpi->cpi_brandstr[0]) {
4362                         size_t maxlen = sizeof (cpi->cpi_brandstr);
4363                         char *src, *dst;
4364 
4365                         dst = src = (char *)cpi->cpi_brandstr;
4366                         src[maxlen - 1] = '\0';
4367                         /*
4368                          * strip leading spaces
4369                          */
4370                         while (*src == ' ')
4371                                 src++;
4372                         /*
4373                          * Remove any "Genuine" or "Authentic" prefixes.
4374                          */
4375                         if (strncmp(src, "Genuine ", 8) == 0)
4376                                 src += 8;
4377                         if (strncmp(src, "Authentic ", 10) == 0)
4378                                 src += 10;
4379 
4380                         /*
4381                          * Now do an in-place copy.
4382                          * Map (R) to (r) and (TM) to (tm).
4383                          * The era of teletypes is long gone, and there's
4384                          * -really- no need to shout.
4385                          */
4386                         while (*src != '\0') {
4387                                 if (src[0] == '(') {
4388                                         if (strncmp(src + 1, "R)", 2) == 0) {
4389                                                 (void) strncpy(dst, "(r)", 3);
4390                                                 src += 3;
4391                                                 dst += 3;
4392                                                 continue;
4393                                         }
4394                                         if (strncmp(src + 1, "TM)", 3) == 0) {
4395                                                 (void) strncpy(dst, "(tm)", 4);
4396                                                 src += 4;
4397                                                 dst += 4;
4398                                                 continue;
4399                                         }
4400                                 }
4401                                 *dst++ = *src++;
4402                         }
4403                         *dst = '\0';
4404 
4405                         /*
4406                          * Finally, remove any trailing spaces
4407                          */
4408                         while (--dst > cpi->cpi_brandstr)
4409                                 if (*dst == ' ')
4410                                         *dst = '\0';
4411                                 else
4412                                         break;
4413                 } else
4414                         fabricate_brandstr(cpi);
4415         }
4416         cpi->cpi_pass = 3;
4417 }
4418 
4419 /*
4420  * This routine is called out of bind_hwcap() much later in the life
4421  * of the kernel (post_startup()).  The job of this routine is to resolve
4422  * the hardware feature support and kernel support for those features into
4423  * what we're actually going to tell applications via the aux vector.
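      *
      * (The two words handed back through hwcap_out are what bind_hwcap()
      * merges into the hardware capability words advertised to userland,
      * i.e. the AT_SUN_HWCAP and AT_SUN_HWCAP2 aux vector entries.)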
4424  */
4425 void
4426 cpuid_pass4(cpu_t *cpu, uint_t *hwcap_out)
4427 {
4428         struct cpuid_info *cpi;
4429         uint_t hwcap_flags = 0, hwcap_flags_2 = 0;
4430 
4431         if (cpu == NULL)
4432                 cpu = CPU;
4433         cpi = cpu->cpu_m.mcpu_cpi;
4434 
4435         ASSERT(cpi->cpi_pass == 3);
4436 
4437         if (cpi->cpi_maxeax >= 1) {
4438                 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES];
4439                 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES];
4440                 uint32_t *ebx = &cpi->cpi_support[STD_EBX_FEATURES];
4441 
4442                 *edx = CPI_FEATURES_EDX(cpi);
4443                 *ecx = CPI_FEATURES_ECX(cpi);
4444                 *ebx = CPI_FEATURES_7_0_EBX(cpi);
4445 
4446                 /*
4447                  * [these require explicit kernel support]
4448                  */
4449                 if (!is_x86_feature(x86_featureset, X86FSET_SEP))
4450                         *edx &= ~CPUID_INTC_EDX_SEP;
4451 
4452                 if (!is_x86_feature(x86_featureset, X86FSET_SSE))
4453                         *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE);
4454                 if (!is_x86_feature(x86_featureset, X86FSET_SSE2))
4455                         *edx &= ~CPUID_INTC_EDX_SSE2;
4456 
4457                 if (!is_x86_feature(x86_featureset, X86FSET_HTT))
4458                         *edx &= ~CPUID_INTC_EDX_HTT;
4459 
4460                 if (!is_x86_feature(x86_featureset, X86FSET_SSE3))
4461                         *ecx &= ~CPUID_INTC_ECX_SSE3;
4462 
4463                 if (!is_x86_feature(x86_featureset, X86FSET_SSSE3))
4464                         *ecx &= ~CPUID_INTC_ECX_SSSE3;
4465                 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_1))
4466                         *ecx &= ~CPUID_INTC_ECX_SSE4_1;
4467                 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_2))
4468                         *ecx &= ~CPUID_INTC_ECX_SSE4_2;
4469                 if (!is_x86_feature(x86_featureset, X86FSET_AES))
4470                         *ecx &= ~CPUID_INTC_ECX_AES;
4471                 if (!is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ))
4472                         *ecx &= ~CPUID_INTC_ECX_PCLMULQDQ;
4473                 if (!is_x86_feature(x86_featureset, X86FSET_XSAVE))
4474                         *ecx &= ~(CPUID_INTC_ECX_XSAVE |
4475                             CPUID_INTC_ECX_OSXSAVE);
4476                 if (!is_x86_feature(x86_featureset, X86FSET_AVX))
4477                         *ecx &= ~CPUID_INTC_ECX_AVX;
4478                 if (!is_x86_feature(x86_featureset, X86FSET_F16C))
4479                         *ecx &= ~CPUID_INTC_ECX_F16C;
4480                 if (!is_x86_feature(x86_featureset, X86FSET_FMA))
4481                         *ecx &= ~CPUID_INTC_ECX_FMA;
4482                 if (!is_x86_feature(x86_featureset, X86FSET_BMI1))
4483                         *ebx &= ~CPUID_INTC_EBX_7_0_BMI1;
4484                 if (!is_x86_feature(x86_featureset, X86FSET_BMI2))
4485                         *ebx &= ~CPUID_INTC_EBX_7_0_BMI2;
4486                 if (!is_x86_feature(x86_featureset, X86FSET_AVX2))
4487                         *ebx &= ~CPUID_INTC_EBX_7_0_AVX2;
4488                 if (!is_x86_feature(x86_featureset, X86FSET_RDSEED))
4489                         *ebx &= ~CPUID_INTC_EBX_7_0_RDSEED;
4490                 if (!is_x86_feature(x86_featureset, X86FSET_ADX))
4491                         *ebx &= ~CPUID_INTC_EBX_7_0_ADX;
4492 
4493                 /*
4494                  * [no explicit support required beyond x87 fp context]
4495                  */
4496                 if (!fpu_exists)
4497                         *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX);
4498 
4499                 /*
4500                  * Now map the supported feature vector to things that we
4501                  * think userland will care about.
4502                  */
4503                 if (*edx & CPUID_INTC_EDX_SEP)
4504                         hwcap_flags |= AV_386_SEP;
4505                 if (*edx & CPUID_INTC_EDX_SSE)
4506                         hwcap_flags |= AV_386_FXSR | AV_386_SSE;
4507                 if (*edx & CPUID_INTC_EDX_SSE2)
4508                         hwcap_flags |= AV_386_SSE2;
4509                 if (*ecx & CPUID_INTC_ECX_SSE3)
4510                         hwcap_flags |= AV_386_SSE3;
4511                 if (*ecx & CPUID_INTC_ECX_SSSE3)
4512                         hwcap_flags |= AV_386_SSSE3;
4513                 if (*ecx & CPUID_INTC_ECX_SSE4_1)
4514                         hwcap_flags |= AV_386_SSE4_1;
4515                 if (*ecx & CPUID_INTC_ECX_SSE4_2)
4516                         hwcap_flags |= AV_386_SSE4_2;
4517                 if (*ecx & CPUID_INTC_ECX_MOVBE)
4518                         hwcap_flags |= AV_386_MOVBE;
4519                 if (*ecx & CPUID_INTC_ECX_AES)
4520                         hwcap_flags |= AV_386_AES;
4521                 if (*ecx & CPUID_INTC_ECX_PCLMULQDQ)
4522                         hwcap_flags |= AV_386_PCLMULQDQ;
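                     /*
                      * Only advertise XSAVE, and the AVX family of features
                      * that depends on it, when the hardware supports it and
                      * the OS has enabled it (OSXSAVE).
                      */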
4523                 if ((*ecx & CPUID_INTC_ECX_XSAVE) &&
4524                     (*ecx & CPUID_INTC_ECX_OSXSAVE)) {
4525                         hwcap_flags |= AV_386_XSAVE;
4526 
4527                         if (*ecx & CPUID_INTC_ECX_AVX) {
4528                                 uint32_t *ecx_7 = &CPI_FEATURES_7_0_ECX(cpi);
4529                                 uint32_t *edx_7 = &CPI_FEATURES_7_0_EDX(cpi);
4530 
4531                                 hwcap_flags |= AV_386_AVX;
4532                                 if (*ecx & CPUID_INTC_ECX_F16C)
4533                                         hwcap_flags_2 |= AV_386_2_F16C;
4534                                 if (*ecx & CPUID_INTC_ECX_FMA)
4535                                         hwcap_flags_2 |= AV_386_2_FMA;
4536 
4537                                 if (*ebx & CPUID_INTC_EBX_7_0_BMI1)
4538                                         hwcap_flags_2 |= AV_386_2_BMI1;
4539                                 if (*ebx & CPUID_INTC_EBX_7_0_BMI2)
4540                                         hwcap_flags_2 |= AV_386_2_BMI2;
4541                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX2)
4542                                         hwcap_flags_2 |= AV_386_2_AVX2;
4543                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512F)
4544                                         hwcap_flags_2 |= AV_386_2_AVX512F;
4545                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512DQ)
4546                                         hwcap_flags_2 |= AV_386_2_AVX512DQ;
4547                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512IFMA)
4548                                         hwcap_flags_2 |= AV_386_2_AVX512IFMA;
4549                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512PF)
4550                                         hwcap_flags_2 |= AV_386_2_AVX512PF;
4551                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512ER)
4552                                         hwcap_flags_2 |= AV_386_2_AVX512ER;
4553                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512CD)
4554                                         hwcap_flags_2 |= AV_386_2_AVX512CD;
4555                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512BW)
4556                                         hwcap_flags_2 |= AV_386_2_AVX512BW;
4557                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512VL)
4558                                         hwcap_flags_2 |= AV_386_2_AVX512VL;
4559 
4560                                 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VBMI)
4561                                         hwcap_flags_2 |= AV_386_2_AVX512VBMI;
4562                                 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VNNI)
4563                                         hwcap_flags_2 |= AV_386_2_AVX512_VNNI;
4564                                 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VPOPCDQ)
4565                                         hwcap_flags_2 |= AV_386_2_AVX512VPOPCDQ;
4566 
4567                                 if (*edx_7 & CPUID_INTC_EDX_7_0_AVX5124NNIW)
4568                                         hwcap_flags_2 |= AV_386_2_AVX512_4NNIW;
4569                                 if (*edx_7 & CPUID_INTC_EDX_7_0_AVX5124FMAPS)
4570                                         hwcap_flags_2 |= AV_386_2_AVX512_4FMAPS;
4571                         }
4572                 }
4573                 if (*ecx & CPUID_INTC_ECX_VMX)
4574                         hwcap_flags |= AV_386_VMX;
4575                 if (*ecx & CPUID_INTC_ECX_POPCNT)
4576                         hwcap_flags |= AV_386_POPCNT;
4577                 if (*edx & CPUID_INTC_EDX_FPU)
4578                         hwcap_flags |= AV_386_FPU;
4579                 if (*edx & CPUID_INTC_EDX_MMX)
4580                         hwcap_flags |= AV_386_MMX;
4581 
4582                 if (*edx & CPUID_INTC_EDX_TSC)
4583                         hwcap_flags |= AV_386_TSC;
4584                 if (*edx & CPUID_INTC_EDX_CX8)
4585                         hwcap_flags |= AV_386_CX8;
4586                 if (*edx & CPUID_INTC_EDX_CMOV)
4587                         hwcap_flags |= AV_386_CMOV;
4588                 if (*ecx & CPUID_INTC_ECX_CX16)
4589                         hwcap_flags |= AV_386_CX16;
4590 
4591                 if (*ecx & CPUID_INTC_ECX_RDRAND)
4592                         hwcap_flags_2 |= AV_386_2_RDRAND;
4593                 if (*ebx & CPUID_INTC_EBX_7_0_ADX)
4594                         hwcap_flags_2 |= AV_386_2_ADX;
4595                 if (*ebx & CPUID_INTC_EBX_7_0_RDSEED)
4596                         hwcap_flags_2 |= AV_386_2_RDSEED;
4597                 if (*ebx & CPUID_INTC_EBX_7_0_SHA)
4598                         hwcap_flags_2 |= AV_386_2_SHA;
4599                 if (*ebx & CPUID_INTC_EBX_7_0_FSGSBASE)
4600                         hwcap_flags_2 |= AV_386_2_FSGSBASE;
4601                 if (*ebx & CPUID_INTC_EBX_7_0_CLWB)
4602                         hwcap_flags_2 |= AV_386_2_CLWB;
4603                 if (*ebx & CPUID_INTC_EBX_7_0_CLFLUSHOPT)
4604                         hwcap_flags_2 |= AV_386_2_CLFLUSHOPT;
4605 
4606         }
4607         /*
4608          * Check a few miscellaneous features.
4609          */
4610         if (is_x86_feature(x86_featureset, X86FSET_CLZERO))
4611                 hwcap_flags_2 |= AV_386_2_CLZERO;
4612 
4613         if (cpi->cpi_xmaxeax < 0x80000001)
4614                 goto pass4_done;
4615 
4616         switch (cpi->cpi_vendor) {
4617                 struct cpuid_regs cp;
4618                 uint32_t *edx, *ecx;
4619 
4620         case X86_VENDOR_Intel:
4621                 /*
4622                  * Seems like Intel duplicated what was necessary
4623                  * here to make the initial crop of 64-bit OS's work.
4624                  * Hopefully, those are the only "extended" bits
4625                  * they'll add.
4626                  */
4627                 /*FALLTHROUGH*/
4628 
4629         case X86_VENDOR_AMD:
4630                 edx = &cpi->cpi_support[AMD_EDX_FEATURES];
4631                 ecx = &cpi->cpi_support[AMD_ECX_FEATURES];
4632 
4633                 *edx = CPI_FEATURES_XTD_EDX(cpi);
4634                 *ecx = CPI_FEATURES_XTD_ECX(cpi);
4635 
4636                 /*
4637                  * [these features require explicit kernel support]
4638                  */
4639                 switch (cpi->cpi_vendor) {
4640                 case X86_VENDOR_Intel:
4641                         if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
4642                                 *edx &= ~CPUID_AMD_EDX_TSCP;
4643                         break;
4644 
4645                 case X86_VENDOR_AMD:
4646                         if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
4647                                 *edx &= ~CPUID_AMD_EDX_TSCP;
4648                         if (!is_x86_feature(x86_featureset, X86FSET_SSE4A))
4649                                 *ecx &= ~CPUID_AMD_ECX_SSE4A;
4650                         break;
4651 
4652                 default:
4653                         break;
4654                 }
4655 
4656                 /*
4657                  * [no explicit support required beyond
4658                  * x87 fp context and exception handlers]
4659                  */
4660                 if (!fpu_exists)
4661                         *edx &= ~(CPUID_AMD_EDX_MMXamd |
4662                             CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx);
4663 
4664                 if (!is_x86_feature(x86_featureset, X86FSET_NX))
4665                         *edx &= ~CPUID_AMD_EDX_NX;
4666 #if !defined(__amd64)
4667                 *edx &= ~CPUID_AMD_EDX_LM;
4668 #endif
4669                 /*
4670                  * Now map the supported feature vector to
4671                  * things that we think userland will care about.
4672                  */
4673 #if defined(__amd64)
4674                 if (*edx & CPUID_AMD_EDX_SYSC)
4675                         hwcap_flags |= AV_386_AMD_SYSC;
4676 #endif
4677                 if (*edx & CPUID_AMD_EDX_MMXamd)
4678                         hwcap_flags |= AV_386_AMD_MMX;
4679                 if (*edx & CPUID_AMD_EDX_3DNow)
4680                         hwcap_flags |= AV_386_AMD_3DNow;
4681                 if (*edx & CPUID_AMD_EDX_3DNowx)
4682                         hwcap_flags |= AV_386_AMD_3DNowx;
4683                 if (*ecx & CPUID_AMD_ECX_SVM)
4684                         hwcap_flags |= AV_386_AMD_SVM;
4685 
4686                 switch (cpi->cpi_vendor) {
4687                 case X86_VENDOR_AMD:
4688                         if (*edx & CPUID_AMD_EDX_TSCP)
4689                                 hwcap_flags |= AV_386_TSCP;
4690                         if (*ecx & CPUID_AMD_ECX_AHF64)
4691                                 hwcap_flags |= AV_386_AHF;
4692                         if (*ecx & CPUID_AMD_ECX_SSE4A)
4693                                 hwcap_flags |= AV_386_AMD_SSE4A;
4694                         if (*ecx & CPUID_AMD_ECX_LZCNT)
4695                                 hwcap_flags |= AV_386_AMD_LZCNT;
4696                         if (*ecx & CPUID_AMD_ECX_MONITORX)
4697                                 hwcap_flags_2 |= AV_386_2_MONITORX;
4698                         break;
4699 
4700                 case X86_VENDOR_Intel:
4701                         if (*edx & CPUID_AMD_EDX_TSCP)
4702                                 hwcap_flags |= AV_386_TSCP;
4703                         if (*ecx & CPUID_AMD_ECX_LZCNT)
4704                                 hwcap_flags |= AV_386_AMD_LZCNT;
4705                         /*
4706                          * Aarrgh.
4707                          * Intel uses a different bit in the same word.
4708                          */
4709                         if (*ecx & CPUID_INTC_ECX_AHF64)
4710                                 hwcap_flags |= AV_386_AHF;
4711                         break;
4712 
4713                 default:
4714                         break;
4715                 }
4716                 break;
4717 
4718         case X86_VENDOR_TM:
4719                 cp.cp_eax = 0x80860001;
4720                 (void) __cpuid_insn(&cp);
4721                 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx;
4722                 break;
4723 
4724         default:
4725                 break;
4726         }
4727 
4728 pass4_done:
4729         cpi->cpi_pass = 4;
4730         if (hwcap_out != NULL) {
4731                 hwcap_out[0] = hwcap_flags;
4732                 hwcap_out[1] = hwcap_flags_2;
4733         }
4734 }
4735 
4736 
4737 /*
4738  * Simulate the cpuid instruction using the data we previously
4739  * captured about this CPU.  We try our best to return the truth
4740  * about the hardware, independently of kernel support.
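      *
      * For example, once pass 3 has completed, a caller interested in leaf 1
      * could do the following (a sketch, not taken from an actual consumer):
      *
      *        struct cpuid_regs cp = { 0 };
      *        cp.cp_eax = 1;
      *        (void) cpuid_insn(NULL, &cp);
      *
      * where a NULL cpu_t selects the current CPU and the cached leaf 1
      * data is copied back into cp.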
4741  */
4742 uint32_t
4743 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp)
4744 {
4745         struct cpuid_info *cpi;
4746         struct cpuid_regs *xcp;
4747 
4748         if (cpu == NULL)
4749                 cpu = CPU;
4750         cpi = cpu->cpu_m.mcpu_cpi;
4751 
4752         ASSERT(cpuid_checkpass(cpu, 3));
4753 
4754         /*
4755          * CPUID data is cached in two separate places: cpi_std for standard
4756          * CPUID leaves, and cpi_extd for extended CPUID leaves.
4757          */
4758         if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD) {
4759                 xcp = &cpi->cpi_std[cp->cp_eax];
4760         } else if (cp->cp_eax >= CPUID_LEAF_EXT_0 &&
4761             cp->cp_eax <= cpi->cpi_xmaxeax &&
4762             cp->cp_eax < CPUID_LEAF_EXT_0 + NMAX_CPI_EXTD) {
4763                 xcp = &cpi->cpi_extd[cp->cp_eax - CPUID_LEAF_EXT_0];
4764         } else {
4765                 /*
4766                  * The caller is asking for data from an input parameter which
4767                  * the kernel has not cached.  In this case we go fetch from
4768                  * the hardware and return the data directly to the user.
4769                  */
4770                 return (__cpuid_insn(cp));
4771         }
4772 
4773         cp->cp_eax = xcp->cp_eax;
4774         cp->cp_ebx = xcp->cp_ebx;
4775         cp->cp_ecx = xcp->cp_ecx;
4776         cp->cp_edx = xcp->cp_edx;
4777         return (cp->cp_eax);
4778 }
4779 
4780 int
4781 cpuid_checkpass(cpu_t *cpu, int pass)
4782 {
4783         return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL &&
4784             cpu->cpu_m.mcpu_cpi->cpi_pass >= pass);
4785 }
4786 
4787 int
4788 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n)
4789 {
4790         ASSERT(cpuid_checkpass(cpu, 3));
4791 
4792         return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr));
4793 }
4794 
4795 int
4796 cpuid_is_cmt(cpu_t *cpu)
4797 {
4798         if (cpu == NULL)
4799                 cpu = CPU;
4800 
4801         ASSERT(cpuid_checkpass(cpu, 1));
4802 
4803         return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0);
4804 }
4805 
4806 /*
4807  * AMD and Intel both implement the 64-bit variant of the syscall
4808  * instruction (syscallq), so if there's -any- support for syscall,
4809  * cpuid currently says "yes, we support this".
4810  *
4811  * However, Intel decided to -not- implement the 32-bit variant of the
4812  * syscall instruction, so we provide a predicate to allow our caller
4813  * to test that subtlety here.
4814  *
4815  * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor,
4816  *      even in the case where the hardware would in fact support it.
4817  */
4818 /*ARGSUSED*/
4819 int
4820 cpuid_syscall32_insn(cpu_t *cpu)
4821 {
4822         ASSERT(cpuid_checkpass((cpu == NULL ? CPU : cpu), 1));
4823 
4824 #if !defined(__xpv)
4825         if (cpu == NULL)
4826                 cpu = CPU;
4827 
4828         /*CSTYLED*/
4829         {
4830                 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4831 
4832                 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
4833                     cpi->cpi_xmaxeax >= 0x80000001 &&
4834                     (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC))
4835                         return (1);
4836         }
4837 #endif
4838         return (0);
4839 }
4840 
4841 int
4842 cpuid_getidstr(cpu_t *cpu, char *s, size_t n)
4843 {
4844         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4845 
4846         static const char fmt[] =
4847             "x86 (%s %X family %d model %d step %d clock %d MHz)";
4848         static const char fmt_ht[] =
4849             "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)";
4850 
4851         ASSERT(cpuid_checkpass(cpu, 1));
4852 
4853         if (cpuid_is_cmt(cpu))
4854                 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid,
4855                     cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
4856                     cpi->cpi_family, cpi->cpi_model,
4857                     cpi->cpi_step, cpu->cpu_type_info.pi_clock));
4858         return (snprintf(s, n, fmt,
4859             cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
4860             cpi->cpi_family, cpi->cpi_model,
4861             cpi->cpi_step, cpu->cpu_type_info.pi_clock));
4862 }
4863 
4864 const char *
4865 cpuid_getvendorstr(cpu_t *cpu)
4866 {
4867         ASSERT(cpuid_checkpass(cpu, 1));
4868         return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr);
4869 }
4870 
4871 uint_t
4872 cpuid_getvendor(cpu_t *cpu)
4873 {
4874         ASSERT(cpuid_checkpass(cpu, 1));
4875         return (cpu->cpu_m.mcpu_cpi->cpi_vendor);
4876 }
4877 
4878 uint_t
4879 cpuid_getfamily(cpu_t *cpu)
4880 {
4881         ASSERT(cpuid_checkpass(cpu, 1));
4882         return (cpu->cpu_m.mcpu_cpi->cpi_family);
4883 }
4884 
4885 uint_t
4886 cpuid_getmodel(cpu_t *cpu)
4887 {
4888         ASSERT(cpuid_checkpass(cpu, 1));
4889         return (cpu->cpu_m.mcpu_cpi->cpi_model);
4890 }
4891 
4892 uint_t
4893 cpuid_get_ncpu_per_chip(cpu_t *cpu)
4894 {
4895         ASSERT(cpuid_checkpass(cpu, 1));
4896         return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip);
4897 }
4898 
4899 uint_t
4900 cpuid_get_ncore_per_chip(cpu_t *cpu)
4901 {
4902         ASSERT(cpuid_checkpass(cpu, 1));
4903         return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip);
4904 }
4905 
4906 uint_t
4907 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu)
4908 {
4909         ASSERT(cpuid_checkpass(cpu, 2));
4910         return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache);
4911 }
4912 
4913 id_t
4914 cpuid_get_last_lvl_cacheid(cpu_t *cpu)
4915 {
4916         ASSERT(cpuid_checkpass(cpu, 2));
4917         return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
4918 }
4919 
4920 uint_t
4921 cpuid_getstep(cpu_t *cpu)
4922 {
4923         ASSERT(cpuid_checkpass(cpu, 1));
4924         return (cpu->cpu_m.mcpu_cpi->cpi_step);
4925 }
4926 
4927 uint_t
4928 cpuid_getsig(struct cpu *cpu)
4929 {
4930         ASSERT(cpuid_checkpass(cpu, 1));
4931         return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax);
4932 }
4933 
4934 uint32_t
4935 cpuid_getchiprev(struct cpu *cpu)
4936 {
4937         ASSERT(cpuid_checkpass(cpu, 1));
4938         return (cpu->cpu_m.mcpu_cpi->cpi_chiprev);
4939 }
4940 
4941 const char *
4942 cpuid_getchiprevstr(struct cpu *cpu)
4943 {
4944         ASSERT(cpuid_checkpass(cpu, 1));
4945         return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr);
4946 }
4947 
4948 uint32_t
4949 cpuid_getsockettype(struct cpu *cpu)
4950 {
4951         ASSERT(cpuid_checkpass(cpu, 1));
4952         return (cpu->cpu_m.mcpu_cpi->cpi_socket);
4953 }
4954 
4955 const char *
4956 cpuid_getsocketstr(cpu_t *cpu)
4957 {
4958         static const char *socketstr = NULL;
4959         struct cpuid_info *cpi;
4960 
4961         ASSERT(cpuid_checkpass(cpu, 1));
4962         cpi = cpu->cpu_m.mcpu_cpi;
4963 
4964         /* Assume that socket types are the same across the system */
4965         if (socketstr == NULL)
4966                 socketstr = _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family,
4967                     cpi->cpi_model, cpi->cpi_step);
4968 
4969 
4970         return (socketstr);
4971 }
4972 
4973 int
4974 cpuid_get_chipid(cpu_t *cpu)
4975 {
4976         ASSERT(cpuid_checkpass(cpu, 1));
4977 
4978         if (cpuid_is_cmt(cpu))
4979                 return (cpu->cpu_m.mcpu_cpi->cpi_chipid);
4980         return (cpu->cpu_id);
4981 }
4982 
4983 id_t
4984 cpuid_get_coreid(cpu_t *cpu)
4985 {
4986         ASSERT(cpuid_checkpass(cpu, 1));
4987         return (cpu->cpu_m.mcpu_cpi->cpi_coreid);
4988 }
4989 
4990 int
4991 cpuid_get_pkgcoreid(cpu_t *cpu)
4992 {
4993         ASSERT(cpuid_checkpass(cpu, 1));
4994         return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid);
4995 }
4996 
4997 int
4998 cpuid_get_clogid(cpu_t *cpu)
4999 {
5000         ASSERT(cpuid_checkpass(cpu, 1));
5001         return (cpu->cpu_m.mcpu_cpi->cpi_clogid);
5002 }
5003 
5004 int
5005 cpuid_get_cacheid(cpu_t *cpu)
5006 {
5007         ASSERT(cpuid_checkpass(cpu, 1));
5008         return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
5009 }
5010 
5011 uint_t
5012 cpuid_get_procnodeid(cpu_t *cpu)
5013 {
5014         ASSERT(cpuid_checkpass(cpu, 1));
5015         return (cpu->cpu_m.mcpu_cpi->cpi_procnodeid);
5016 }
5017 
5018 uint_t
5019 cpuid_get_procnodes_per_pkg(cpu_t *cpu)
5020 {
5021         ASSERT(cpuid_checkpass(cpu, 1));
5022         return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg);
5023 }
5024 
5025 uint_t
5026 cpuid_get_compunitid(cpu_t *cpu)
5027 {
5028         ASSERT(cpuid_checkpass(cpu, 1));
5029         return (cpu->cpu_m.mcpu_cpi->cpi_compunitid);
5030 }
5031 
5032 uint_t
5033 cpuid_get_cores_per_compunit(cpu_t *cpu)
5034 {
5035         ASSERT(cpuid_checkpass(cpu, 1));
5036         return (cpu->cpu_m.mcpu_cpi->cpi_cores_per_compunit);
5037 }
5038 
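     /*
      * Return non-zero if this CPU allows access to %cr8 (the task priority
      * register).  This is always true in 64-bit mode; 32-bit kernels need
      * the AMD extension advertised by CPUID_AMD_ECX_CR8D.
      */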
5039 /*ARGSUSED*/
5040 int
5041 cpuid_have_cr8access(cpu_t *cpu)
5042 {
5043 #if defined(__amd64)
5044         return (1);
5045 #else
5046         struct cpuid_info *cpi;
5047 
5048         ASSERT(cpu != NULL);
5049         cpi = cpu->cpu_m.mcpu_cpi;
5050         if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 &&
5051             (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0)
5052                 return (1);
5053         return (0);
5054 #endif
5055 }
5056 
5057 uint32_t
5058 cpuid_get_apicid(cpu_t *cpu)
5059 {
5060         ASSERT(cpuid_checkpass(cpu, 1));
5061         if (cpu->cpu_m.mcpu_cpi->cpi_maxeax < 1) {
5062                 return (UINT32_MAX);
5063         } else {
5064                 return (cpu->cpu_m.mcpu_cpi->cpi_apicid);
5065         }
5066 }
5067 
5068 void
5069 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits)
5070 {
5071         struct cpuid_info *cpi;
5072 
5073         if (cpu == NULL)
5074                 cpu = CPU;
5075         cpi = cpu->cpu_m.mcpu_cpi;
5076 
5077         ASSERT(cpuid_checkpass(cpu, 1));
5078 
5079         if (pabits)
5080                 *pabits = cpi->cpi_pabits;
5081         if (vabits)
5082                 *vabits = cpi->cpi_vabits;
5083 }
5084 
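     /*
      * Return the number of bytes needed to hold the XSAVE state area: the
      * maximum size reported by the boot CPU's cpuid data, but never less
      * than our own struct xsave_state.
      */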
5085 size_t
5086 cpuid_get_xsave_size()
5087 {
5088         return (MAX(cpuid_info0.cpi_xsave.xsav_max_size,
5089             sizeof (struct xsave_state)));
5090 }
5091 
5092 /*
5093  * Return true if the CPUs on this system require 'pointer clearing' for the
5094  * floating point error pointer exception handling. In the past, this has been
5095  * true for all AMD K7 & K8 CPUs, although newer AMD CPUs have been changed to
5096  * behave the same as Intel. This is checked via the CPUID_AMD_EBX_ERR_PTR_ZERO
5097  * feature bit and is reflected in the cpi_fp_amd_save member.
5098  */
5099 boolean_t
5100 cpuid_need_fp_excp_handling()
5101 {
5102         return (cpuid_info0.cpi_vendor == X86_VENDOR_AMD &&
5103             cpuid_info0.cpi_fp_amd_save != 0);
5104 }
5105 
5106 /*
5107  * Returns the number of data TLB entries for a corresponding
5108  * pagesize.  If it can't be computed, or isn't known, the
5109  * routine returns zero.  If you ask about an architecturally
5110  * impossible pagesize, the routine will panic (so that the
5111  * hat implementor knows that things are inconsistent.)
5112  */
5113 uint_t
5114 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize)
5115 {
5116         struct cpuid_info *cpi;
5117         uint_t dtlb_nent = 0;
5118 
5119         if (cpu == NULL)
5120                 cpu = CPU;
5121         cpi = cpu->cpu_m.mcpu_cpi;
5122 
5123         ASSERT(cpuid_checkpass(cpu, 1));
5124 
5125         /*
5126          * Check the L2 TLB info
5127          */
5128         if (cpi->cpi_xmaxeax >= 0x80000006) {
5129                 struct cpuid_regs *cp = &cpi->cpi_extd[6];
5130 
5131                 switch (pagesize) {
5132 
5133                 case 4 * 1024:
5134                         /*
5135                          * All zero in the top 16 bits of the register
5136                          * indicates a unified TLB. Size is in low 16 bits.
5137                          */
5138                         if ((cp->cp_ebx & 0xffff0000) == 0)
5139                                 dtlb_nent = cp->cp_ebx & 0x0000ffff;
5140                         else
5141                                 dtlb_nent = BITX(cp->cp_ebx, 27, 16);
5142                         break;
5143 
5144                 case 2 * 1024 * 1024:
5145                         if ((cp->cp_eax & 0xffff0000) == 0)
5146                                 dtlb_nent = cp->cp_eax & 0x0000ffff;
5147                         else
5148                                 dtlb_nent = BITX(cp->cp_eax, 27, 16);
5149                         break;
5150 
5151                 default:
5152                         panic("unknown L2 pagesize");
5153                         /*NOTREACHED*/
5154                 }
5155         }
5156 
5157         if (dtlb_nent != 0)
5158                 return (dtlb_nent);
5159 
5160         /*
5161          * No L2 TLB support for this size, try L1.
5162          */
5163         if (cpi->cpi_xmaxeax >= 0x80000005) {
5164                 struct cpuid_regs *cp = &cpi->cpi_extd[5];
5165 
5166                 switch (pagesize) {
5167                 case 4 * 1024:
5168                         dtlb_nent = BITX(cp->cp_ebx, 23, 16);
5169                         break;
5170                 case 2 * 1024 * 1024:
5171                         dtlb_nent = BITX(cp->cp_eax, 23, 16);
5172                         break;
5173                 default:
5174                         panic("unknown L1 d-TLB pagesize");
5175                         /*NOTREACHED*/
5176                 }
5177         }
5178 
5179         return (dtlb_nent);
5180 }
5181 
5182 /*
5183  * Return 0 if the erratum is not present or not applicable, positive
5184  * if it is, and negative if the status of the erratum is unknown.
5185  *
5186  * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm)
5187  * Processors" #25759, Rev 3.57, August 2005
5188  */
5189 int
5190 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum)
5191 {
5192         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
5193         uint_t eax;
5194 
5195         /*
5196          * Bail out if this CPU isn't an AMD CPU, or if it's
5197          * a legacy (32-bit) AMD CPU.
5198          */
5199         if (cpi->cpi_vendor != X86_VENDOR_AMD ||
5200             cpi->cpi_family == 4 || cpi->cpi_family == 5 ||
5201             cpi->cpi_family == 6) {
5202                 return (0);
5203         }
5204 
5205         eax = cpi->cpi_std[1].cp_eax;
5206 
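     /*
      * The macros below match the raw cpuid function 1 %eax signature
      * (including the extended family and model fields) against the silicon
      * revisions listed in the revision guide.
      */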
5207 #define SH_B0(eax)      (eax == 0xf40 || eax == 0xf50)
5208 #define SH_B3(eax)      (eax == 0xf51)
5209 #define B(eax)          (SH_B0(eax) || SH_B3(eax))
5210 
5211 #define SH_C0(eax)      (eax == 0xf48 || eax == 0xf58)
5212 
5213 #define SH_CG(eax)      (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a)
5214 #define DH_CG(eax)      (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0)
5215 #define CH_CG(eax)      (eax == 0xf82 || eax == 0xfb2)
5216 #define CG(eax)         (SH_CG(eax) || DH_CG(eax) || CH_CG(eax))
5217 
5218 #define SH_D0(eax)      (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70)
5219 #define DH_D0(eax)      (eax == 0x10fc0 || eax == 0x10ff0)
5220 #define CH_D0(eax)      (eax == 0x10f80 || eax == 0x10fb0)
5221 #define D0(eax)         (SH_D0(eax) || DH_D0(eax) || CH_D0(eax))
5222 
5223 #define SH_E0(eax)      (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70)
5224 #define JH_E1(eax)      (eax == 0x20f10)        /* JH8_E0 had 0x20f30 */
5225 #define DH_E3(eax)      (eax == 0x20fc0 || eax == 0x20ff0)
5226 #define SH_E4(eax)      (eax == 0x20f51 || eax == 0x20f71)
5227 #define BH_E4(eax)      (eax == 0x20fb1)
5228 #define SH_E5(eax)      (eax == 0x20f42)
5229 #define DH_E6(eax)      (eax == 0x20ff2 || eax == 0x20fc2)
5230 #define JH_E6(eax)      (eax == 0x20f12 || eax == 0x20f32)
5231 #define EX(eax)         (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \
5232                             SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \
5233                             DH_E6(eax) || JH_E6(eax))
5234 
5235 #define DR_AX(eax)      (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02)
5236 #define DR_B0(eax)      (eax == 0x100f20)
5237 #define DR_B1(eax)      (eax == 0x100f21)
5238 #define DR_BA(eax)      (eax == 0x100f2a)
5239 #define DR_B2(eax)      (eax == 0x100f22)
5240 #define DR_B3(eax)      (eax == 0x100f23)
5241 #define RB_C0(eax)      (eax == 0x100f40)
5242 
5243         switch (erratum) {
5244         case 1:
5245                 return (cpi->cpi_family < 0x10);
5246         case 51:        /* what does the asterisk mean? */
5247                 return (B(eax) || SH_C0(eax) || CG(eax));
5248         case 52:
5249                 return (B(eax));
5250         case 57:
5251                 return (cpi->cpi_family <= 0x11);
5252         case 58:
5253                 return (B(eax));
5254         case 60:
5255                 return (cpi->cpi_family <= 0x11);
5256         case 61:
5257         case 62:
5258         case 63:
5259         case 64:
5260         case 65:
5261         case 66:
5262         case 68:
5263         case 69:
5264         case 70:
5265         case 71:
5266                 return (B(eax));
5267         case 72:
5268                 return (SH_B0(eax));
5269         case 74:
5270                 return (B(eax));
5271         case 75:
5272                 return (cpi->cpi_family < 0x10);
5273         case 76:
5274                 return (B(eax));
5275         case 77:
5276                 return (cpi->cpi_family <= 0x11);
5277         case 78:
5278                 return (B(eax) || SH_C0(eax));
5279         case 79:
5280                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
5281         case 80:
5282         case 81:
5283         case 82:
5284                 return (B(eax));
5285         case 83:
5286                 return (B(eax) || SH_C0(eax) || CG(eax));
5287         case 85:
5288                 return (cpi->cpi_family < 0x10);
5289         case 86:
5290                 return (SH_C0(eax) || CG(eax));
5291         case 88:
5292 #if !defined(__amd64)
5293                 return (0);
5294 #else
5295                 return (B(eax) || SH_C0(eax));
5296 #endif
5297         case 89:
5298                 return (cpi->cpi_family < 0x10);
5299         case 90:
5300                 return (B(eax) || SH_C0(eax) || CG(eax));
5301         case 91:
5302         case 92:
5303                 return (B(eax) || SH_C0(eax));
5304         case 93:
5305                 return (SH_C0(eax));
5306         case 94:
5307                 return (B(eax) || SH_C0(eax) || CG(eax));
5308         case 95:
5309 #if !defined(__amd64)
5310                 return (0);
5311 #else
5312                 return (B(eax) || SH_C0(eax));
5313 #endif
5314         case 96:
5315                 return (B(eax) || SH_C0(eax) || CG(eax));
5316         case 97:
5317         case 98:
5318                 return (SH_C0(eax) || CG(eax));
5319         case 99:
5320                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5321         case 100:
5322                 return (B(eax) || SH_C0(eax));
5323         case 101:
5324         case 103:
5325                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5326         case 104:
5327                 return (SH_C0(eax) || CG(eax) || D0(eax));
5328         case 105:
5329         case 106:
5330         case 107:
5331                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5332         case 108:
5333                 return (DH_CG(eax));
5334         case 109:
5335                 return (SH_C0(eax) || CG(eax) || D0(eax));
5336         case 110:
5337                 return (D0(eax) || EX(eax));
5338         case 111:
5339                 return (CG(eax));
5340         case 112:
5341                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
5342         case 113:
5343                 return (eax == 0x20fc0);
5344         case 114:
5345                 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
5346         case 115:
5347                 return (SH_E0(eax) || JH_E1(eax));
5348         case 116:
5349                 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
5350         case 117:
5351                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5352         case 118:
5353                 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) ||
5354                     JH_E6(eax));
5355         case 121:
5356                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
5357         case 122:
5358                 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11);
5359         case 123:
5360                 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax));
5361         case 131:
5362                 return (cpi->cpi_family < 0x10);
5363         case 6336786:
5364 
5365                 /*
5366                  * Test for AdvPowerMgmtInfo.TscPStateInvariant
5367                  * if this is a K8 family or newer processor. We're testing for
5368                  * this 'erratum' to determine whether or not we have a constant
5369                  * TSC.
5370                  *
5371                  * Our current fix for this is to disable the C1-Clock ramping.
5372                  * However, this doesn't work on newer processor families nor
5373                  * does it work when virtualized as those devices don't exist.
5374                  */
5375                 if (cpi->cpi_family >= 0x12 || get_hwenv() != HW_NATIVE) {
5376                         return (0);
5377                 }
5378 
5379                 if (CPI_FAMILY(cpi) == 0xf) {
5380                         struct cpuid_regs regs;
5381                         regs.cp_eax = 0x80000007;
5382                         (void) __cpuid_insn(&regs);
5383                         return (!(regs.cp_edx & 0x100));
5384                 }
5385                 return (0);
5386         case 6323525:
5387                 /*
5388                  * This erratum (K8 #147) is not present on family 10 and newer.
5389                  */
5390                 if (cpi->cpi_family >= 0x10) {
5391                         return (0);
5392                 }
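                     /*
                      * Reassemble the effective family (base plus extended)
                      * and model (extended:base) from the raw signature and
                      * check for parts earlier than family 0xf, model 0x40.
                      */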
5393                 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) |
5394                     (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40);
5395 
5396         case 6671130:
5397                 /*
5398                  * check for processors (pre-Shanghai) that do not provide
5399                  * optimal management of 1gb ptes in their tlbs.
5400                  */
5401                 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4);
5402 
5403         case 298:
5404                 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) ||
5405                     DR_B2(eax) || RB_C0(eax));
5406 
5407         case 721:
5408 #if defined(__amd64)
5409                 return (cpi->cpi_family == 0x10 || cpi->cpi_family == 0x12);
5410 #else
5411                 return (0);
5412 #endif
5413 
5414         default:
5415                 return (-1);
5416 
5417         }
5418 }
5419 
5420 /*
5421  * Determine if specified erratum is present via OSVW (OS Visible Workaround).
5422  * Return 1 if erratum is present, 0 if not present and -1 if indeterminate.
5423  */
5424 int
5425 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum)
5426 {
5427         struct cpuid_info       *cpi;
5428         uint_t                  osvwid;
5429         static int              osvwfeature = -1;
5430         uint64_t                osvwlength;
5431 
5432 
5433         cpi = cpu->cpu_m.mcpu_cpi;
5434 
5435         /* confirm OSVW supported */
5436         if (osvwfeature == -1) {
5437                 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW;
5438         } else {
5439                 /* assert that osvw feature setting is consistent on all cpus */
5440                 ASSERT(osvwfeature ==
5441                     (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW));
5442         }
5443         if (!osvwfeature)
5444                 return (-1);
5445 
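             /*
              * MSR_AMD_OSVW_ID_LEN reports how many OSVW status bits are
              * valid; the status bits themselves are packed
              * OSVW_ID_CNT_PER_MSR to an MSR, starting at
              * MSR_AMD_OSVW_STATUS.
              */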
5446         osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK;
5447 
5448         switch (erratum) {
5449         case 298:       /* osvwid is 0 */
5450                 osvwid = 0;
5451                 if (osvwlength <= (uint64_t)osvwid) {
5452                         /* osvwid 0 is unknown */
5453                         return (-1);
5454                 }
5455 
5456                 /*
5457                  * Check the OSVW STATUS MSR to determine the state
5458                  * of the erratum where:
5459                  *   0 - fixed by HW
5460                  *   1 - BIOS has applied the workaround when BIOS
5461                  *   workaround is available. (Or for other errata,
5462                  *   OS workaround is required.)
5463                  * For a value of 1, caller will confirm that the
5464                  * erratum 298 workaround has indeed been applied by BIOS.
5465                  *
5466                  * A 1 may be set in cpus that have a HW fix
5467                  * in a mixed cpu system. Regarding erratum 298:
5468                  *   In a multiprocessor platform, the workaround above
5469                  *   should be applied to all processors regardless of
5470                  *   silicon revision when an affected processor is
5471                  *   present.
5472                  */
5473 
5474                 return (rdmsr(MSR_AMD_OSVW_STATUS +
5475                     (osvwid / OSVW_ID_CNT_PER_MSR)) &
5476                     (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR)));
5477 
5478         default:
5479                 return (-1);
5480         }
5481 }
5482 
5483 static const char assoc_str[] = "associativity";
5484 static const char line_str[] = "line-size";
5485 static const char size_str[] = "size";
5486 
5487 static void
5488 add_cache_prop(dev_info_t *devi, const char *label, const char *type,
5489     uint32_t val)
5490 {
5491         char buf[128];
5492 
5493         /*
5494          * ndi_prop_update_int() is used because it is desirable for
5495          * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set.
5496          */
5497         if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf))
5498                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val);
5499 }
5500 
5501 /*
5502  * Intel-style cache/tlb description
5503  *
5504  * Standard cpuid level 2 gives a randomly ordered
5505  * selection of tags that index into a table that describes
5506  * cache and tlb properties.
5507  */
5508 
5509 static const char l1_icache_str[] = "l1-icache";
5510 static const char l1_dcache_str[] = "l1-dcache";
5511 static const char l2_cache_str[] = "l2-cache";
5512 static const char l3_cache_str[] = "l3-cache";
5513 static const char itlb4k_str[] = "itlb-4K";
5514 static const char dtlb4k_str[] = "dtlb-4K";
5515 static const char itlb2M_str[] = "itlb-2M";
5516 static const char itlb4M_str[] = "itlb-4M";
5517 static const char dtlb4M_str[] = "dtlb-4M";
5518 static const char dtlb24_str[] = "dtlb0-2M-4M";
5519 static const char itlb424_str[] = "itlb-4K-2M-4M";
5520 static const char itlb24_str[] = "itlb-2M-4M";
5521 static const char dtlb44_str[] = "dtlb-4K-4M";
5522 static const char sl1_dcache_str[] = "sectored-l1-dcache";
5523 static const char sl2_cache_str[] = "sectored-l2-cache";
5524 static const char itrace_str[] = "itrace-cache";
5525 static const char sl3_cache_str[] = "sectored-l3-cache";
5526 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k";
5527 
5528 static const struct cachetab {
5529         uint8_t         ct_code;
5530         uint8_t         ct_assoc;
5531         uint16_t        ct_line_size;
5532         size_t          ct_size;
5533         const char      *ct_label;
5534 } intel_ctab[] = {
5535         /*
5536          * maintain descending order!
5537          *
5538          * Codes ignored - Reason
5539          * ----------------------
5540          * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache
5541          * f0H/f1H - Currently we do not interpret prefetch size by design
5542          */
5543         { 0xe4, 16, 64, 8*1024*1024, l3_cache_str},
5544         { 0xe3, 16, 64, 4*1024*1024, l3_cache_str},
5545         { 0xe2, 16, 64, 2*1024*1024, l3_cache_str},
5546         { 0xde, 12, 64, 6*1024*1024, l3_cache_str},
5547         { 0xdd, 12, 64, 3*1024*1024, l3_cache_str},
5548         { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str},
5549         { 0xd8, 8, 64, 4*1024*1024, l3_cache_str},
5550         { 0xd7, 8, 64, 2*1024*1024, l3_cache_str},
5551         { 0xd6, 8, 64, 1*1024*1024, l3_cache_str},
5552         { 0xd2, 4, 64, 2*1024*1024, l3_cache_str},
5553         { 0xd1, 4, 64, 1*1024*1024, l3_cache_str},
5554         { 0xd0, 4, 64, 512*1024, l3_cache_str},
5555         { 0xca, 4, 0, 512, sh_l2_tlb4k_str},
5556         { 0xc0, 4, 0, 8, dtlb44_str },
5557         { 0xba, 4, 0, 64, dtlb4k_str },
5558         { 0xb4, 4, 0, 256, dtlb4k_str },
5559         { 0xb3, 4, 0, 128, dtlb4k_str },
5560         { 0xb2, 4, 0, 64, itlb4k_str },
5561         { 0xb0, 4, 0, 128, itlb4k_str },
5562         { 0x87, 8, 64, 1024*1024, l2_cache_str},
5563         { 0x86, 4, 64, 512*1024, l2_cache_str},
5564         { 0x85, 8, 32, 2*1024*1024, l2_cache_str},
5565         { 0x84, 8, 32, 1024*1024, l2_cache_str},
5566         { 0x83, 8, 32, 512*1024, l2_cache_str},
5567         { 0x82, 8, 32, 256*1024, l2_cache_str},
5568         { 0x80, 8, 64, 512*1024, l2_cache_str},
5569         { 0x7f, 2, 64, 512*1024, l2_cache_str},
5570         { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str},
5571         { 0x7c, 8, 64, 1024*1024, sl2_cache_str},
5572         { 0x7b, 8, 64, 512*1024, sl2_cache_str},
5573         { 0x7a, 8, 64, 256*1024, sl2_cache_str},
5574         { 0x79, 8, 64, 128*1024, sl2_cache_str},
5575         { 0x78, 8, 64, 1024*1024, l2_cache_str},
5576         { 0x73, 8, 0, 64*1024, itrace_str},
5577         { 0x72, 8, 0, 32*1024, itrace_str},
5578         { 0x71, 8, 0, 16*1024, itrace_str},
5579         { 0x70, 8, 0, 12*1024, itrace_str},
5580         { 0x68, 4, 64, 32*1024, sl1_dcache_str},
5581         { 0x67, 4, 64, 16*1024, sl1_dcache_str},
5582         { 0x66, 4, 64, 8*1024, sl1_dcache_str},
5583         { 0x60, 8, 64, 16*1024, sl1_dcache_str},
5584         { 0x5d, 0, 0, 256, dtlb44_str},
5585         { 0x5c, 0, 0, 128, dtlb44_str},
5586         { 0x5b, 0, 0, 64, dtlb44_str},
5587         { 0x5a, 4, 0, 32, dtlb24_str},
5588         { 0x59, 0, 0, 16, dtlb4k_str},
5589         { 0x57, 4, 0, 16, dtlb4k_str},
5590         { 0x56, 4, 0, 16, dtlb4M_str},
5591         { 0x55, 0, 0, 7, itlb24_str},
5592         { 0x52, 0, 0, 256, itlb424_str},
5593         { 0x51, 0, 0, 128, itlb424_str},
5594         { 0x50, 0, 0, 64, itlb424_str},
5595         { 0x4f, 0, 0, 32, itlb4k_str},
5596         { 0x4e, 24, 64, 6*1024*1024, l2_cache_str},
5597         { 0x4d, 16, 64, 16*1024*1024, l3_cache_str},
5598         { 0x4c, 12, 64, 12*1024*1024, l3_cache_str},
5599         { 0x4b, 16, 64, 8*1024*1024, l3_cache_str},
5600         { 0x4a, 12, 64, 6*1024*1024, l3_cache_str},
5601         { 0x49, 16, 64, 4*1024*1024, l3_cache_str},
5602         { 0x48, 12, 64, 3*1024*1024, l2_cache_str},
5603         { 0x47, 8, 64, 8*1024*1024, l3_cache_str},
5604         { 0x46, 4, 64, 4*1024*1024, l3_cache_str},
5605         { 0x45, 4, 32, 2*1024*1024, l2_cache_str},
5606         { 0x44, 4, 32, 1024*1024, l2_cache_str},
5607         { 0x43, 4, 32, 512*1024, l2_cache_str},
5608         { 0x42, 4, 32, 256*1024, l2_cache_str},
5609         { 0x41, 4, 32, 128*1024, l2_cache_str},
5610         { 0x3e, 4, 64, 512*1024, sl2_cache_str},
5611         { 0x3d, 6, 64, 384*1024, sl2_cache_str},
5612         { 0x3c, 4, 64, 256*1024, sl2_cache_str},
5613         { 0x3b, 2, 64, 128*1024, sl2_cache_str},
5614         { 0x3a, 6, 64, 192*1024, sl2_cache_str},
5615         { 0x39, 4, 64, 128*1024, sl2_cache_str},
5616         { 0x30, 8, 64, 32*1024, l1_icache_str},
5617         { 0x2c, 8, 64, 32*1024, l1_dcache_str},
5618         { 0x29, 8, 64, 4096*1024, sl3_cache_str},
5619         { 0x25, 8, 64, 2048*1024, sl3_cache_str},
5620         { 0x23, 8, 64, 1024*1024, sl3_cache_str},
5621         { 0x22, 4, 64, 512*1024, sl3_cache_str},
5622         { 0x0e, 6, 64, 24*1024, l1_dcache_str},
5623         { 0x0d, 4, 32, 16*1024, l1_dcache_str},
5624         { 0x0c, 4, 32, 16*1024, l1_dcache_str},
5625         { 0x0b, 4, 0, 4, itlb4M_str},
5626         { 0x0a, 2, 32, 8*1024, l1_dcache_str},
5627         { 0x08, 4, 32, 16*1024, l1_icache_str},
5628         { 0x06, 4, 32, 8*1024, l1_icache_str},
5629         { 0x05, 4, 0, 32, dtlb4M_str},
5630         { 0x04, 4, 0, 8, dtlb4M_str},
5631         { 0x03, 4, 0, 64, dtlb4k_str},
5632         { 0x02, 4, 0, 2, itlb4M_str},
5633         { 0x01, 4, 0, 32, itlb4k_str},
5634         { 0 }
5635 };
5636 
5637 static const struct cachetab cyrix_ctab[] = {
5638         { 0x70, 4, 0, 32, "tlb-4K" },
5639         { 0x80, 4, 16, 16*1024, "l1-cache" },
5640         { 0 }
5641 };
5642 
5643 /*
5644  * Search a cache table (sorted by descending code) for a matching entry
5645  */
5646 static const struct cachetab *
5647 find_cacheent(const struct cachetab *ct, uint_t code)
5648 {
5649         if (code != 0) {
5650                 for (; ct->ct_code != 0; ct++)
5651                         if (ct->ct_code <= code)
5652                                 break;
5653                 if (ct->ct_code == code)
5654                         return (ct);
5655         }
5656         return (NULL);
5657 }
5658 
5659 /*
5660  * Populate cachetab entry with L2 or L3 cache-information using
5661  * cpuid function 4. This function is called from intel_walk_cacheinfo()
5662  * when descriptor 0x49 is encountered. It returns 0 if no such cache
5663  * information is found.
5664  */
5665 static int
5666 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi)
5667 {
5668         uint32_t level, i;
5669         int ret = 0;
5670 
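             /*
              * Leaf 4 reports ways, partitions, line size, and set count
              * each as (value - 1); the total cache size computed below is
              * the product of all four.
              */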
5671         for (i = 0; i < cpi->cpi_cache_leaf_size; i++) {
5672                 level = CPI_CACHE_LVL(cpi->cpi_cache_leaves[i]);
5673 
5674                 if (level == 2 || level == 3) {
5675                         ct->ct_assoc =
5676                             CPI_CACHE_WAYS(cpi->cpi_cache_leaves[i]) + 1;
5677                         ct->ct_line_size =
5678                             CPI_CACHE_COH_LN_SZ(cpi->cpi_cache_leaves[i]) + 1;
5679                         ct->ct_size = ct->ct_assoc *
5680                             (CPI_CACHE_PARTS(cpi->cpi_cache_leaves[i]) + 1) *
5681                             ct->ct_line_size *
5682                             (cpi->cpi_cache_leaves[i]->cp_ecx + 1);
5683 
5684                         if (level == 2) {
5685                                 ct->ct_label = l2_cache_str;
5686                         } else if (level == 3) {
5687                                 ct->ct_label = l3_cache_str;
5688                         }
5689                         ret = 1;
5690                 }
5691         }
5692 
5693         return (ret);
5694 }
5695 
5696 /*
5697  * Walk the cacheinfo descriptor, applying 'func' to every valid element
5698  * The walk is terminated if the walker returns non-zero.
5699  */
5700 static void
5701 intel_walk_cacheinfo(struct cpuid_info *cpi,
5702     void *arg, int (*func)(void *, const struct cachetab *))
5703 {
5704         const struct cachetab *ct;
5705         struct cachetab des_49_ct, des_b1_ct;
5706         uint8_t *dp;
5707         int i;
5708 
5709         if ((dp = cpi->cpi_cacheinfo) == NULL)
5710                 return;
5711         for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
5712                 /*
5713                  * For overloaded descriptor 0x49 we use cpuid function 4
5714                  * if supported by the current processor, to create
5715                  * cache information.
5716                  * For overloaded descriptor 0xb1 we use X86_PAE flag
5717                  * to disambiguate the cache information.
5718                  */
5719                 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 &&
5720                     intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) {
5721                                 ct = &des_49_ct;
5722                 } else if (*dp == 0xb1) {
5723                         des_b1_ct.ct_code = 0xb1;
5724                         des_b1_ct.ct_assoc = 4;
5725                         des_b1_ct.ct_line_size = 0;
5726                         if (is_x86_feature(x86_featureset, X86FSET_PAE)) {
5727                                 des_b1_ct.ct_size = 8;
5728                                 des_b1_ct.ct_label = itlb2M_str;
5729                         } else {
5730                                 des_b1_ct.ct_size = 4;
5731                                 des_b1_ct.ct_label = itlb4M_str;
5732                         }
5733                         ct = &des_b1_ct;
5734                 } else {
5735                         if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) {
5736                                 continue;
5737                         }
5738                 }
5739 
5740                 if (func(arg, ct) != 0) {
5741                         break;
5742                 }
5743         }
5744 }
5745 
5746 /*
5747  * (Like the Intel one, except for Cyrix CPUs)
5748  */
5749 static void
5750 cyrix_walk_cacheinfo(struct cpuid_info *cpi,
5751     void *arg, int (*func)(void *, const struct cachetab *))
5752 {
5753         const struct cachetab *ct;
5754         uint8_t *dp;
5755         int i;
5756 
5757         if ((dp = cpi->cpi_cacheinfo) == NULL)
5758                 return;
5759         for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
5760                 /*
5761                  * Search Cyrix-specific descriptor table first ..
5762                  */
5763                 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) {
5764                         if (func(arg, ct) != 0)
5765                                 break;
5766                         continue;
5767                 }
5768                 /*
5769                  * .. else fall back to the Intel one
5770                  */
5771                 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) {
5772                         if (func(arg, ct) != 0)
5773                                 break;
5774                         continue;
5775                 }
5776         }
5777 }
5778 
5779 /*
5780  * A cacheinfo walker that adds associativity, line-size, and size properties
5781  * to the devinfo node it is passed as an argument.
5782  */
5783 static int
5784 add_cacheent_props(void *arg, const struct cachetab *ct)
5785 {
5786         dev_info_t *devi = arg;
5787 
5788         add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc);
5789         if (ct->ct_line_size != 0)
5790                 add_cache_prop(devi, ct->ct_label, line_str,
5791                     ct->ct_line_size);
5792         add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size);
5793         return (0);
5794 }
5795 
5796 
5797 static const char fully_assoc[] = "fully-associative?";
5798 
5799 /*
5800  * AMD style cache/tlb description
5801  *
5802  * Extended functions 5 and 6 directly describe properties of
5803  * tlbs and various cache levels.
5804  */
5805 static void
5806 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc)
5807 {
5808         switch (assoc) {
5809         case 0: /* reserved; ignore */
5810                 break;
5811         default:
5812                 add_cache_prop(devi, label, assoc_str, assoc);
5813                 break;
5814         case 0xff:
5815                 add_cache_prop(devi, label, fully_assoc, 1);
5816                 break;
5817         }
5818 }
5819 
5820 static void
5821 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
5822 {
5823         if (size == 0)
5824                 return;
5825         add_cache_prop(devi, label, size_str, size);
5826         add_amd_assoc(devi, label, assoc);
5827 }
5828 
5829 static void
5830 add_amd_cache(dev_info_t *devi, const char *label,
5831     uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
5832 {
5833         if (size == 0 || line_size == 0)
5834                 return;
5835         add_amd_assoc(devi, label, assoc);
5836         /*
5837          * Most AMD parts have a sectored cache. Multiple cache lines are
5838          * associated with each tag. A sector consists of all cache lines
5839          * associated with a tag. For example, the AMD K6-III has a sector
5840          * size of 2 cache lines per tag.
5841          */
5842         if (lines_per_tag != 0)
5843                 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
5844         add_cache_prop(devi, label, line_str, line_size);
5845         add_cache_prop(devi, label, size_str, size * 1024);
5846 }
5847 
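     /*
      * L2/L3 cache and tlb associativity is encoded rather than literal (see
      * the "AMD L2/L3 Cache and TLB Associativity Field Definition" comment
      * further below); map the encodings handled here onto the associativity
      * they represent.
      */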
5848 static void
5849 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc)
5850 {
5851         switch (assoc) {
5852         case 0: /* off */
5853                 break;
5854         case 1:
5855         case 2:
5856         case 4:
5857                 add_cache_prop(devi, label, assoc_str, assoc);
5858                 break;
5859         case 6:
5860                 add_cache_prop(devi, label, assoc_str, 8);
5861                 break;
5862         case 8:
5863                 add_cache_prop(devi, label, assoc_str, 16);
5864                 break;
5865         case 0xf:
5866                 add_cache_prop(devi, label, fully_assoc, 1);
5867                 break;
5868         default: /* reserved; ignore */
5869                 break;
5870         }
5871 }
5872 
5873 static void
5874 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
5875 {
5876         if (size == 0 || assoc == 0)
5877                 return;
5878         add_amd_l2_assoc(devi, label, assoc);
5879         add_cache_prop(devi, label, size_str, size);
5880 }
5881 
5882 static void
5883 add_amd_l2_cache(dev_info_t *devi, const char *label,
5884     uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
5885 {
5886         if (size == 0 || assoc == 0 || line_size == 0)
5887                 return;
5888         add_amd_l2_assoc(devi, label, assoc);
5889         if (lines_per_tag != 0)
5890                 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
5891         add_cache_prop(devi, label, line_str, line_size);
5892         add_cache_prop(devi, label, size_str, size * 1024);
5893 }
5894 
5895 static void
5896 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi)
5897 {
5898         struct cpuid_regs *cp;
5899 
5900         if (cpi->cpi_xmaxeax < 0x80000005)
5901                 return;
5902         cp = &cpi->cpi_extd[5];
5903 
5904         /*
5905          * 4M/2M L1 TLB configuration
5906          *
5907          * We report the size for 2M pages because AMD uses two
5908          * TLB entries for one 4M page.
5909          */
5910         add_amd_tlb(devi, "dtlb-2M",
5911             BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16));
5912         add_amd_tlb(devi, "itlb-2M",
5913             BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0));
5914 
5915         /*
5916          * 4K L1 TLB configuration
5917          */
5918 
5919         switch (cpi->cpi_vendor) {
5920                 uint_t nentries;
5921         case X86_VENDOR_TM:
5922                 if (cpi->cpi_family >= 5) {
5923                         /*
5924                          * Crusoe processors have 256 TLB entries, but
5925                          * cpuid data format constrains them to only
5926                          * reporting 255 of them.
5927                          */
5928                         if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255)
5929                                 nentries = 256;
5930                         /*
5931                          * Crusoe processors also have a unified TLB
5932                          */
5933                         add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24),
5934                             nentries);
5935                         break;
5936                 }
5937                 /*FALLTHROUGH*/
5938         default:
5939                 add_amd_tlb(devi, itlb4k_str,
5940                     BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16));
5941                 add_amd_tlb(devi, dtlb4k_str,
5942                     BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0));
5943                 break;
5944         }
5945 
5946         /*
5947          * data L1 cache configuration
5948          */
5949 
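             /*
              * Fn 8000_0005 %ecx fields: [31:24] size in KB, [23:16]
              * associativity, [15:8] lines per tag, [7:0] line size in
              * bytes; %edx uses the same layout for the instruction cache.
              */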
5950         add_amd_cache(devi, l1_dcache_str,
5951             BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16),
5952             BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0));
5953 
5954         /*
5955          * code L1 cache configuration
5956          */
5957 
5958         add_amd_cache(devi, l1_icache_str,
5959             BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16),
5960             BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0));
5961 
5962         if (cpi->cpi_xmaxeax < 0x80000006)
5963                 return;
5964         cp = &cpi->cpi_extd[6];
5965 
5966         /* Check for a unified L2 TLB for large pages */
5967 
5968         if (BITX(cp->cp_eax, 31, 16) == 0)
5969                 add_amd_l2_tlb(devi, "l2-tlb-2M",
5970                     BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
5971         else {
5972                 add_amd_l2_tlb(devi, "l2-dtlb-2M",
5973                     BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
5974                 add_amd_l2_tlb(devi, "l2-itlb-2M",
5975                     BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
5976         }
5977 
5978         /* Check for a unified L2 TLB for 4K pages */
5979 
5980         if (BITX(cp->cp_ebx, 31, 16) == 0) {
5981                 add_amd_l2_tlb(devi, "l2-tlb-4K",
5982                     BITX(cp->cp_ebx, 15, 12), BITX(cp->cp_ebx, 11, 0));
5983         } else {
5984                 add_amd_l2_tlb(devi, "l2-dtlb-4K",
5985                     BITX(cp->cp_ebx, 31, 28), BITX(cp->cp_ebx, 27, 16));
5986                 add_amd_l2_tlb(devi, "l2-itlb-4K",
5987                     BITX(cp->cp_ebx, 15, 12), BITX(cp->cp_ebx, 11, 0));
5988         }
5989 
5990         add_amd_l2_cache(devi, l2_cache_str,
5991             BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12),
5992             BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0));
5993 }
5994 
5995 /*
5996  * There are two basic ways that the x86 world describes its cache
5997  * and tlb architecture - Intel's way and AMD's way.
5998  *
5999  * Return which flavor of cache architecture we should use
6000  */
6001 static int
6002 x86_which_cacheinfo(struct cpuid_info *cpi)
6003 {
6004         switch (cpi->cpi_vendor) {
6005         case X86_VENDOR_Intel:
6006                 if (cpi->cpi_maxeax >= 2)
6007                         return (X86_VENDOR_Intel);
6008                 break;
6009         case X86_VENDOR_AMD:
6010                 /*
6011                  * The K5 model 1 was the first part from AMD that reported
6012                  * cache sizes via extended cpuid functions.
6013                  */
6014                 if (cpi->cpi_family > 5 ||
6015                     (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
6016                         return (X86_VENDOR_AMD);
6017                 break;
6018         case X86_VENDOR_TM:
6019                 if (cpi->cpi_family >= 5)
6020                         return (X86_VENDOR_AMD);
6021                 /*FALLTHROUGH*/
6022         default:
6023                 /*
6024                  * If they have extended CPU data for 0x80000005
6025                  * then we assume they have AMD-format cache
6026                  * information.
6027                  *
6028                  * If not, and the vendor happens to be Cyrix,
6029                  * then try our Cyrix-specific handler.
6030                  *
6031                  * If we're not Cyrix, then assume we're using Intel's
6032                  * table-driven format instead.
6033                  */
6034                 if (cpi->cpi_xmaxeax >= 0x80000005)
6035                         return (X86_VENDOR_AMD);
6036                 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix)
6037                         return (X86_VENDOR_Cyrix);
6038                 else if (cpi->cpi_maxeax >= 2)
6039                         return (X86_VENDOR_Intel);
6040                 break;
6041         }
6042         return (-1);
6043 }
6044 
6045 void
6046 cpuid_set_cpu_properties(void *dip, processorid_t cpu_id,
6047     struct cpuid_info *cpi)
6048 {
6049         dev_info_t *cpu_devi;
6050         int create;
6051 
6052         cpu_devi = (dev_info_t *)dip;
6053 
6054         /* device_type */
6055         (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
6056             "device_type", "cpu");
6057 
6058         /* reg */
6059         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6060             "reg", cpu_id);
6061 
6062         /* cpu-mhz, and clock-frequency */
6063         if (cpu_freq > 0) {
6064                 long long mul;
6065 
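                     /*
                      * "clock-frequency" is an int property expressed in Hz,
                      * so only create it when the value fits in an int.
                      */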
6066                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6067                     "cpu-mhz", cpu_freq);
6068                 if ((mul = cpu_freq * 1000000LL) <= INT_MAX)
6069                         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6070                             "clock-frequency", (int)mul);
6071         }
6072 
6073         if (!is_x86_feature(x86_featureset, X86FSET_CPUID)) {
6074                 return;
6075         }
6076 
6077         /* vendor-id */
6078         (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
6079             "vendor-id", cpi->cpi_vendorstr);
6080 
6081         if (cpi->cpi_maxeax == 0) {
6082                 return;
6083         }
6084 
6085         /*
6086          * family, model, and step
6087          */
6088         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6089             "family", CPI_FAMILY(cpi));
6090         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6091             "cpu-model", CPI_MODEL(cpi));
6092         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6093             "stepping-id", CPI_STEP(cpi));
6094 
6095         /* type */
6096         switch (cpi->cpi_vendor) {
6097         case X86_VENDOR_Intel:
6098                 create = 1;
6099                 break;
6100         default:
6101                 create = 0;
6102                 break;
6103         }
6104         if (create)
6105                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6106                     "type", CPI_TYPE(cpi));
6107 
6108         /* ext-family */
6109         switch (cpi->cpi_vendor) {
6110         case X86_VENDOR_Intel:
6111         case X86_VENDOR_AMD:
6112                 create = cpi->cpi_family >= 0xf;
6113                 break;
6114         default:
6115                 create = 0;
6116                 break;
6117         }
6118         if (create)
6119                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6120                     "ext-family", CPI_FAMILY_XTD(cpi));
6121 
6122         /* ext-model */
6123         switch (cpi->cpi_vendor) {
6124         case X86_VENDOR_Intel:
6125                 create = IS_EXTENDED_MODEL_INTEL(cpi);
6126                 break;
6127         case X86_VENDOR_AMD:
6128                 create = CPI_FAMILY(cpi) == 0xf;
6129                 break;
6130         default:
6131                 create = 0;
6132                 break;
6133         }
6134         if (create)
6135                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6136                     "ext-model", CPI_MODEL_XTD(cpi));
6137 
6138         /* generation */
6139         switch (cpi->cpi_vendor) {
6140         case X86_VENDOR_AMD:
6141                 /*
6142                  * AMD K5 model 1 was the first part to support this
6143                  */
6144                 create = cpi->cpi_xmaxeax >= 0x80000001;
6145                 break;
6146         default:
6147                 create = 0;
6148                 break;
6149         }
6150         if (create)
6151                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6152                     "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8));
6153 
6154         /* brand-id */
6155         switch (cpi->cpi_vendor) {
6156         case X86_VENDOR_Intel:
6157                 /*
6158                  * brand id first appeared on Pentium III Xeon model 8 and
6159                  * Celeron model 8 processors, and on Opteron
6160                  */
6161                 create = cpi->cpi_family > 6 ||
6162                     (cpi->cpi_family == 6 && cpi->cpi_model >= 8);
6163                 break;
6164         case X86_VENDOR_AMD:
6165                 create = cpi->cpi_family >= 0xf;
6166                 break;
6167         default:
6168                 create = 0;
6169                 break;
6170         }
6171         if (create && cpi->cpi_brandid != 0) {
6172                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6173                     "brand-id", cpi->cpi_brandid);
6174         }
6175 
6176         /* chunks, and apic-id */
6177         switch (cpi->cpi_vendor) {
6178                 /*
6179                  * first available on Pentium IV and Opteron (K8)
6180                  */
6181         case X86_VENDOR_Intel:
6182                 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
6183                 break;
6184         case X86_VENDOR_AMD:
6185                 create = cpi->cpi_family >= 0xf;
6186                 break;
6187         default:
6188                 create = 0;
6189                 break;
6190         }
6191         if (create) {
6192                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6193                     "chunks", CPI_CHUNKS(cpi));
6194                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6195                     "apic-id", cpi->cpi_apicid);
6196                 if (cpi->cpi_chipid >= 0) {
6197                         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6198                             "chip#", cpi->cpi_chipid);
6199                         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6200                             "clog#", cpi->cpi_clogid);
6201                 }
6202         }
6203 
6204         /* cpuid-features */
6205         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6206             "cpuid-features", CPI_FEATURES_EDX(cpi));
6207 
6208 
6209         /* cpuid-features-ecx */
6210         switch (cpi->cpi_vendor) {
6211         case X86_VENDOR_Intel:
6212                 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
6213                 break;
6214         case X86_VENDOR_AMD:
6215                 create = cpi->cpi_family >= 0xf;
6216                 break;
6217         default:
6218                 create = 0;
6219                 break;
6220         }
6221         if (create)
6222                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6223                     "cpuid-features-ecx", CPI_FEATURES_ECX(cpi));
6224 
6225         /* ext-cpuid-features */
6226         switch (cpi->cpi_vendor) {
6227         case X86_VENDOR_Intel:
6228         case X86_VENDOR_AMD:
6229         case X86_VENDOR_Cyrix:
6230         case X86_VENDOR_TM:
6231         case X86_VENDOR_Centaur:
6232                 create = cpi->cpi_xmaxeax >= 0x80000001;
6233                 break;
6234         default:
6235                 create = 0;
6236                 break;
6237         }
6238         if (create) {
6239                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6240                     "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi));
6241                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6242                     "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi));
6243         }
6244 
6245         /*
6246          * Brand String first appeared in Intel Pentium IV, AMD K5
6247          * model 1, and Cyrix GXm.  On earlier models we try to
6248          * simulate something similar .. so this string should always
6249          * say -something- about the processor, however lame.
6250          */
6251         (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
6252             "brand-string", cpi->cpi_brandstr);
6253 
6254         /*
6255          * Finally, cache and tlb information
6256          */
6257         switch (x86_which_cacheinfo(cpi)) {
6258         case X86_VENDOR_Intel:
6259                 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
6260                 break;
6261         case X86_VENDOR_Cyrix:
6262                 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
6263                 break;
6264         case X86_VENDOR_AMD:
6265                 amd_cache_info(cpi, cpu_devi);
6266                 break;
6267         default:
6268                 break;
6269         }
6270 }
6271 
6272 struct l2info {
6273         int *l2i_csz;
6274         int *l2i_lsz;
6275         int *l2i_assoc;
6276         int l2i_ret;
6277 };
6278 
6279 /*
6280  * A cacheinfo walker that fetches the size, line-size and associativity
6281  * of the L2 cache
6282  */
6283 static int
6284 intel_l2cinfo(void *arg, const struct cachetab *ct)
6285 {
6286         struct l2info *l2i = arg;
6287         int *ip;
6288 
6289         if (ct->ct_label != l2_cache_str &&
6290             ct->ct_label != sl2_cache_str)
6291                 return (0);     /* not an L2 -- keep walking */
6292 
6293         if ((ip = l2i->l2i_csz) != NULL)
6294                 *ip = ct->ct_size;
6295         if ((ip = l2i->l2i_lsz) != NULL)
6296                 *ip = ct->ct_line_size;
6297         if ((ip = l2i->l2i_assoc) != NULL)
6298                 *ip = ct->ct_assoc;
6299         l2i->l2i_ret = ct->ct_size;
6300         return (1);             /* was an L2 -- terminate walk */
6301 }
6302 
6303 /*
6304  * AMD L2/L3 Cache and TLB Associativity Field Definition:
6305  *
6306  *      Unlike the associativity for the L1 cache and tlb where the 8 bit
6307  *      value is the associativity, the associativity for the L2 cache and
6308  *      tlb is encoded in the following table. The 4 bit L2 value serves as
6309  *      an index into the amd_afd[] array to determine the associativity.
6310  *      -1 is undefined. 0 is fully associative.
6311  */
6312 
6313 static int amd_afd[] =
6314         {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0};
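
/*
 * Worked example (illustrative only; the real decoding is done by
 * amd_l2cacheinfo() below): CPUID Fn8000_0006 %ecx reports the L2 line size
 * in bits 7:0, the associativity encoding in bits 15:12, and the L2 size, in
 * KB, in bits 31:16.  An encoding of 0x6 indexes amd_afd[6] == 8, i.e. an
 * 8-way set-associative L2, while an encoding of 0xf indexes
 * amd_afd[15] == 0, i.e. fully associative:
 *
 *	enc = BITX(cp->cp_ecx, 15, 12);		e.g. 0x6
 *	ways = amd_afd[enc];			8; -1 means reserved/undefined
 */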
6315 
6316 static void
6317 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i)
6318 {
6319         struct cpuid_regs *cp;
6320         uint_t size, assoc;
6321         int i;
6322         int *ip;
6323 
6324         if (cpi->cpi_xmaxeax < 0x80000006)
6325                 return;
6326         cp = &cpi->cpi_extd[6];
6327 
6328         if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 &&
6329             (size = BITX(cp->cp_ecx, 31, 16)) != 0) {
6330                 uint_t cachesz = size * 1024;
6331                 assoc = amd_afd[i];
6332 
6333                 ASSERT(assoc != -1);
6334 
6335                 if ((ip = l2i->l2i_csz) != NULL)
6336                         *ip = cachesz;
6337                 if ((ip = l2i->l2i_lsz) != NULL)
6338                         *ip = BITX(cp->cp_ecx, 7, 0);
6339                 if ((ip = l2i->l2i_assoc) != NULL)
6340                         *ip = assoc;
6341                 l2i->l2i_ret = cachesz;
6342         }
6343 }
6344 
6345 int
6346 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc)
6347 {
6348         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
6349         struct l2info __l2info, *l2i = &__l2info;
6350 
6351         l2i->l2i_csz = csz;
6352         l2i->l2i_lsz = lsz;
6353         l2i->l2i_assoc = assoc;
6354         l2i->l2i_ret = -1;
6355 
6356         switch (x86_which_cacheinfo(cpi)) {
6357         case X86_VENDOR_Intel:
6358                 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
6359                 break;
6360         case X86_VENDOR_Cyrix:
6361                 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
6362                 break;
6363         case X86_VENDOR_AMD:
6364                 amd_l2cacheinfo(cpi, l2i);
6365                 break;
6366         default:
6367                 break;
6368         }
6369         return (l2i->l2i_ret);
6370 }
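
/*
 * Illustrative caller (hypothetical; real consumers live elsewhere in the
 * kernel): any of the three out-pointers may be NULL if that datum is not
 * wanted, and the return value is the L2 size in bytes, or -1 if no L2
 * cache was found:
 *
 *	int csz, lsz, assoc;
 *
 *	if (getl2cacheinfo(CPU, &csz, &lsz, &assoc) > 0)
 *		cmn_err(CE_CONT, "?L2: %d bytes, %d-byte lines, %d-way\n",
 *		    csz, lsz, assoc);
 */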
6371 
6372 #if !defined(__xpv)
6373 
6374 uint32_t *
6375 cpuid_mwait_alloc(cpu_t *cpu)
6376 {
6377         uint32_t        *ret;
6378         size_t          mwait_size;
6379 
6380         ASSERT(cpuid_checkpass(CPU, 2));
6381 
6382         mwait_size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max;
6383         if (mwait_size == 0)
6384                 return (NULL);
6385 
6386         /*
6387          * kmem_alloc() returns cache line size aligned data for mwait_size
6388          * allocations.  mwait_size is currently cache line sized.  Neither
6389          * of these implementation details is guaranteed to remain true in
6390          * the future.
6391          *
6392          * First try allocating mwait_size bytes, as kmem_alloc() currently
6393          * returns correctly aligned memory.  If that allocation is not
6394          * mwait_size aligned, allocate 2 * mwait_size and round up within it.
6395          *
6396          * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we
6397          * decide to free this memory.
6398          */
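        /*
         * Worked example (illustrative, assuming mwait_size == 64): a buffer
         * returned at address ...0x1040 is already 64-byte aligned, since
         * P2ROUNDUP(0x1040, 64) == 0x1040, and is used as-is.  A buffer
         * returned at ...0x1048 is not (P2ROUNDUP(0x1048, 64) == 0x1080), so
         * we allocate 2 * 64 bytes instead and return the first 64-byte
         * boundary within that larger buffer.
         */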
6399         ret = kmem_zalloc(mwait_size, KM_SLEEP);
6400         if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) {
6401                 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
6402                 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size;
6403                 *ret = MWAIT_RUNNING;
6404                 return (ret);
6405         } else {
6406                 kmem_free(ret, mwait_size);
6407                 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP);
6408                 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
6409                 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2;
6410                 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size);
6411                 *ret = MWAIT_RUNNING;
6412                 return (ret);
6413         }
6414 }
6415 
6416 void
6417 cpuid_mwait_free(cpu_t *cpu)
6418 {
6419         if (cpu->cpu_m.mcpu_cpi == NULL) {
6420                 return;
6421         }
6422 
6423         if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL &&
6424             cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) {
6425                 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual,
6426                     cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual);
6427         }
6428 
6429         cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL;
6430         cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0;
6431 }
6432 
6433 void
6434 patch_tsc_read(int flag)
6435 {
6436         size_t cnt;
6437 
6438         switch (flag) {
6439         case TSC_NONE:
6440                 cnt = &_no_rdtsc_end - &_no_rdtsc_start;
6441                 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt);
6442                 break;
6443         case TSC_RDTSC_MFENCE:
6444                 cnt = &_tsc_mfence_end - &_tsc_mfence_start;
6445                 (void) memcpy((void *)tsc_read,
6446                     (void *)&_tsc_mfence_start, cnt);
6447                 break;
6448         case TSC_RDTSC_LFENCE:
6449                 cnt = &_tsc_lfence_end - &_tsc_lfence_start;
6450                 (void) memcpy((void *)tsc_read,
6451                     (void *)&_tsc_lfence_start, cnt);
6452                 break;
6453         case TSC_TSCP:
6454                 cnt = &_tscp_end - &_tscp_start;
6455                 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt);
6456                 break;
6457         default:
6458                 /* Bail for unexpected TSC types. (TSC_NONE covers 0) */
6459                 cmn_err(CE_PANIC, "Unrecognized TSC type: %d", flag);
6460                 break;
6461         }
6462         tsc_type = flag;
6463 }
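
/*
 * Illustrative note (describing the prepatched routines, which are
 * implemented in assembly elsewhere): the TSC_RDTSC_LFENCE variant is
 * essentially
 *
 *	lfence
 *	rdtsc
 *
 * i.e. a load fence ahead of rdtsc so that the timestamp cannot be sampled
 * ahead of earlier loads; the TSC_RDTSC_MFENCE variant uses mfence instead,
 * and TSC_TSCP uses the self-ordering rdtscp instruction.
 */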
6464 
6465 int
6466 cpuid_deep_cstates_supported(void)
6467 {
6468         struct cpuid_info *cpi;
6469         struct cpuid_regs regs;
6470 
6471         ASSERT(cpuid_checkpass(CPU, 1));
6472 
6473         cpi = CPU->cpu_m.mcpu_cpi;
6474 
6475         if (!is_x86_feature(x86_featureset, X86FSET_CPUID))
6476                 return (0);
6477 
6478         switch (cpi->cpi_vendor) {
6479         case X86_VENDOR_Intel:
6480                 if (cpi->cpi_xmaxeax < 0x80000007)
6481                         return (0);
6482 
6483                 /*
6484                  * Does the TSC run at a constant rate in all ACPI C-states?
6485                  */
6486                 regs.cp_eax = 0x80000007;
6487                 (void) __cpuid_insn(&regs);
6488                 return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE);
6489 
6490         default:
6491                 return (0);
6492         }
6493 }
6494 
6495 #endif  /* !__xpv */
6496 
6497 void
6498 post_startup_cpu_fixups(void)
6499 {
6500 #ifndef __xpv
6501         /*
6502          * Some AMD processors support C1E state. Entering this state will
6503          * cause the local APIC timer to stop, which we can't deal with at
6504          * this time.
6505          */
6506         if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) {
6507                 on_trap_data_t otd;
6508                 uint64_t reg;
6509 
6510                 if (!on_trap(&otd, OT_DATA_ACCESS)) {
6511                         reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT);
6512                         /* Disable C1E state if it is enabled by BIOS */
6513                         if ((reg >> AMD_ACTONCMPHALT_SHIFT) &
6514                             AMD_ACTONCMPHALT_MASK) {
6515                                 reg &= ~(AMD_ACTONCMPHALT_MASK <<
6516                                     AMD_ACTONCMPHALT_SHIFT);
6517                                 wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg);
6518                         }
6519                 }
6520                 no_trap();
6521         }
6522 #endif  /* !__xpv */
6523 }
6524 
6525 void
6526 enable_pcid(void)
6527 {
6528         if (x86_use_pcid == -1)
6529                 x86_use_pcid = is_x86_feature(x86_featureset, X86FSET_PCID);
6530 
6531         if (x86_use_invpcid == -1) {
6532                 x86_use_invpcid = is_x86_feature(x86_featureset,
6533                     X86FSET_INVPCID);
6534         }
6535 
6536         if (!x86_use_pcid)
6537                 return;
6538 
6539         /*
6540          * Intel says that on setting PCIDE, the processor immediately starts
6541          * using the PCID bits; better make sure there's nothing there.
6542          */
6543         ASSERT((getcr3() & MMU_PAGEOFFSET) == PCID_NONE);
6544 
6545         setcr4(getcr4() | CR4_PCIDE);
6546 }
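
/*
 * Illustrative note (an assumption about the MMU consumers, which live
 * elsewhere): once CR4.PCIDE is set, bits 11:0 of every value loaded into
 * %cr3 select the PCID, e.g. a load of (root_pfn << 12) | 1 switches to
 * PCID 1.  That is why the ASSERT above insists the current %cr3 carries
 * PCID_NONE before the feature is turned on.
 */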
6547 
6548 /*
6549  * Set up the registers necessary to enable the XSAVE feature on this
6550  * processor.  This function needs to be called early enough that no
6551  * xsave/xrstor ops execute on the processor before the MSRs are set up.
6552  *
6553  * The current implementation makes the following assumptions:
6554  * - cpuid_pass1() is done, so that X86 features are known.
6555  * - fpu_probe() is done, so that fp_save_mech is chosen.
6556  */
6557 void
6558 xsave_setup_msr(cpu_t *cpu)
6559 {
6560         ASSERT(fp_save_mech == FP_XSAVE);
6561         ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
6562 
6563         /* Enable OSXSAVE in CR4. */
6564         setcr4(getcr4() | CR4_OSXSAVE);
6565         /*
6566          * Update SW copy of ECX, so that /dev/cpu/self/cpuid will report
6567          * correct value.
6568          */
6569         cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_ecx |= CPUID_INTC_ECX_OSXSAVE;
6570         setup_xfem();
6571 }
6572 
6573 /*
6574  * Starting with the Westmere processor, the local
6575  * APIC timer continues running in all C-states,
6576  * including the deepest C-states.
6577  */
6578 int
6579 cpuid_arat_supported(void)
6580 {
6581         struct cpuid_info *cpi;
6582         struct cpuid_regs regs;
6583 
6584         ASSERT(cpuid_checkpass(CPU, 1));
6585         ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
6586 
6587         cpi = CPU->cpu_m.mcpu_cpi;
6588 
6589         switch (cpi->cpi_vendor) {
6590         case X86_VENDOR_Intel:
6591                 /*
6592                  * Always-running Local APIC Timer is
6593                  * indicated by CPUID.6.EAX[2].
6594                  */
6595                 if (cpi->cpi_maxeax >= 6) {
6596                         regs.cp_eax = 6;
6597                         (void) cpuid_insn(NULL, &regs);
6598                         return (regs.cp_eax & CPUID_INTC_EAX_ARAT);
6599                 } else {
6600                         return (0);
6601                 }
6602         default:
6603                 return (0);
6604         }
6605 }
6606 
6607 /*
6608  * Check support for Intel ENERGY_PERF_BIAS feature
6609  */
6610 int
6611 cpuid_iepb_supported(struct cpu *cp)
6612 {
6613         struct cpuid_info *cpi = cp->cpu_m.mcpu_cpi;
6614         struct cpuid_regs regs;
6615 
6616         ASSERT(cpuid_checkpass(cp, 1));
6617 
6618         if (!(is_x86_feature(x86_featureset, X86FSET_CPUID)) ||
6619             !(is_x86_feature(x86_featureset, X86FSET_MSR))) {
6620                 return (0);
6621         }
6622 
6623         /*
6624          * The Intel ENERGY_PERF_BIAS MSR is indicated by
6625          * capability bit CPUID.6.ECX[3].
6626          */
6627         if ((cpi->cpi_vendor != X86_VENDOR_Intel) || (cpi->cpi_maxeax < 6))
6628                 return (0);
6629 
6630         regs.cp_eax = 0x6;
6631         (void) cpuid_insn(NULL, &regs);
6632         return (regs.cp_ecx & CPUID_INTC_ECX_PERFBIAS);
6633 }
6634 
6635 /*
6636  * Check support for TSC deadline timer
6637  *
6638  * The TSC deadline timer provides a superior software programming
6639  * model over the local APIC timer, one that eliminates "time drifts".
6640  * Instead of specifying a relative time, software specifies an
6641  * absolute time as the target at which the processor should
6642  * generate a timer event.
6643  */
6644 int
6645 cpuid_deadline_tsc_supported(void)
6646 {
6647         struct cpuid_info *cpi = CPU->cpu_m.mcpu_cpi;
6648         struct cpuid_regs regs;
6649 
6650         ASSERT(cpuid_checkpass(CPU, 1));
6651         ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
6652 
6653         switch (cpi->cpi_vendor) {
6654         case X86_VENDOR_Intel:
6655                 if (cpi->cpi_maxeax >= 1) {
6656                         regs.cp_eax = 1;
6657                         (void) cpuid_insn(NULL, &regs);
6658                         return (regs.cp_ecx & CPUID_DEADLINE_TSC);
6659                 } else {
6660                         return (0);
6661                 }
6662         default:
6663                 return (0);
6664         }
6665 }
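
/*
 * Illustrative sketch (an assumption about the consumer, which lives in the
 * APIC driver rather than here): with TSC-deadline mode selected in the
 * local APIC's LVT timer entry, the timer is armed by writing an absolute
 * TSC value to the IA32_TSC_DEADLINE MSR (0x6e0), and the timer interrupt
 * fires once the TSC reaches that value.  With delta_ticks a hypothetical
 * relative timeout already converted to TSC ticks:
 *
 *	wrmsr(0x6e0, tsc_read() + delta_ticks);
 */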
6666 
6667 #if defined(__amd64) && !defined(__xpv)
6668 /*
6669  * Patch in versions of bcopy for high-performance Intel Nehalem (Nhm)
6670  * processors and later.
6671  */
6672 void
6673 patch_memops(uint_t vendor)
6674 {
6675         size_t cnt, i;
6676         caddr_t to, from;
6677 
6678         if ((vendor == X86_VENDOR_Intel) &&
6679             is_x86_feature(x86_featureset, X86FSET_SSE4_2)) {
6680                 cnt = &bcopy_patch_end - &bcopy_patch_start;
6681                 to = &bcopy_ck_size;
6682                 from = &bcopy_patch_start;
6683                 for (i = 0; i < cnt; i++) {
6684                         *to++ = *from++;
6685                 }
6686         }
6687 }
6688 #endif  /* __amd64 && !__xpv */
6689 
6690 /*
6691  * We're being asked to tell the system how many bits are required to represent
6692  * the various core and strand IDs. While it's tempting to derive this based
6693  * on the values in cpi_ncore_per_chip and cpi_ncpu_per_chip, that isn't quite
6694  * correct. Instead, this needs to be based on the number of bits that the APIC
6695  * allows for these different configurations. We only update the callers'
6696  * values when ours are larger.
6697  */
6698 void
6699 cpuid_get_ext_topo(cpu_t *cpu, uint_t *core_nbits, uint_t *strand_nbits)
6700 {
6701         struct cpuid_info *cpi;
6702 
6703         VERIFY(cpuid_checkpass(CPU, 1));
6704         cpi = cpu->cpu_m.mcpu_cpi;
6705 
6706         if (cpi->cpi_ncore_bits > *core_nbits) {
6707                 *core_nbits = cpi->cpi_ncore_bits;
6708         }
6709 
6710         if (cpi->cpi_nthread_bits > *strand_nbits) {
6711                 *strand_nbits = cpi->cpi_nthread_bits;
6712         }
6713 }
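
/*
 * Illustrative sketch (an assumption about how a consumer might use the two
 * bit counts; the actual consumers live in the platform topology code): the
 * initial APIC ID decomposes, from least to most significant bits, into
 * strand, core, and package components:
 *
 *	strand = apicid & ((1 << strand_nbits) - 1);
 *	core = (apicid >> strand_nbits) & ((1 << core_nbits) - 1);
 *	pkg = apicid >> (strand_nbits + core_nbits);
 */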
6714 
6715 void
6716 cpuid_pass_ucode(cpu_t *cpu, uchar_t *fset)
6717 {
6718         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
6719         struct cpuid_regs cp;
6720 
6721         /*
6722          * Reread the CPUID portions that we need for various security
6723          * information.
6724          */
6725         if (cpi->cpi_vendor == X86_VENDOR_Intel) {
6726                 /*
6727                  * Check if we now have leaf 7 available to us.
6728                  */
6729                 if (cpi->cpi_maxeax < 7) {
6730                         bzero(&cp, sizeof (cp));
6731                         cp.cp_eax = 0;
6732                         cpi->cpi_maxeax = __cpuid_insn(&cp);
6733                         if (cpi->cpi_maxeax < 7)
6734                                 return;
6735                 }
6736 
6737                 bzero(&cp, sizeof (cp));
6738                 cp.cp_eax = 7;
6739                 cp.cp_ecx = 0;
6740                 (void) __cpuid_insn(&cp);
6741                 cpi->cpi_std[7] = cp;
6742         } else if (cpi->cpi_vendor == X86_VENDOR_AMD) {
6743                 /* No xcpuid support */
6744                 if (cpi->cpi_family < 5 ||
6745                     (cpi->cpi_family == 5 && cpi->cpi_model < 1))
6746                         return;
6747 
6748                 if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8) {
6749                         bzero(&cp, sizeof (cp));
6750                         cp.cp_eax = CPUID_LEAF_EXT_0;
6751                         cpi->cpi_xmaxeax = __cpuid_insn(&cp);
6752                         if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8) {
6753                                 return;
6754                         }
6755                 }
6756 
6757                 bzero(&cp, sizeof (cp));
6758                 cp.cp_eax = CPUID_LEAF_EXT_8;
6759                 (void) __cpuid_insn(&cp);
6760                 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8, &cp);
6761                 cpi->cpi_extd[8] = cp;
6762         } else {
6763                 /*
6764                  * Nothing to do here. Return an empty set which has already
6765                  * been zeroed for us.
6766                  */
6767                 return;
6768         }
6769         cpuid_scan_security(cpu, fset);
6770 }
6771 
6772 /* ARGSUSED */
6773 static int
6774 cpuid_post_ucodeadm_xc(xc_arg_t arg0, xc_arg_t arg1, xc_arg_t arg2)
6775 {
6776         uchar_t *fset;
6777 
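        /*
         * arg0 is the base of the per-CPU array of feature sets allocated by
         * cpuid_post_ucodeadm(); index it by our CPU id to find our slot.
         */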
6778         fset = (uchar_t *)(arg0 + sizeof (x86_featureset) * CPU->cpu_id);
6779         cpuid_pass_ucode(CPU, fset);
6780 
6781         return (0);
6782 }
6783 
6784 /*
6785  * After a microcode update where the version has changed, we need to rescan
6786  * CPUID. To do this we first check that every CPU has the same microcode
6787  * revision, then perform a cross call to all such CPUs. It's the
6788  * caller's job to make sure that no one else can end up doing an update while
6789  * this is going on.
6790  *
6791  * We assume that the system is microcode capable if we're called.
6792  */
6793 void
6794 cpuid_post_ucodeadm(void)
6795 {
6796         uint32_t rev;
6797         int i;
6798         struct cpu *cpu;
6799         cpuset_t cpuset;
6800         void *argdata;
6801         uchar_t *f0;
6802 
6803         argdata = kmem_zalloc(sizeof (x86_featureset) * NCPU, KM_SLEEP);
6804 
6805         mutex_enter(&cpu_lock);
6806         cpu = cpu_get(0);
6807         rev = cpu->cpu_m.mcpu_ucode_info->cui_rev;
6808         CPUSET_ONLY(cpuset, 0);
6809         for (i = 1; i < max_ncpus; i++) {
6810                 if ((cpu = cpu_get(i)) == NULL)
6811                         continue;
6812 
6813                 if (cpu->cpu_m.mcpu_ucode_info->cui_rev != rev) {
6814                         panic("post microcode update CPU %d has differing "
6815                             "microcode revision (%u) from CPU 0 (%u)",
6816                             i, cpu->cpu_m.mcpu_ucode_info->cui_rev, rev);
6817                 }
6818                 CPUSET_ADD(cpuset, i);
6819         }
6820 
6821         kpreempt_disable();
6822         xc_sync((xc_arg_t)argdata, 0, 0, CPUSET2BV(cpuset),
6823             cpuid_post_ucodeadm_xc);
6824         kpreempt_enable();
6825 
6826         /*
6827          * OK, now look at each CPU and see if their feature sets are equal.
6828          */
6829         f0 = argdata;
6830         for (i = 1; i < max_ncpus; i++) {
6831                 uchar_t *fset;
6832                 if (!CPU_IN_SET(cpuset, i))
6833                         continue;
6834 
6835                 fset = (uchar_t *)((uintptr_t)argdata +
6836                     sizeof (x86_featureset) * i);
6837 
6838                 if (!compare_x86_featureset(f0, fset)) {
6839                         panic("Post microcode update CPU %d has "
6840                             "differing security feature (%p) set from CPU 0 "
6841                             "(%p), not appending to feature set", i,
6842                             (void *)fset, (void *)f0);
6843                 }
6844         }
6845 
6846         mutex_exit(&cpu_lock);
6847 
6848         for (i = 0; i < NUM_X86_FEATURES; i++) {
6849                 if (!is_x86_feature(f0, i))
6850                         continue;
6851                 cmn_err(CE_CONT, "?post-ucode x86_feature: %s\n",
6852                     x86_feature_names[i]);
6853                 add_x86_feature(x86_featureset, i);
6854         }
6855         kmem_free(argdata, sizeof (x86_featureset) * NCPU);
6856 }