/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
 * Copyright 2014 Josef "Jeff" Sipek <jeffpc@josefsipek.net>
 */
/*
 * Copyright (c) 2010, Intel Corporation.
 * All rights reserved.
 */
/*
 * Portions Copyright 2009 Advanced Micro Devices, Inc.
 */
/*
 * Copyright 2019, Joyent, Inc.
 */

/*
 * CPU Identification logic
 *
 * The purpose of this file and its companion, cpuid_subr.c, is to help deal
 * with the identification of CPUs, their features, and their topologies. More
 * specifically, this file helps drive the following:
 *
 * 1. Enumeration of features of the processor which are used by the kernel to
 *    determine what features to enable or disable. These may be instruction set
 *    enhancements or features that we use.
 *
 * 2. Enumeration of instruction set architecture (ISA) additions that userland
 *    will be told about through the auxiliary vector.
 *
 * 3. Understanding the physical topology of the CPU such as the number of
 *    caches, how many cores it has, whether or not it supports simultaneous
 *    multi-threading (SMT), etc.
 *
 * ------------------------
 * CPUID History and Basics
 * ------------------------
 *
 * The cpuid instruction was added by Intel roughly around the time that the
 * original Pentium was introduced. The purpose of cpuid was to report, in a
 * programmatic fashion, information about the CPU that previously had to be
 * guessed at. For example, an important part of cpuid is that we can know what
 * extensions to the ISA exist. Using an invalid opcode raises a #UD exception,
 * so this method allows a program (whether a user program or the kernel) to
 * determine what exists without crashing or getting a SIGILL. Of course, this
 * was also during the era of the clones and the AMD Am5x86. The vendor name
 * shows up first in cpuid for a reason.
 *
 * cpuid information is broken down into ranges, each called a 'leaf'. Each
 * leaf puts unique values into the registers %eax, %ebx, %ecx, and %edx and
 * each leaf has its own meaning. The different leaves are broken down into
 * different regions:
 *
 *      [ 0, 7fffffff ]                 This region is called the 'basic'
 *                                      region. This region is generally defined
 *                                      by Intel, though some of the original
 *                                      portions have different meanings based
 *                                      on the manufacturer. These days, Intel
 *                                      adds most new features to this region.
 *                                      AMD adds non-Intel compatible
 *                                      information in the third, extended
 *                                      region. Intel uses this for everything
 *                                      including ISA extensions, CPU
 *                                      features, cache information, topology,
 *                                      and more.
 *
 *                                      There is a hole carved out of this
 *                                      region which is reserved for
 *                                      hypervisors.
 *
 *      [ 40000000, 4fffffff ]          This region, which is found in the
 *                                      middle of the previous region, is
 *                                      explicitly promised to never be used by
 *                                      CPUs. Instead, it is used by hypervisors
 *                                      to communicate information about
 *                                      themselves to the operating system. The
 *                                      values and details are unique for each
 *                                      hypervisor.
 *
 *      [ 80000000, ffffffff ]          This region is called the 'extended'
 *                                      region. Some of the low leaves mirror
 *                                      parts of the basic leaves. This region
 *                                      has generally been used by AMD for
 *                                      various extensions. For example, AMD-
 *                                      specific information about caches,
 *                                      features, and topology are found in this
 *                                      region.
 *
 * To query a leaf, you place the desired leaf number into %eax, zero %ebx,
 * %ecx, and %edx, and then issue the cpuid instruction. At the first leaf in
 * each of the ranges, one of the primary things returned is the maximum valid
 * leaf in that range. This allows for discovery of what range of CPUID is
 * valid.
 *
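 * As a concrete illustration, consider the following sketch. (The helper
 * cpuid_raw() is hypothetical and for exposition only; the kernel itself
 * issues the instruction through the __cpuid_insn() wrapper and a
 * struct cpuid_regs.)
 *
 *      static void
 *      cpuid_raw(uint32_t leaf, uint32_t subleaf, uint32_t *eax,
 *          uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
 *      {
 *              __asm__ __volatile__("cpuid"
 *                  : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
 *                  : "a" (leaf), "c" (subleaf));
 *      }
 *
 *      uint32_t max_basic, ebx, ecx, edx;
 *      cpuid_raw(0, 0, &max_basic, &ebx, &ecx, &edx);
 *
 * On return, max_basic holds the maximum valid basic leaf.
 *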
 * The CPUs have potentially surprising behavior when using an invalid leaf or
 * unimplemented leaf. If the requested leaf is within the valid basic or
 * extended range, but is unimplemented, then %eax, %ebx, %ecx, and %edx will be
 * set to zero. However, if you specify a leaf that is outside of a valid range,
 * then instead it will be filled with the last valid _basic_ leaf. For example,
 * if the maximum basic value is on leaf 0x3, then issuing a cpuid for leaf 4 or
 * an invalid extended leaf will return the information for leaf 3.
 *
 * Some leaves are broken down into sub-leaves. This means that the value
 * depends on both the leaf asked for in %eax and a secondary register. For
 * example, Intel uses the value in %ecx on leaf 7 to indicate a sub-leaf to get
 * additional information. Or when getting topology information in leaf 0xb, the
 * initial value in %ecx changes which level of the topology that you are
 * getting information about.
 *
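 * Using the hypothetical cpuid_raw() sketch from above, a sub-leaf query
 * simply seeds %ecx as well; for example, leaf 7, sub-leaf 0 returns the
 * structured extended feature flags:
 *
 *      uint32_t eax, ebx, ecx, edx;
 *      cpuid_raw(7, 0, &eax, &ebx, &ecx, &edx);
 *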
 * cpuid values are always kept to 32 bits regardless of whether or not the
 * program is in 64-bit mode. When executing in 64-bit mode, the upper
 * 32 bits of the register are always set to zero so that the values are the
 * same regardless of execution mode.
 *
 * ----------------------
 * Identifying Processors
 * ----------------------
 *
 * We can identify a processor in two steps. The first step looks at cpuid leaf
 * 0. Leaf 0 contains the processor's vendor information. This is done by
 * placing a 12 character ASCII string across %ebx, %edx, and %ecx (read in
 * that order). On AMD, it is 'AuthenticAMD' and on Intel it is 'GenuineIntel'.
 *
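 * Continuing with the hypothetical cpuid_raw() sketch from above, the vendor
 * string can be reassembled like so:
 *
 *      uint32_t eax, regs[3];
 *      char vendor[13];
 *
 *      cpuid_raw(0, 0, &eax, &regs[0], &regs[2], &regs[1]);
 *      (void) memcpy(vendor, regs, 12);
 *      vendor[12] = '\0';
 *
 * Note the argument order: regs[0] receives %ebx, regs[1] receives %edx, and
 * regs[2] receives %ecx, so the bytes land in string order, e.g.
 * "GenuineIntel" or "AuthenticAMD".
 *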
 * From there, a processor is identified by a combination of three different
 * values:
 *
 *  1. Family
 *  2. Model
 *  3. Stepping
 *
 * Each vendor uses the family and model to uniquely identify a processor. The
 * way that family and model are changed depends on the vendor. For example,
 * Intel has been using family 0x6 for almost all of their processors since the
 * Pentium Pro/Pentium II era, often called the P6. The model is used to
 * identify the exact processor. Different models are often used for the client
 * (consumer) and server parts. Even though each processor often has major
 * architectural differences, they still are considered the same family by
 * Intel.
 *
 * On the other hand, each major AMD architecture generally has its own family.
 * For example, the K8 is family 0xf, the K10 is 0x10, Bulldozer is 0x15, and
 * Zen is 0x17. Within a family, the model number is used to help identify
 * specific processors.
 *
 * The stepping is used to refer to a revision of a specific microprocessor. The
 * term comes from equipment used to produce masks that are used to create
 * integrated circuits.
 *
 * The information is present in leaf 1, %eax. In technical documentation you
 * will see the terms extended model and extended family. The original family,
 * model, and stepping fields were each 4 bits wide. If the value in the family
 * field is 0xf, then one is to consult the extended family and extended model
 * fields, which take previously reserved bits: the extended family is added to
 * the base family, while the extended model is prepended to the base model as
 * its high four bits, allowing for a larger number of families and models.
 *
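 * To make that concrete, here is a minimal sketch of the conventional
 * decoding, assuming eax holds the leaf 1 %eax value. (Per vendor
 * documentation, the extended model applies to families 0x6 and 0xf on Intel,
 * but only family 0xf on AMD.)
 *
 *      uint_t family = (eax >> 8) & 0xf;
 *      uint_t model = (eax >> 4) & 0xf;
 *      uint_t step = eax & 0xf;
 *
 *      if (family == 0xf)
 *              family += (eax >> 20) & 0xff;
 *      if (family == 0x6 || family == 0xf)
 *              model += ((eax >> 16) & 0xf) << 4;
 *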
 * When we process this information, we store the full family, model, and
 * stepping in the struct cpuid_info members cpi_family, cpi_model, and
 * cpi_step, respectively. Whenever you are performing comparisons with the
 * family, model, and stepping, you should use these members and not the raw
 * values from cpuid. If you must use the raw values from cpuid directly, you
 * must make sure that you add the extended model and family to the base model
 * and family.
 *
 * In general, we do not use information about the family, model, and stepping
 * to determine whether or not a feature is present; that is generally driven by
 * specific leaves. However, when something we care about on the processor is
 * not considered 'architectural', meaning that it is specific to a set of
 * processors and not promised in the architecture model to be consistent from
 * generation to generation, then we will fall back on this information. The
 * most common cases where this comes up are when we have to work around errata
 * in the processor, are dealing with processor-specific features such as CPU
 * performance counters, or want to provide additional information for things
 * such as fault management.
 *
 * While processors also have a brand string, which is the name that people are
 * familiar with when buying the processor, it is not meant for programmatic
 * consumption. That is what the family, model, and stepping are for.
 *
 * ------------
 * CPUID Passes
 * ------------
 *
 * As part of performing feature detection, we break this into several different
 * passes. The passes are as follows:
 *
 *      Pass 0          This is a primordial pass done in locore.s to deal with
 *                      Cyrix CPUs that don't support cpuid. The reality is that
 *                      we likely don't run on them any more, but there is still
 *                      logic for handling them.
 *
 *      Pass 1          This is the primary pass and is responsible for doing a
 *                      large number of different things:
 *
 *                      1. Determining which vendor manufactured the CPU and
 *                      the family, model, and stepping information.
 *
 *                      2. Gathering a large number of feature flags to
 *                      determine which features the CPU supports and which
 *                      indicate things that we need to do other work in the OS
 *                      to enable. Features detected this way are added to the
 *                      x86_featureset which can be queried to
 *                      determine what we should do. This includes processing
 *                      all of the basic and extended CPU features that we care
 *                      about.
 *
 *                      3. Determining the CPU's topology. This includes
 *                      information about how many cores and threads are present
 *                      in the package. It also is responsible for figuring out
 *                      which logical CPUs are potentially part of the same core
 *                      and what other resources they might share. For more
 *                      information see the 'Topology' section.
 *
 *                      4. Determining the set of CPU security-specific features
 *                      that we need to worry about and the appropriate set of
 *                      workarounds.
 *
 *                      Pass 1 on the boot CPU occurs before KMDB is started.
 *
 *      Pass 2          The second pass is done after startup(). Here, we check
 *                      other miscellaneous features. Most of this is gathering
 *                      additional basic and extended features that we'll use in
 *                      later passes or for debugging support.
 *
 *      Pass 3          The third pass occurs after the kernel memory allocator
 *                      has been fully initialized. This gathers information
 *                      where we might need dynamic memory available for our
 *                      uses. This includes several varying width leaves that
 *                      have cache information and the processor's brand string.
 *
 *      Pass 4          The fourth and final normal pass is performed after the
 *                      kernel has brought most everything online. This is
 *                      invoked from post_startup(). In this pass, we go through
 *                      the set of features that we have enabled and turn that
 *                      into the hardware auxiliary vector features that
 *                      userland receives. This is used by userland, primarily
 *                      by the run-time link-editor (RTLD), though userland
 *                      software could also refer to it directly.
 *
 *      Microcode       After a microcode update, we do a selective rescan of
 *                      the cpuid leaves to determine what features have
 *                      changed. Microcode updates can provide more details
 *                      about security related features to deal with issues like
 *                      Spectre and L1TF. On occasion, vendors have violated
 *                      their contract and removed bits. However, we don't try
 *                      to detect that because that puts us in a situation that
 *                      we really can't deal with. As such, the only things we
 *                      rescan today are security related features. See
 *                      cpuid_pass_ucode().
 *
 * All of the passes (except pass 0) are run on all CPUs. However, for the most
 * part we only care about what the boot CPU says about this information and use
 * the other CPUs as a rough guide to sanity check that we have the same feature
 * set.
 *
 * We do not support running multiple logical CPUs with disjoint, let alone
 * different, feature sets.
 *
 * ------------------
 * Processor Topology
 * ------------------
 *
 * One of the important things that we need to do is to understand the topology
 * of the underlying processor. When we say topology in this case, we're trying
 * to understand the relationship between the logical CPUs that the operating
 * system sees and the underlying physical layout. Different logical CPUs may
 * share different resources which can have important consequences for the
 * performance of the system. For example, they may share caches, execution
 * units, and more.
 *
 * The topology of the processor changes from generation to generation and
 * vendor to vendor.  Along with that, different vendors use different
 * terminology, and the operating system itself uses occasionally overlapping
 * terminology. It's important to understand what this topology looks like so
 * one can understand the different things that we try to calculate and
 * determine.
 *
 * To get started, let's talk about a little bit of terminology that we've used
 * so far, that is used throughout this file, and that is fairly generic across
 * multiple vendors:
 *
 * CPU
 *      A central processing unit (CPU) refers to a logical and/or virtual
 *      entity that the operating system can execute instructions on. The
 *      underlying resources for this CPU may be shared between multiple
 *      entities; however, to the operating system it is a discrete unit.
 *
 * PROCESSOR and PACKAGE
 *
 *      Generally, when we use the term 'processor' on its own, we are referring
 *      to the physical entity that one buys and plugs into a board. However,
 *      because processor has been overloaded and one might see it used to mean
 *      multiple different levels, we will instead use the term 'package' for
 *      the rest of this file. The term package comes from the electrical
 *      engineering side and refers to the physical entity that encloses the
 *      electronics inside. Strictly speaking the package can contain more than
 *      just the CPU, for example, on many processors it may also have what's
 *      called an 'integrated graphical processing unit (GPU)'. Because the
 *      package can encapsulate multiple units, it is the largest physical unit
 *      that we refer to.
 *
 * SOCKET
 *
 *      A socket refers to a unit on a system board (generally the motherboard)
 *      that can receive a package. A single package, or processor, is plugged
 *      into a single socket. A system may have multiple sockets. Oftentimes,
 *      the term socket is used interchangeably with package and refers to the
 *      electrical component that is plugged in, and not the receptacle itself.
 *
 * CORE
 *
 *      A core refers to the physical instantiation of a CPU, generally, with a
 *      full set of hardware resources available to it. A package may contain
 *      multiple cores inside of it or it may just have a single one. A
 *      processor with more than one core is often referred to as 'multi-core'.
 *      In illumos, we will use the feature X86FSET_CMP to refer to a system
 *      that has 'multi-core' processors.
 *
 *      A core may expose a single logical CPU to the operating system, or it
 *      may expose multiple CPUs, which we call threads, defined below.
 *
 *      Some resources may still be shared by cores in the same package. For
 *      example, many processors will share the level 3 cache between cores.
 *      Some AMD generations share hardware resources between cores. For more
 *      information on that see the section 'AMD Topology'.
 *
 * THREAD and STRAND
 *
 *      In this file, a thread generally refers to a hardware resource and not
 *      the operating system's logical abstraction. A thread is always exposed
 *      as an independent logical CPU to the operating system. A thread belongs
 *      to a specific core. A core may have more than one thread. When that is
 *      the case, the threads that are part of the same core are often referred
 *      to as 'siblings'.
 *
 *      When multiple threads exist, this is generally referred to as
 *      simultaneous multi-threading (SMT). When Intel introduced this in their
 *      processors they called it hyper-threading (HT). When multiple threads
 *      are active in a core, they split the resources of the core. For example,
 *      two threads may share the same set of hardware execution units.
 *
 *      The operating system often uses the term 'strand' to refer to a thread.
 *      This helps disambiguate it from the software concept.
 *
 * CHIP
 *
 *      Unfortunately, the term 'chip' is dramatically overloaded. At its most
 *      base meaning, it is used to refer to a single integrated circuit, which
 *      may or may not be the only thing in the package. In illumos, when you
 *      see the term 'chip' it is almost always referring to the same thing as
 *      the 'package'. However, many vendors may use chip to refer to one of
 *      many integrated circuits that have been placed in the package. As an
 *      example, see the subsequent definition.
 *
 *      To try and keep things consistent, we will only use chip when referring
 *      to the entire integrated circuit package (with the exception of the
 *      definition of multi-chip module, because it is in the name) and use the
 *      term 'die' when we want the more general, potentially sub-component
 *      definition.
 *
 * DIE
 *
 *      A die refers to an integrated circuit. Inside of the package there may
 *      be a single die or multiple dies. This is sometimes called a 'chip' in
 *      vendors' parlance, but in this file, we use the term die to refer to a
 *      subcomponent.
 *
 * MULTI-CHIP MODULE
 *
 *      A multi-chip module (MCM) refers to putting multiple distinct chips that
 *      are connected together in the same package. When a multi-chip design is
 *      used, generally each chip is manufactured independently and then joined
 *      together in the package. For example, on AMD's Zen microarchitecture
 *      (family 0x17), the package contains several dies (the second meaning of
 *      chip from above) that are connected together.
 *
 * CACHE
 *
 *      A cache is a part of the processor that maintains copies of recently
 *      accessed memory. Caches are split into levels and then into types.
 *      Commonly there are one to three levels, called level one, two, and
 *      three. The lower the level, the smaller it is, the closer it is to the
 *      execution units of the CPU, and the faster it is to access. The layout
 *      and design of the cache come in many different flavors; consult other
 *      resources for a discussion of those.
 *
 *      Caches are generally split into two types, the instruction and data
 *      cache. The caches contain what their names suggest, the instruction
 *      cache has executable program text, while the data cache has all other
 *      memory that the processor accesses. As of this writing, data is kept
 *      coherent between all of the caches on x86, so if one modifies program
 *      text before it is executed, that will be in the data cache, and the
 *      instruction cache will be synchronized with that change when the
 *      processor actually executes those instructions. This coherency also
 *      covers the fact that data could show up in multiple caches.
 *
 *      Generally, the lowest level caches are specific to a core. However, the
 *      last level cache is shared between some number of cores. The number of
 *      CPUs sharing this last level cache is important. This has implications
 *      for the choices that the scheduler makes, as accessing memory that might
 *      be in a remote cache after thread migration can be quite expensive.
 *
 *      Sometimes, the word cache is abbreviated with a '$', because in US
 *      English the word cache is pronounced the same as cash. So L1D$ refers to
 *      the L1 data cache, and L2$ would be the L2 cache. This will not be used
 *      in the rest of this theory statement for clarity.
 *
 * MEMORY CONTROLLER
 *
 *      The memory controller is a component that provides access to DRAM. Each
 *      memory controller can access a set number of DRAM channels. Each channel
 *      can have a number of DIMMs (sticks of memory) associated with it. A
 *      given package may have more than one memory controller. The association
 *      of the memory controller to a group of cores is important as it is
 *      cheaper to access memory on the controller that you are associated with.
 *
 * NUMA
 *
 *      NUMA, or non-uniform memory access, describes a way that systems are
 *      built. On x86, any processor core can address all of the memory in the
 *      system. However, when using multiple sockets or possibly within a
 *      multi-chip module, some of that memory is physically closer and some of
 *      it is further. Memory that is further away is more expensive to access.
 *      Consider the following image of multiple sockets with memory:
 *
 *      +--------+                                                +--------+
 *      | DIMM A |         +----------+      +----------+         | DIMM D |
 *      +--------+-+       |          |      |          |       +-+------+-+
 *        | DIMM B |=======| Socket 0 |======| Socket 1 |=======| DIMM E |
 *        +--------+-+     |          |      |          |     +-+------+-+
 *          | DIMM C |     +----------+      +----------+     | DIMM F |
 *          +--------+                                        +--------+
 *
 *      In this example, Socket 0 is closer to DIMMs A-C while Socket 1 is
 *      closer to DIMMs D-F. This means that it is cheaper for socket 0 to
 *      access DIMMs A-C and more expensive to access D-F as it has to go
 *      through Socket 1 to get there. The inverse is true for Socket 1: DIMMs
 *      D-F are cheaper to access than A-C. While the multi-socket form is the
 *      most common, this can also sometimes occur when using multi-chip
 *      modules. For another example of this that's more involved, see the AMD
 *      topology section.
 *
 *
 * Intel Topology
 * --------------
 *
 * Most Intel processors since Nehalem (as of this writing, the current
 * generation is Skylake / Cannon Lake) follow a fairly similar pattern. The
 * CPU portion of the package is a single monolithic die. MCMs currently aren't
 * used. Most parts have three levels of caches, with the L3 cache being shared
 * between all of the cores on the package. The L1/L2 cache is generally
 * specific to an individual core. The following image shows at a simplified
 * level what this looks like. The memory controller is commonly part of
 * something called the 'Uncore', which used to be built from separate physical
 * chips that were not a part of the package, but is now part of the same chip.
 *
 *  +-----------------------------------------------------------------------+
 *  | Package                                                               |
 *  |  +-------------------+  +-------------------+  +-------------------+  |
 *  |  | Core              |  | Core              |  | Core              |  |
 *  |  |  +--------+ +---+ |  |  +--------+ +---+ |  |  +--------+ +---+ |  |
 *  |  |  | Thread | | L | |  |  | Thread | | L | |  |  | Thread | | L | |  |
 *  |  |  +--------+ | 1 | |  |  +--------+ | 1 | |  |  +--------+ | 1 | |  |
 *  |  |  +--------+ |   | |  |  +--------+ |   | |  |  +--------+ |   | |  |
 *  |  |  | Thread | |   | |  |  | Thread | |   | |  |  | Thread | |   | |  |
 *  |  |  +--------+ +---+ |  |  +--------+ +---+ |  |  +--------+ +---+ |  |
 *  |  |  +--------------+ |  |  +--------------+ |  |  +--------------+ |  |
 *  |  |  | L2 Cache     | |  |  | L2 Cache     | |  |  | L2 Cache     | |  |
 *  |  |  +--------------+ |  |  +--------------+ |  |  +--------------+ |  |
 *  |  +-------------------+  +-------------------+  +-------------------+  |
 *  | +-------------------------------------------------------------------+ |
 *  | |                         Shared L3 Cache                           | |
 *  | +-------------------------------------------------------------------+ |
 *  | +-------------------------------------------------------------------+ |
 *  | |                        Memory Controller                          | |
 *  | +-------------------------------------------------------------------+ |
 *  +-----------------------------------------------------------------------+
 *
 * A side effect of this current architecture is that what we care about from a
 * scheduling and topology perspective is simplified. In general we care about
 * understanding which logical CPUs are part of the same core and socket.
 *
 * To determine the relationship between threads and cores, Intel initially used
 * the identifier in the advanced programmable interrupt controller (APIC). They
 * also added cpuid leaf 4 to give additional information about the number of
 * threads and CPUs in the processor. With the addition of x2apic (which
 * increased the number of addressable logical CPUs from 8 bits to 32 bits), an
 * additional cpuid topology leaf 0xB was added.
 *
 506  *
 507  * AMD Topology
 508  * ------------
 509  *
 510  * When discussing AMD topology, we want to break this into three distinct
 511  * generations of topology. There's the basic topology that has been used in
 512  * family 0xf+ (Opteron, Athlon64), there's the topology that was introduced
 513  * with family 0x15 (Bulldozer), and there's the topology that was introduced
 514  * with family 0x17 (Zen). AMD also has some additional terminology that's worth
 515  * talking about.
 516  *
 517  * Until the introduction of family 0x17 (Zen), AMD did not implement something
 518  * that they considered SMT. Whether or not the AMD processors have SMT
 519  * influences many things including scheduling and reliability, availability,
 520  * and serviceability (RAS) features.
 521  *
 522  * NODE
 523  *
 524  *      AMD uses the term node to refer to a die that contains a number of cores
 525  *      and I/O resources. Depending on the processor family and model, more
 526  *      than one node can be present in the package. When there is more than one
 527  *      node this indicates a multi-chip module. Usually each node has its own
 528  *      access to memory and I/O devices. This is important and generally
 529  *      different from the corresponding Intel Nehalem-Skylake+ processors. As a
 530  *      result, we track this relationship in the operating system.
 531  *
 532  *      In processors with an L3 cache, the L3 cache is generally shared across
 533  *      the entire node, though the way this is carved up varies from generation
 534  *      to generation.
 535  *
 * BULLDOZER
 *
 *      Starting with the Bulldozer family (0x15) and continuing until the
 *      introduction of the Zen microarchitecture, AMD introduced the idea of a
 *      compute unit. In a compute unit, two traditional cores share a number of
 *      hardware resources. Critically, they share the FPU, L1 instruction
 *      cache, and the L2 cache. Several compute units were then combined inside
 *      of a single node.  Because the integer execution units, L1 data cache,
 *      and some other resources were not shared between the cores, AMD never
 *      considered this to be SMT.
 *
 * ZEN
 *
 *      The Zen family (0x17) uses a multi-chip module (MCM) design; the module
 *      is called Zeppelin. These modules are similar to the idea of nodes used
 *      previously. Each of these nodes has two DRAM channels which all of the
 *      cores in the node can access uniformly. These nodes are linked together
 *      in the package, creating a NUMA environment.
 *
 *      The Zeppelin die itself contains two different 'core complexes'. Each
 *      core complex consists of four cores which each have two threads, for a
 *      total of 8 logical CPUs per complex. Unlike other generations,
 *      where all the logical CPUs in a given node share the L3 cache, here each
 *      core complex has its own shared L3 cache.
 *
 *      A further thing that we need to consider is that in some configurations,
 *      particularly with the Threadripper line of processors, not every die
 *      actually has its memory controllers wired up to actual memory channels.
 *      This means that some cores have memory attached to them and others
 *      don't.
 *
 *      To put Zen in perspective, consider the following images:
 *
 *      +--------------------------------------------------------+
 *      | Core Complex                                           |
 *      | +-------------------+    +-------------------+  +---+  |
 *      | | Core       +----+ |    | Core       +----+ |  |   |  |
 *      | | +--------+ | L2 | |    | +--------+ | L2 | |  |   |  |
 *      | | | Thread | +----+ |    | | Thread | +----+ |  |   |  |
 *      | | +--------+-+ +--+ |    | +--------+-+ +--+ |  | L |  |
 *      | |   | Thread | |L1| |    |   | Thread | |L1| |  | 3 |  |
 *      | |   +--------+ +--+ |    |   +--------+ +--+ |  |   |  |
 *      | +-------------------+    +-------------------+  | C |  |
 *      | +-------------------+    +-------------------+  | a |  |
 *      | | Core       +----+ |    | Core       +----+ |  | c |  |
 *      | | +--------+ | L2 | |    | +--------+ | L2 | |  | h |  |
 *      | | | Thread | +----+ |    | | Thread | +----+ |  | e |  |
 *      | | +--------+-+ +--+ |    | +--------+-+ +--+ |  |   |  |
 *      | |   | Thread | |L1| |    |   | Thread | |L1| |  |   |  |
 *      | |   +--------+ +--+ |    |   +--------+ +--+ |  |   |  |
 *      | +-------------------+    +-------------------+  +---+  |
 *      |                                                        |
 *      +--------------------------------------------------------+
 *
 *  This first image represents a single Zen core complex that consists of four
 *  cores.
 *
 *
 *      +--------------------------------------------------------+
 *      | Zeppelin Die                                           |
 *      |  +--------------------------------------------------+  |
 *      |  |         I/O Units (PCIe, SATA, USB, etc.)        |  |
 *      |  +--------------------------------------------------+  |
 *      |                           HH                           |
 *      |          +-----------+    HH    +-----------+          |
 *      |          |           |    HH    |           |          |
 *      |          |    Core   |==========|    Core   |          |
 *      |          |  Complex  |==========|  Complex  |          |
 *      |          |           |    HH    |           |          |
 *      |          +-----------+    HH    +-----------+          |
 *      |                           HH                           |
 *      |  +--------------------------------------------------+  |
 *      |  |                Memory Controller                 |  |
 *      |  +--------------------------------------------------+  |
 *      |                                                        |
 *      +--------------------------------------------------------+
 *
 *  This image represents a single Zeppelin Die. Note how both core complexes
 *  are connected to the same memory controller and I/O units. While each core
 *  complex has its own L3 cache as seen in the first image, they both have
 *  uniform access to memory.
 *
 *
 *                      PP                     PP
 *                      PP                     PP
 *           +----------PP---------------------PP---------+
 *           |          PP                     PP         |
 *           |    +-----------+          +-----------+    |
 *           |    |           |          |           |    |
 *       MMMMMMMMM|  Zeppelin |==========|  Zeppelin |MMMMMMMMM
 *       MMMMMMMMM|    Die    |==========|    Die    |MMMMMMMMM
 *           |    |           |          |           |    |
 *           |    +-----------+ooo    ...+-----------+    |
 *           |          HH      ooo  ...       HH         |
 *           |          HH        oo..         HH         |
 *           |          HH        ..oo         HH         |
 *           |          HH      ...  ooo       HH         |
 *           |    +-----------+...    ooo+-----------+    |
 *           |    |           |          |           |    |
 *       MMMMMMMMM|  Zeppelin |==========|  Zeppelin |MMMMMMMMM
 *       MMMMMMMMM|    Die    |==========|    Die    |MMMMMMMMM
 *           |    |           |          |           |    |
 *           |    +-----------+          +-----------+    |
 *           |          PP                     PP         |
 *           +----------PP---------------------PP---------+
 *                      PP                     PP
 *                      PP                     PP
 *
 *  This image represents a single Zen package. In this example, it has four
 *  Zeppelin dies, though some configurations only have a single one. Here,
 *  each die is directly connected to the next. Also, each die is represented
 *  as being connected to memory by the 'M' character and connected to PCIe
 *  devices and other I/O by the 'P' character. Because each Zeppelin die is
 *  made up of two core complexes, we have multiple different NUMA domains
 *  that we care about for these systems.
 *
 * CPUID LEAVES
 *
 * There are a few different CPUID leaves that we can use to try and understand
 * the actual state of the world. As part of the introduction of family 0xf, AMD
 * added CPUID leaf 0x80000008. This leaf tells us the number of logical
 * processors that are in the system. Because families before Zen didn't have
 * SMT, this was always the number of cores that were in the system. However, it
 * should always be thought of as the number of logical threads to be consistent
 * between generations. In addition we also get the size of the APIC ID that is
 * used to represent the number of logical processors. This is important for
 * deriving topology information.
 *
 * In the Bulldozer family, AMD added leaf 0x8000001E. The information varies a
 * bit between Bulldozer and later families, but it is quite useful in
 * determining the topology information. Because this information has changed
 * across family generations, it's worth calling out what these mean
 * explicitly. The registers have the following meanings:
 *
 *      %eax    The APIC ID. The entire register is defined to have a 32-bit
 *              APIC ID, even though on systems without x2apic support, it will
 *              be limited to 8 bits.
 *
 *      %ebx    On Bulldozer-era systems this contains information about the
 *              number of cores that are in a compute unit (cores that share
 *              resources). It also contains a per-package compute unit ID that
 *              identifies which compute unit the logical CPU is a part of.
 *
 *              On Zen-era systems this instead contains the number of threads
 *              per core and the ID of the core that the logical CPU is a part
 *              of. Note, this ID is unique only to the package, it is not
 *              globally unique across the entire system.
 *
 *      %ecx    This contains the number of nodes that exist in the package. It
 *              also contains an ID that identifies which node the logical CPU
 *              is a part of.
 *
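 * As a hedged sketch of how the Zen-era (family 0x17) layout is typically
 * decoded, assuming ebx and ecx hold the leaf 0x8000001E register values
 * (field widths follow AMD's documentation for that family; consult the
 * appropriate BKDG or PPR for other families):
 *
 *      uint_t threads_per_core = ((ebx >> 8) & 0xff) + 1;
 *      uint_t coreid = ebx & 0xff;
 *      uint_t nnodes = ((ecx >> 8) & 0x7) + 1;
 *      uint_t nodeid = ecx & 0xff;
 *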
 * Finally, we also use cpuid leaf 0x8000001D to determine information about the
 * cache layout to determine which logical CPUs are sharing which caches.
 *
 * illumos Topology
 * ----------------
 *
 * Based on the above, we synthesize the information into several different
 * variables that we store in the 'struct cpuid_info'. We'll go into the details
 * of what each member is supposed to represent and their uniqueness. In
 * general, there are two levels of uniqueness that we care about. We care about
 * an ID that is globally unique. That means that it will be unique across all
 * entities in the system. For example, the default logical CPU ID is globally
 * unique. On the other hand, there is some information that we only care about
 * being unique within the context of a single package / socket. Here are the
 * variables that we keep track of and their meaning.
 *
 * Several of the values that represent identifiers, with the exception of
 * cpi_apicid, are allowed to be synthetic.
 *
 *
 * cpi_apicid
 *
 *      This is the value of the CPU's APIC ID. This should be the full 32-bit
 *      ID if the CPU is using the x2apic. Otherwise, it should be the 8-bit
 *      APIC ID. This value is globally unique between all logical CPUs across
 *      all packages. This is usually required by the APIC.
 *
 * cpi_chipid
 *
 *      This value indicates the ID of the package that the logical CPU is a
 *      part of. This value is allowed to be synthetic. It is usually derived by
 *      taking the CPU's APIC ID and determining how many bits are used to
 *      represent CPU cores in the package. All logical CPUs that are part of
 *      the same package must have the same value.
 *
 * cpi_coreid
 *
 *      This represents the ID of a CPU core. Two logical CPUs should only have
 *      the same cpi_coreid value if they are part of the same core. These
 *      values may be synthetic. On systems that support SMT, this value is
 *      usually derived from the APIC ID, otherwise it is often synthetic and
 *      just set to the value of the cpu_id in the cpu_t.
 *
 * cpi_pkgcoreid
 *
 *      This is similar to the cpi_coreid in that logical CPUs that are part of
 *      the same core should have the same ID. The main difference is that these
 *      values are only required to be unique to a given socket.
 *
 * cpi_clogid
 *
 *      This represents the logical ID of a logical CPU. This value should be
 *      unique within a given socket for each logical CPU. This is allowed to be
 *      synthetic, though it is usually based on the CPU's APIC ID. The
 *      broader system expects that logical CPUs that are part of the same
 *      core have contiguous numbers. For example, if there were two threads per
 *      core, then the IDs of two siblings divided by two would be the same,
 *      with the first sibling's ID modulo two being zero and the second's being
 *      one. Thus, IDs 4 and 5 indicate two logical CPUs that are part of the
 *      same core, but IDs 5 and 6 represent two logical CPUs that are part of
 *      different cores.
 *
 *      While it is common for the cpi_coreid and the cpi_clogid to be derived
 *      from the same source, strictly speaking, they don't have to be and the
 *      two values should be considered logically independent. One should not
 *      try to compare a logical CPU's cpi_coreid and cpi_clogid to determine
 *      some kind of relationship. While this is tempting, we've seen cases on
 *      AMD family 0xf where the system's cpu id is not related to its APIC ID.
 *
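 * As a brief sketch of the contiguity property described above, assuming two
 * threads per core and that clogid_a and clogid_b are the cpi_clogid values
 * of two logical CPUs in the same package:
 *
 *      boolean_t same_core = ((clogid_a >> 1) == (clogid_b >> 1));
 *
 * More generally, with N threads per core, the sibling group is identified by
 * cpi_clogid / N.
 *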
 * cpi_ncpu_per_chip
 *
 *      This value indicates the total number of logical CPUs that exist in the
 *      physical package. Critically, this is not the number of logical CPUs
 *      that exist for just the single core.
 *
 *      This value should be the same for all logical CPUs in the same package.
 *
 * cpi_ncore_per_chip
 *
 *      This value indicates the total number of physical CPU cores that exist
 *      in the package. The system compares this value with cpi_ncpu_per_chip to
 *      determine if simultaneous multi-threading (SMT) is enabled. When
 *      cpi_ncpu_per_chip equals cpi_ncore_per_chip, then there is no SMT and
 *      the X86FSET_HTT feature is not set. If this value is greater than one,
 *      then we consider the processor to have the feature X86FSET_CMP, to
 *      indicate that there is support for more than one core. (See the sketch
 *      below.)
 *
 *      This value should be the same for all logical CPUs in the same package.
 *
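 * In other words, the determination reduces to something like the following
 * simplified sketch (not the literal pass 1 code):
 *
 *      if (cpi->cpi_ncpu_per_chip > cpi->cpi_ncore_per_chip)
 *              add_x86_feature(featureset, X86FSET_HTT);
 *      if (cpi->cpi_ncore_per_chip > 1)
 *              add_x86_feature(featureset, X86FSET_CMP);
 *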
 * cpi_procnodes_per_pkg
 *
 *      This value indicates the number of 'nodes' that exist in the package.
 *      When a processor is actually a multi-chip module, this represents the
 *      number of such modules that exist in the package. Currently, on Intel
 *      based systems this member is always set to 1.
 *
 *      This value should be the same for all logical CPUs in the same package.
 *
 * cpi_procnodeid
 *
 *      This value indicates the ID of the node that the logical CPU is a part
 *      of. All logical CPUs that are in the same node must have the same value
 *      here. This value must be unique across all of the packages in the
 *      system.  On Intel based systems, this is currently set to the value in
 *      cpi_chipid because there is only one node.
 *
 * cpi_cores_per_compunit
 *
 *      This value indicates the number of cores that are part of a compute
 *      unit. See the AMD topology section for this. This member only has real
 *      meaning currently for AMD Bulldozer family processors. For all other
 *      processors, this should currently be set to 1.
 *
 * cpi_compunitid
 *
 *      This indicates the compute unit that the logical CPU belongs to. For
 *      processors without AMD Bulldozer-style compute units this should be set
 *      to the value of cpi_coreid.
 *
 * cpi_ncpu_shr_last_cache
 *
 *      This indicates the number of logical CPUs that are sharing the same last
 *      level cache. This value should be the same for all CPUs that are sharing
 *      that cache. The last cache refers to the cache that is closest to memory
 *      and furthest away from the CPU.
 *
 * cpi_last_lvl_cacheid
 *
 *      This indicates the ID of the last cache that the logical CPU uses. This
 *      cache is often shared between multiple logical CPUs and is the cache
 *      that is closest to memory and furthest away from the CPU. This value
 *      should be the same for a group of logical CPUs only if they actually
 *      share the same last level cache. IDs should not overlap between
 *      packages.
 *
 * cpi_ncore_bits
 *
 *      This indicates the number of bits that are required to represent all of
 *      the cores in the system. As cores are derived based on their APIC IDs,
 *      we aren't guaranteed a run of APIC IDs starting from zero. It's OK for
 *      this value to be larger than the actual number of IDs that are present
 *      in the system. This is used to size tables by the CMI framework. It is
 *      only filled in for Intel and AMD CPUs.
 *
 * cpi_nthread_bits
 *
 *      This indicates the number of bits required to represent all of the IDs
 *      that cover the logical CPUs that exist on a given core. It's OK for this
 *      value to be larger than the actual number of IDs that are present in the
 *      system.  This is used to size tables by the CMI framework. It is
 *      only filled in for Intel and AMD CPUs.
 *
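 * Taken together, the two bit counts above describe how an APIC ID
 * decomposes. A simplified sketch of the kind of derivation described under
 * cpi_chipid (the real code must also cope with vendor quirks) is:
 *
 *      uint_t shift = cpi->cpi_ncore_bits + cpi->cpi_nthread_bits;
 *      uint_t chipid = cpi->cpi_apicid >> shift;
 *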
 * -----------
 * Hypervisors
 * -----------
 *
 * If trying to manage the differences between vendors wasn't bad enough, it can
 * get worse thanks to our friend hardware virtualization. Hypervisors are given
 * the ability to interpose on all cpuid instructions and change them to suit
 * their purposes. In general, this is necessary as the hypervisor wants to be
 * able to present a more uniform set of features, or to withhold knowledge of
 * some features from the guest operating system kernel so that the guest can
 * be more easily migrated between systems.
 *
 * When it comes to trying to determine topology information, this can be a
 * double edged sword. When a hypervisor doesn't actually implement a cpuid
 * leaf, it'll often return all zeros. Because of that, you'll often see various
 * checks scattered about that verify fields are non-zero before we assume we
 * can use them.
 *
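 * Reusing the hypothetical cpuid_raw() sketch from earlier, that defensive
 * pattern looks roughly like this:
 *
 *      uint32_t eax, ebx, ecx, edx;
 *
 *      cpuid_raw(0x8000001e, 0, &eax, &ebx, &ecx, &edx);
 *      if (ebx == 0 && ecx == 0)
 *              return (B_FALSE);
 *
 * Here, all-zero output is treated as 'leaf not implemented' and the data is
 * not used.
 *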
 * When it comes to topology information, the hypervisor is often incentivized
 * to lie to you about topology. This is because it doesn't always actually
 * guarantee that topology at all. The topology path we take in the system
 * depends on how the CPU advertises itself. If it advertises itself as an Intel
 * or AMD CPU, then we basically do our normal path. However, when the
 * hypervisor doesn't advertise an actual vendor, we usually end up enumerating
 * multiple one-core CPUs that appear to be on different sockets. The actual
 * behavior depends greatly on what the hypervisor actually exposes to us.
 *
 * --------------------
 * Exposing Information
 * --------------------
 *
 * We expose CPUID information in three different forms in the system.
 *
 * The first is through the x86_featureset variable. This is used in conjunction
 * with the is_x86_feature() function. This is queried by x86-specific functions
 * to determine which features are or aren't present in the system and to make
 * decisions based upon them. For example, users of this include everything from
 * parts of the system dedicated to reliability, availability, and
 * serviceability (RAS), to making decisions about how to handle security
 * mitigations, to various x86-specific drivers. General-purpose or
 * architecture-independent drivers should never call this function.
 *
 * The second means is through the auxiliary vector. The auxiliary vector is a
 * series of tagged data that the kernel passes down to a user program when it
 * begins executing. This information is used to indicate to programs what
 * instruction set extensions are present. For example, information about the
 * CPU supporting the machine check architecture (MCA) wouldn't be passed down
 * since user programs cannot make use of it. However, things like the AVX
 * instruction sets are. Programs use this information to make run-time
 * decisions about what features they should use. As an example, the run-time
 * link-editor (rtld) can relocate different functions depending on the hardware
 * support available.
 *
 * The final form is through a series of accessor functions that all have the
 * form cpuid_get*. This is used by a number of different subsystems in the
 * kernel to determine more detailed information about what we're running on,
 * topology information, etc. Some of these subsystems include processor groups
 * (uts/common/os/pg.c), CPU Module Interface (uts/i86pc/os/cmi.c), ACPI,
 * microcode, and performance monitoring. These functions all ASSERT that the
 * CPU they're being called on has reached a certain cpuid pass. If the passes
 * are rearranged, then this needs to be adjusted.
 */

#include <sys/types.h>
#include <sys/archsystm.h>
#include <sys/x86_archext.h>
#include <sys/kmem.h>
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/cpuvar.h>
#include <sys/processor.h>
#include <sys/sysmacros.h>
#include <sys/pg.h>
#include <sys/fp.h>
#include <sys/controlregs.h>
#include <sys/bitmap.h>
#include <sys/auxv_386.h>
#include <sys/memnode.h>
#include <sys/pci_cfgspace.h>
#include <sys/comm_page.h>
#include <sys/mach_mmu.h>
#include <sys/ucode.h>
#include <sys/tsc.h>

#ifdef __xpv
#include <sys/hypervisor.h>
#else
#include <sys/ontrap.h>
#endif

 931 uint_t x86_vendor = X86_VENDOR_IntelClone;
 932 uint_t x86_type = X86_TYPE_OTHER;
 933 uint_t x86_clflush_size = 0;
 934 
 935 #if defined(__xpv)
 936 int x86_use_pcid = 0;
 937 int x86_use_invpcid = 0;
 938 #else
 939 int x86_use_pcid = -1;
 940 int x86_use_invpcid = -1;
 941 #endif
 942 
 943 uint_t pentiumpro_bug4046376;
 944 
 945 uchar_t x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)];
 946 
 947 static char *x86_feature_names[NUM_X86_FEATURES] = {
 948         "lgpg",
 949         "tsc",
 950         "msr",
 951         "mtrr",
 952         "pge",
 953         "de",
 954         "cmov",
 955         "mmx",
 956         "mca",
 957         "pae",
 958         "cv8",
 959         "pat",
 960         "sep",
 961         "sse",
 962         "sse2",
 963         "htt",
 964         "asysc",
 965         "nx",
 966         "sse3",
 967         "cx16",
 968         "cmp",
 969         "tscp",
 970         "mwait",
 971         "sse4a",
 972         "cpuid",
 973         "ssse3",
 974         "sse4_1",
 975         "sse4_2",
 976         "1gpg",
 977         "clfsh",
 978         "64",
 979         "aes",
 980         "pclmulqdq",
 981         "xsave",
 982         "avx",
 983         "vmx",
 984         "svm",
 985         "topoext",
 986         "f16c",
 987         "rdrand",
 988         "x2apic",
 989         "avx2",
 990         "bmi1",
 991         "bmi2",
 992         "fma",
 993         "smep",
 994         "smap",
 995         "adx",
 996         "rdseed",
 997         "mpx",
 998         "avx512f",
 999         "avx512dq",
1000         "avx512pf",
1001         "avx512er",
1002         "avx512cd",
1003         "avx512bw",
1004         "avx512vl",
1005         "avx512fma",
1006         "avx512vbmi",
1007         "avx512_vpopcntdq",
1008         "avx512_4vnniw",
1009         "avx512_4fmaps",
1010         "xsaveopt",
1011         "xsavec",
1012         "xsaves",
1013         "sha",
1014         "umip",
1015         "pku",
1016         "ospke",
1017         "pcid",
1018         "invpcid",
1019         "ibrs",
1020         "ibpb",
1021         "stibp",
1022         "ssbd",
1023         "ssbd_virt",
1024         "rdcl_no",
1025         "ibrs_all",
1026         "rsba",
1027         "ssb_no",
1028         "stibp_all",
1029         "flush_cmd",
1030         "l1d_vmentry_no",
1031         "fsgsbase",
1032         "clflushopt",
1033         "clwb",
1034         "monitorx",
1035         "clzero",
1036         "xop",
1037         "fma4",
1038         "tbm",
1039         "avx512_vnni"
1040 };
1041 
1042 boolean_t
1043 is_x86_feature(void *featureset, uint_t feature)
1044 {
1045         ASSERT(feature < NUM_X86_FEATURES);
1046         return (BT_TEST((ulong_t *)featureset, feature));
1047 }
1048 
1049 void
1050 add_x86_feature(void *featureset, uint_t feature)
1051 {
1052         ASSERT(feature < NUM_X86_FEATURES);
1053         BT_SET((ulong_t *)featureset, feature);
1054 }
1055 
1056 void
1057 remove_x86_feature(void *featureset, uint_t feature)
1058 {
1059         ASSERT(feature < NUM_X86_FEATURES);
1060         BT_CLEAR((ulong_t *)featureset, feature);
1061 }
1062 
1063 boolean_t
1064 compare_x86_featureset(void *setA, void *setB)
1065 {
1066         /*
1067          * We assume that the unused bits of the bitmap are always zero.
1068          */
1069         if (memcmp(setA, setB, BT_SIZEOFMAP(NUM_X86_FEATURES)) == 0) {
1070                 return (B_TRUE);
1071         } else {
1072                 return (B_FALSE);
1073         }
1074 }
1075 
1076 void
1077 print_x86_featureset(void *featureset)
1078 {
1079         uint_t i;
1080 
1081         for (i = 0; i < NUM_X86_FEATURES; i++) {
1082                 if (is_x86_feature(featureset, i)) {
1083                         cmn_err(CE_CONT, "?x86_feature: %s\n",
1084                             x86_feature_names[i]);
1085                 }
1086         }
1087 }
1088 
1089 /* Note: This is the maximum size for the CPU, not the size of the structure. */
1090 static size_t xsave_state_size = 0;
1091 uint64_t xsave_bv_all = (XFEATURE_LEGACY_FP | XFEATURE_SSE);
1092 boolean_t xsave_force_disable = B_FALSE;
1093 extern int disable_smap;
1094 
1095 /*
 * This is set to the platform type we are running on.
1097  */
1098 static int platform_type = -1;
1099 
1100 #if !defined(__xpv)
1101 /*
1102  * Variable to patch if hypervisor platform detection needs to be
 * disabled (i.e. platform_type will always be HW_NATIVE if this is 0).
1104  */
1105 int enable_platform_detection = 1;
1106 #endif
1107 
1108 /*
1109  * monitor/mwait info.
1110  *
 * buf_actual and size_actual are the real address and size allocated to get
 * proper mwait_buf alignment.  buf_actual and size_actual should be passed
 * to kmem_free().  Currently kmem_alloc() and mwait happen to both use
 * processor cache-line alignment, but this is not guaranteed in the future.
1115  */
1116 struct mwait_info {
1117         size_t          mon_min;        /* min size to avoid missed wakeups */
1118         size_t          mon_max;        /* size to avoid false wakeups */
1119         size_t          size_actual;    /* size actually allocated */
1120         void            *buf_actual;    /* memory actually allocated */
1121         uint32_t        support;        /* processor support of monitor/mwait */
1122 };
1123 
1124 /*
 * xsave/xrstor info.
1126  *
1127  * This structure contains HW feature bits and the size of the xsave save area.
1128  * Note: the kernel declares a fixed size (AVX_XSAVE_SIZE) structure
1129  * (xsave_state) to describe the xsave layout. However, at runtime the
1130  * per-lwp xsave area is dynamically allocated based on xsav_max_size. The
1131  * xsave_state structure simply represents the legacy layout of the beginning
1132  * of the xsave area.
1133  */
1134 struct xsave_info {
1135         uint32_t        xsav_hw_features_low;   /* Supported HW features */
1136         uint32_t        xsav_hw_features_high;  /* Supported HW features */
1137         size_t          xsav_max_size;  /* max size save area for HW features */
1138         size_t          ymm_size;       /* AVX: size of ymm save area */
1139         size_t          ymm_offset;     /* AVX: offset for ymm save area */
1140         size_t          bndregs_size;   /* MPX: size of bndregs save area */
1141         size_t          bndregs_offset; /* MPX: offset for bndregs save area */
1142         size_t          bndcsr_size;    /* MPX: size of bndcsr save area */
1143         size_t          bndcsr_offset;  /* MPX: offset for bndcsr save area */
1144         size_t          opmask_size;    /* AVX512: size of opmask save */
1145         size_t          opmask_offset;  /* AVX512: offset for opmask save */
1146         size_t          zmmlo_size;     /* AVX512: size of zmm 256 save */
1147         size_t          zmmlo_offset;   /* AVX512: offset for zmm 256 save */
1148         size_t          zmmhi_size;     /* AVX512: size of zmm hi reg save */
1149         size_t          zmmhi_offset;   /* AVX512: offset for zmm hi reg save */
1150 };
1151 
1152 
1153 /*
 * These constants determine how many of the cpuid leaves we cache in the
 * cpuid_info data structure; the remaining leaves are accessible via the
 * cpuid instruction.
1157  */
1158 
1159 #define NMAX_CPI_STD    8               /* eax = 0 .. 7 */
1160 #define NMAX_CPI_EXTD   0x1f            /* eax = 0x80000000 .. 0x8000001e */
1161 
1162 /*
1163  * See the big theory statement for a more detailed explanation of what some of
1164  * these members mean.
1165  */
1166 struct cpuid_info {
1167         uint_t cpi_pass;                /* last pass completed */
1168         /*
1169          * standard function information
1170          */
1171         uint_t cpi_maxeax;              /* fn 0: %eax */
1172         char cpi_vendorstr[13];         /* fn 0: %ebx:%ecx:%edx */
1173         uint_t cpi_vendor;              /* enum of cpi_vendorstr */
1174 
1175         uint_t cpi_family;              /* fn 1: extended family */
1176         uint_t cpi_model;               /* fn 1: extended model */
1177         uint_t cpi_step;                /* fn 1: stepping */
1178         chipid_t cpi_chipid;            /* fn 1: %ebx:  Intel: chip # */
1179                                         /*              AMD: package/socket # */
1180         uint_t cpi_brandid;             /* fn 1: %ebx: brand ID */
1181         int cpi_clogid;                 /* fn 1: %ebx: thread # */
1182         uint_t cpi_ncpu_per_chip;       /* fn 1: %ebx: logical cpu count */
1183         uint8_t cpi_cacheinfo[16];      /* fn 2: intel-style cache desc */
1184         uint_t cpi_ncache;              /* fn 2: number of elements */
1185         uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */
1186         id_t cpi_last_lvl_cacheid;      /* fn 4: %eax: derived cache id */
        uint_t cpi_cache_leaf_size;     /* Number of cache leaves */
                                        /* Intel fn: 4, AMD fn: 8000001d */
        struct cpuid_regs **cpi_cache_leaves;   /* Actual leaves from above */
1190         struct cpuid_regs cpi_std[NMAX_CPI_STD];        /* 0 .. 7 */
1191         /*
1192          * extended function information
1193          */
1194         uint_t cpi_xmaxeax;             /* fn 0x80000000: %eax */
1195         char cpi_brandstr[49];          /* fn 0x8000000[234] */
1196         uint8_t cpi_pabits;             /* fn 0x80000006: %eax */
1197         uint8_t cpi_vabits;             /* fn 0x80000006: %eax */
1198         uint8_t cpi_fp_amd_save;        /* AMD: FP error pointer save rqd. */
1199         struct  cpuid_regs cpi_extd[NMAX_CPI_EXTD];     /* 0x800000XX */
1200 
1201         id_t cpi_coreid;                /* same coreid => strands share core */
1202         int cpi_pkgcoreid;              /* core number within single package */
1203         uint_t cpi_ncore_per_chip;      /* AMD: fn 0x80000008: %ecx[7-0] */
1204                                         /* Intel: fn 4: %eax[31-26] */
1205 
1206         /*
1207          * These values represent the number of bits that are required to store
1208          * information about the number of cores and threads.
1209          */
1210         uint_t cpi_ncore_bits;
1211         uint_t cpi_nthread_bits;
1212         /*
1213          * supported feature information
1214          */
1215         uint32_t cpi_support[6];
1216 #define STD_EDX_FEATURES        0
1217 #define AMD_EDX_FEATURES        1
1218 #define TM_EDX_FEATURES         2
1219 #define STD_ECX_FEATURES        3
1220 #define AMD_ECX_FEATURES        4
1221 #define STD_EBX_FEATURES        5
1222         /*
1223          * Synthesized information, where known.
1224          */
1225         uint32_t cpi_chiprev;           /* See X86_CHIPREV_* in x86_archext.h */
1226         const char *cpi_chiprevstr;     /* May be NULL if chiprev unknown */
1227         uint32_t cpi_socket;            /* Chip package/socket type */
1228 
1229         struct mwait_info cpi_mwait;    /* fn 5: monitor/mwait info */
1230         uint32_t cpi_apicid;
1231         uint_t cpi_procnodeid;          /* AMD: nodeID on HT, Intel: chipid */
1232         uint_t cpi_procnodes_per_pkg;   /* AMD: # of nodes in the package */
1233                                         /* Intel: 1 */
1234         uint_t cpi_compunitid;          /* AMD: ComputeUnit ID, Intel: coreid */
1235         uint_t cpi_cores_per_compunit;  /* AMD: # of cores in the ComputeUnit */
1236 
        struct xsave_info cpi_xsave;    /* fn D: xsave/xrstor info */
1238 };
1239 
1240 
1241 static struct cpuid_info cpuid_info0;
1242 
1243 /*
1244  * These bit fields are defined by the Intel Application Note AP-485
1245  * "Intel Processor Identification and the CPUID Instruction"
1246  */
1247 #define CPI_FAMILY_XTD(cpi)     BITX((cpi)->cpi_std[1].cp_eax, 27, 20)
1248 #define CPI_MODEL_XTD(cpi)      BITX((cpi)->cpi_std[1].cp_eax, 19, 16)
1249 #define CPI_TYPE(cpi)           BITX((cpi)->cpi_std[1].cp_eax, 13, 12)
1250 #define CPI_FAMILY(cpi)         BITX((cpi)->cpi_std[1].cp_eax, 11, 8)
1251 #define CPI_STEP(cpi)           BITX((cpi)->cpi_std[1].cp_eax, 3, 0)
1252 #define CPI_MODEL(cpi)          BITX((cpi)->cpi_std[1].cp_eax, 7, 4)
1253 
1254 #define CPI_FEATURES_EDX(cpi)           ((cpi)->cpi_std[1].cp_edx)
1255 #define CPI_FEATURES_ECX(cpi)           ((cpi)->cpi_std[1].cp_ecx)
1256 #define CPI_FEATURES_XTD_EDX(cpi)       ((cpi)->cpi_extd[1].cp_edx)
1257 #define CPI_FEATURES_XTD_ECX(cpi)       ((cpi)->cpi_extd[1].cp_ecx)
1258 #define CPI_FEATURES_7_0_EBX(cpi)       ((cpi)->cpi_std[7].cp_ebx)
1259 #define CPI_FEATURES_7_0_ECX(cpi)       ((cpi)->cpi_std[7].cp_ecx)
1260 #define CPI_FEATURES_7_0_EDX(cpi)       ((cpi)->cpi_std[7].cp_edx)
1261 
1262 #define CPI_BRANDID(cpi)        BITX((cpi)->cpi_std[1].cp_ebx, 7, 0)
1263 #define CPI_CHUNKS(cpi)         BITX((cpi)->cpi_std[1].cp_ebx, 15, 7)
1264 #define CPI_CPU_COUNT(cpi)      BITX((cpi)->cpi_std[1].cp_ebx, 23, 16)
1265 #define CPI_APIC_ID(cpi)        BITX((cpi)->cpi_std[1].cp_ebx, 31, 24)
1266 
1267 #define CPI_MAXEAX_MAX          0x100           /* sanity control */
1268 #define CPI_XMAXEAX_MAX         0x80000100
1269 #define CPI_FN4_ECX_MAX         0x20            /* sanity: max fn 4 levels */
1270 #define CPI_FNB_ECX_MAX         0x20            /* sanity: max fn B levels */
1271 
1272 /*
1273  * Function 4 (Deterministic Cache Parameters) macros
1274  * Defined by Intel Application Note AP-485
1275  */
1276 #define CPI_NUM_CORES(regs)             BITX((regs)->cp_eax, 31, 26)
1277 #define CPI_NTHR_SHR_CACHE(regs)        BITX((regs)->cp_eax, 25, 14)
1278 #define CPI_FULL_ASSOC_CACHE(regs)      BITX((regs)->cp_eax, 9, 9)
1279 #define CPI_SELF_INIT_CACHE(regs)       BITX((regs)->cp_eax, 8, 8)
1280 #define CPI_CACHE_LVL(regs)             BITX((regs)->cp_eax, 7, 5)
1281 #define CPI_CACHE_TYPE(regs)            BITX((regs)->cp_eax, 4, 0)
1282 #define CPI_CPU_LEVEL_TYPE(regs)        BITX((regs)->cp_ecx, 15, 8)
1283 
1284 #define CPI_CACHE_WAYS(regs)            BITX((regs)->cp_ebx, 31, 22)
1285 #define CPI_CACHE_PARTS(regs)           BITX((regs)->cp_ebx, 21, 12)
1286 #define CPI_CACHE_COH_LN_SZ(regs)       BITX((regs)->cp_ebx, 11, 0)
1287 
1288 #define CPI_CACHE_SETS(regs)            BITX((regs)->cp_ecx, 31, 0)
1289 
1290 #define CPI_PREFCH_STRIDE(regs)         BITX((regs)->cp_edx, 9, 0)
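
/*
 * The fields above are each reported as (value - 1); the total cache size
 * is their product once incremented. For example, a cache with 8 ways, 1
 * partition, 64-byte lines, and 8192 sets works out to:
 *
 *	size = (CPI_CACHE_WAYS(regs) + 1) * (CPI_CACHE_PARTS(regs) + 1) *
 *	    (CPI_CACHE_COH_LN_SZ(regs) + 1) * (CPI_CACHE_SETS(regs) + 1)
 *	     = 8 * 1 * 64 * 8192 = 4MB
 */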
1291 
1292 
1293 /*
1294  * A couple of shorthand macros to identify "later" P6-family chips
1295  * like the Pentium M and Core.  First, the "older" P6-based stuff
1296  * (loosely defined as "pre-Pentium-4"):
1297  * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon
1298  */
1299 #define IS_LEGACY_P6(cpi) (                     \
1300         cpi->cpi_family == 6 &&                      \
1301                 (cpi->cpi_model == 1 ||              \
1302                 cpi->cpi_model == 3 ||               \
1303                 cpi->cpi_model == 5 ||               \
1304                 cpi->cpi_model == 6 ||               \
1305                 cpi->cpi_model == 7 ||               \
1306                 cpi->cpi_model == 8 ||               \
1307                 cpi->cpi_model == 0xA ||     \
1308                 cpi->cpi_model == 0xB)               \
1309 )
1310 
1311 /* A "new F6" is everything with family 6 that's not the above */
1312 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi))
1313 
1314 /* Extended family/model support */
1315 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \
1316         cpi->cpi_family >= 0xf)
1317 
1318 /*
1319  * Info for monitor/mwait idle loop.
1320  *
1321  * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's
1322  * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November
1323  * 2006.
1324  * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual
1325  * Documentation Updates" #33633, Rev 2.05, December 2006.
1326  */
1327 #define MWAIT_SUPPORT           (0x00000001)    /* mwait supported */
#define MWAIT_EXTENSIONS        (0x00000002)    /* extensions supported */
1329 #define MWAIT_ECX_INT_ENABLE    (0x00000004)    /* ecx 1 extension supported */
1330 #define MWAIT_SUPPORTED(cpi)    ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON)
1331 #define MWAIT_INT_ENABLE(cpi)   ((cpi)->cpi_std[5].cp_ecx & 0x2)
1332 #define MWAIT_EXTENSION(cpi)    ((cpi)->cpi_std[5].cp_ecx & 0x1)
1333 #define MWAIT_SIZE_MIN(cpi)     BITX((cpi)->cpi_std[5].cp_eax, 15, 0)
1334 #define MWAIT_SIZE_MAX(cpi)     BITX((cpi)->cpi_std[5].cp_ebx, 15, 0)
1335 /*
1336  * Number of sub-cstates for a given c-state.
1337  */
1338 #define MWAIT_NUM_SUBC_STATES(cpi, c_state)                     \
1339         BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state)
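
/*
 * The c_state argument is the bit offset of the corresponding nibble in
 * %edx of leaf 5. For example, if cp_edx were 0x00000020, then
 * MWAIT_NUM_SUBC_STATES(cpi, 4) (the C1 nibble, bits 7:4) would yield 2
 * C1 sub-states, while MWAIT_NUM_SUBC_STATES(cpi, 0) (C0) would yield 0.
 */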
1340 
1341 /*
1342  * XSAVE leaf 0xD enumeration
1343  */
1344 #define CPUID_LEAFD_2_YMM_OFFSET        576
1345 #define CPUID_LEAFD_2_YMM_SIZE          256
1346 
1347 /*
1348  * Common extended leaf names to cut down on typos.
1349  */
1350 #define CPUID_LEAF_EXT_0                0x80000000
1351 #define CPUID_LEAF_EXT_8                0x80000008
1352 #define CPUID_LEAF_EXT_1d               0x8000001d
1353 #define CPUID_LEAF_EXT_1e               0x8000001e
1354 
1355 /*
 * Functions we consume from cpuid_subr.c; don't publish these in a header
 * file, to try to keep people using the expected cpuid_* interfaces.
1358  */
1359 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t);
1360 extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t);
1361 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t);
1362 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t);
1363 extern uint_t _cpuid_vendorstr_to_vendorcode(char *);
1364 
1365 /*
 * Apply various platform-dependent restrictions where the
1367  * underlying platform restrictions mean the CPU can be marked
1368  * as less capable than its cpuid instruction would imply.
1369  */
1370 #if defined(__xpv)
1371 static void
1372 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp)
1373 {
1374         switch (eax) {
1375         case 1: {
1376                 uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ?
1377                     0 : CPUID_INTC_EDX_MCA;
1378                 cp->cp_edx &=
1379                     ~(mcamask |
1380                     CPUID_INTC_EDX_PSE |
1381                     CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
1382                     CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR |
1383                     CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT |
1384                     CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
1385                     CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT);
1386                 break;
1387         }
1388 
1389         case 0x80000001:
1390                 cp->cp_edx &=
1391                     ~(CPUID_AMD_EDX_PSE |
1392                     CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
1393                     CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE |
1394                     CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 |
1395                     CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
1396                     CPUID_AMD_EDX_TSCP);
1397                 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY;
1398                 break;
1399         default:
1400                 break;
1401         }
1402 
1403         switch (vendor) {
1404         case X86_VENDOR_Intel:
1405                 switch (eax) {
1406                 case 4:
1407                         /*
1408                          * Zero out the (ncores-per-chip - 1) field
1409                          */
                        cp->cp_eax &= 0x03ffffff;
1411                         break;
1412                 default:
1413                         break;
1414                 }
1415                 break;
1416         case X86_VENDOR_AMD:
1417                 switch (eax) {
1418 
1419                 case 0x80000001:
1420                         cp->cp_ecx &= ~CPUID_AMD_ECX_CR8D;
1421                         break;
1422 
1423                 case CPUID_LEAF_EXT_8:
1424                         /*
1425                          * Zero out the (ncores-per-chip - 1) field
1426                          */
1427                         cp->cp_ecx &= 0xffffff00;
1428                         break;
1429                 default:
1430                         break;
1431                 }
1432                 break;
1433         default:
1434                 break;
1435         }
1436 }
1437 #else
1438 #define platform_cpuid_mangle(vendor, eax, cp)  /* nothing */
1439 #endif
1440 
1441 /*
1442  *  Some undocumented ways of patching the results of the cpuid
1443  *  instruction to permit running Solaris 10 on future cpus that
1444  *  we don't currently support.  Could be set to non-zero values
1445  *  via settings in eeprom.
1446  */
1447 
1448 uint32_t cpuid_feature_ecx_include;
1449 uint32_t cpuid_feature_ecx_exclude;
1450 uint32_t cpuid_feature_edx_include;
1451 uint32_t cpuid_feature_edx_exclude;
1452 
1453 /*
1454  * Allocate space for mcpu_cpi in the machcpu structure for all non-boot CPUs.
1455  */
1456 void
1457 cpuid_alloc_space(cpu_t *cpu)
1458 {
1459         /*
1460          * By convention, cpu0 is the boot cpu, which is set up
1461          * before memory allocation is available.  All other cpus get
1462          * their cpuid_info struct allocated here.
1463          */
1464         ASSERT(cpu->cpu_id != 0);
1465         ASSERT(cpu->cpu_m.mcpu_cpi == NULL);
1466         cpu->cpu_m.mcpu_cpi =
1467             kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP);
1468 }
1469 
1470 void
1471 cpuid_free_space(cpu_t *cpu)
1472 {
1473         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1474         int i;
1475 
1476         ASSERT(cpi != NULL);
1477         ASSERT(cpi != &cpuid_info0);
1478 
1479         /*
1480          * Free up any cache leaf related dynamic storage. The first entry was
1481          * cached from the standard cpuid storage, so we should not free it.
1482          */
1483         for (i = 1; i < cpi->cpi_cache_leaf_size; i++)
1484                 kmem_free(cpi->cpi_cache_leaves[i], sizeof (struct cpuid_regs));
1485         if (cpi->cpi_cache_leaf_size > 0)
1486                 kmem_free(cpi->cpi_cache_leaves,
1487                     cpi->cpi_cache_leaf_size * sizeof (struct cpuid_regs *));
1488 
1489         kmem_free(cpi, sizeof (*cpi));
1490         cpu->cpu_m.mcpu_cpi = NULL;
1491 }
1492 
1493 #if !defined(__xpv)
1494 /*
1495  * Determine the type of the underlying platform. This is used to customize
1496  * initialization of various subsystems (e.g. TSC). determine_platform() must
1497  * only ever be called once to prevent two processors from seeing different
1498  * values of platform_type. Must be called before cpuid_pass1(), the earliest
1499  * consumer to execute (uses _cpuid_chiprev --> synth_amd_info --> get_hwenv).
1500  */
1501 void
1502 determine_platform(void)
1503 {
1504         struct cpuid_regs cp;
1505         uint32_t base;
1506         uint32_t regs[4];
1507         char *hvstr = (char *)regs;
1508 
1509         ASSERT(platform_type == -1);
1510 
1511         platform_type = HW_NATIVE;
1512 
1513         if (!enable_platform_detection)
1514                 return;
1515 
1516         /*
1517          * If Hypervisor CPUID bit is set, try to determine hypervisor
1518          * vendor signature, and set platform type accordingly.
1519          *
1520          * References:
1521          * http://lkml.org/lkml/2008/10/1/246
1522          * http://kb.vmware.com/kb/1009458
1523          */
1524         cp.cp_eax = 0x1;
1525         (void) __cpuid_insn(&cp);
1526         if ((cp.cp_ecx & CPUID_INTC_ECX_HV) != 0) {
1527                 cp.cp_eax = 0x40000000;
1528                 (void) __cpuid_insn(&cp);
1529                 regs[0] = cp.cp_ebx;
1530                 regs[1] = cp.cp_ecx;
1531                 regs[2] = cp.cp_edx;
1532                 regs[3] = 0;
1533                 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0) {
1534                         platform_type = HW_XEN_HVM;
1535                         return;
1536                 }
1537                 if (strcmp(hvstr, HVSIG_VMWARE) == 0) {
1538                         platform_type = HW_VMWARE;
1539                         return;
1540                 }
1541                 if (strcmp(hvstr, HVSIG_KVM) == 0) {
1542                         platform_type = HW_KVM;
1543                         return;
1544                 }
1545                 if (strcmp(hvstr, HVSIG_BHYVE) == 0) {
1546                         platform_type = HW_BHYVE;
1547                         return;
1548                 }
1549                 if (strcmp(hvstr, HVSIG_MICROSOFT) == 0)
1550                         platform_type = HW_MICROSOFT;
1551         } else {
1552                 /*
                 * Check older VMware hardware versions. The VMware
                 * hypervisor is detected by performing an IN operation
                 * on the VMware hypervisor port and checking that the
                 * value returned in %ebx is the VMware hypervisor magic
                 * value.
1557                  *
1558                  * References: http://kb.vmware.com/kb/1009458
1559                  */
1560                 vmware_port(VMWARE_HVCMD_GETVERSION, regs);
1561                 if (regs[1] == VMWARE_HVMAGIC) {
1562                         platform_type = HW_VMWARE;
1563                         return;
1564                 }
1565         }
1566 
1567         /*
1568          * Check Xen hypervisor. In a fully virtualized domain,
1569          * Xen's pseudo-cpuid function returns a string representing the
1570          * Xen signature in %ebx, %ecx, and %edx. %eax contains the maximum
1571          * supported cpuid function. We need at least a (base + 2) leaf value
1572          * to do what we want to do. Try different base values, since the
1573          * hypervisor might use a different one depending on whether Hyper-V
1574          * emulation is switched on by default or not.
1575          */
1576         for (base = 0x40000000; base < 0x40010000; base += 0x100) {
1577                 cp.cp_eax = base;
1578                 (void) __cpuid_insn(&cp);
1579                 regs[0] = cp.cp_ebx;
1580                 regs[1] = cp.cp_ecx;
1581                 regs[2] = cp.cp_edx;
1582                 regs[3] = 0;
1583                 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0 &&
1584                     cp.cp_eax >= (base + 2)) {
1585                         platform_type &= ~HW_NATIVE;
1586                         platform_type |= HW_XEN_HVM;
1587                         return;
1588                 }
1589         }
1590 }
1591 
1592 int
1593 get_hwenv(void)
1594 {
1595         ASSERT(platform_type != -1);
1596         return (platform_type);
1597 }
1598 
1599 int
1600 is_controldom(void)
1601 {
1602         return (0);
1603 }
1604 
1605 #else
1606 
1607 int
1608 get_hwenv(void)
1609 {
1610         return (HW_XEN_PV);
1611 }
1612 
1613 int
1614 is_controldom(void)
1615 {
1616         return (DOMAIN_IS_INITDOMAIN(xen_info));
1617 }
1618 
1619 #endif  /* __xpv */
1620 
1621 /*
1622  * Make sure that we have gathered all of the CPUID leaves that we might need to
1623  * determine topology. We assume that the standard leaf 1 has already been done
1624  * and that xmaxeax has already been calculated.
1625  */
1626 static void
1627 cpuid_gather_amd_topology_leaves(cpu_t *cpu)
1628 {
1629         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1630 
1631         if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
1632                 struct cpuid_regs *cp;
1633 
1634                 cp = &cpi->cpi_extd[8];
1635                 cp->cp_eax = CPUID_LEAF_EXT_8;
1636                 (void) __cpuid_insn(cp);
1637                 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8, cp);
1638         }
1639 
1640         if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1641             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1642                 struct cpuid_regs *cp;
1643 
1644                 cp = &cpi->cpi_extd[0x1e];
1645                 cp->cp_eax = CPUID_LEAF_EXT_1e;
1646                 (void) __cpuid_insn(cp);
1647         }
1648 }
1649 
1650 /*
1651  * Get the APIC ID for this processor. If Leaf B is present and valid, we prefer
1652  * it to everything else. If not, and we're on an AMD system where 8000001e is
 * valid, then we use that. Otherwise, we fall back to the default value for the
1654  * APIC ID in leaf 1.
1655  */
1656 static uint32_t
1657 cpuid_gather_apicid(struct cpuid_info *cpi)
1658 {
1659         /*
         * Leaf B changes based on the arguments to it. Because we don't cache
1661          * it, we need to gather it again.
1662          */
1663         if (cpi->cpi_maxeax >= 0xB) {
1664                 struct cpuid_regs regs;
1665                 struct cpuid_regs *cp;
1666 
1667                 cp = &regs;
1668                 cp->cp_eax = 0xB;
1669                 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1670                 (void) __cpuid_insn(cp);
1671 
1672                 if (cp->cp_ebx != 0) {
1673                         return (cp->cp_edx);
1674                 }
1675         }
1676 
1677         if (cpi->cpi_vendor == X86_VENDOR_AMD &&
1678             is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1679             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1680                 return (cpi->cpi_extd[0x1e].cp_eax);
1681         }
1682 
1683         return (CPI_APIC_ID(cpi));
1684 }
1685 
1686 /*
1687  * For AMD processors, attempt to calculate the number of chips and cores that
1688  * exist. The way that we do this varies based on the generation, because the
1689  * generations themselves have changed dramatically.
1690  *
1691  * If cpuid leaf 0x80000008 exists, that generally tells us the number of cores.
1692  * However, with the advent of family 17h (Zen) it actually tells us the number
1693  * of threads, so we need to look at leaf 0x8000001e if available to determine
1694  * its value. Otherwise, for all prior families, the number of enabled cores is
1695  * the same as threads.
1696  *
1697  * If we do not have leaf 0x80000008, then we assume that this processor does
1698  * not have anything. AMD's older CPUID specification says there's no reason to
1699  * fall back to leaf 1.
1700  *
1701  * In some virtualization cases we will not have leaf 8000001e or it will be
1702  * zero. When that happens we assume the number of threads is one.
1703  */
1704 static void
1705 cpuid_amd_ncores(struct cpuid_info *cpi, uint_t *ncpus, uint_t *ncores)
1706 {
1707         uint_t nthreads, nthread_per_core;
1708 
1709         nthreads = nthread_per_core = 1;
1710 
1711         if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
1712                 nthreads = BITX(cpi->cpi_extd[8].cp_ecx, 7, 0) + 1;
1713         } else if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) {
1714                 nthreads = CPI_CPU_COUNT(cpi);
1715         }
1716 
1717         /*
1718          * For us to have threads, and know about it, we have to be at least at
1719          * family 17h and have the cpuid bit that says we have extended
1720          * topology.
1721          */
1722         if (cpi->cpi_family >= 0x17 &&
1723             is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1724             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1725                 nthread_per_core = BITX(cpi->cpi_extd[0x1e].cp_ebx, 15, 8) + 1;
1726         }
1727 
1728         *ncpus = nthreads;
1729         *ncores = nthreads / nthread_per_core;
1730 }
1731 
1732 /*
1733  * Seed the initial values for the cores and threads for an Intel based
1734  * processor. These values will be overwritten if we detect that the processor
1735  * supports CPUID leaf 0xb.
1736  */
1737 static void
1738 cpuid_intel_ncores(struct cpuid_info *cpi, uint_t *ncpus, uint_t *ncores)
1739 {
1740         /*
1741          * Only seed the number of physical cores from the first level leaf 4
1742          * information. The number of threads there indicate how many share the
1743          * L1 cache, which may or may not have anything to do with the number of
1744          * logical CPUs per core.
1745          */
1746         if (cpi->cpi_maxeax >= 4) {
1747                 *ncores = BITX(cpi->cpi_std[4].cp_eax, 31, 26) + 1;
1748         } else {
1749                 *ncores = 1;
1750         }
1751 
1752         if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) {
1753                 *ncpus = CPI_CPU_COUNT(cpi);
1754         } else {
1755                 *ncpus = *ncores;
1756         }
1757 }
1758 
1759 static boolean_t
1760 cpuid_leafB_getids(cpu_t *cpu)
1761 {
1762         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1763         struct cpuid_regs regs;
1764         struct cpuid_regs *cp;
1765 
1766         if (cpi->cpi_maxeax < 0xB)
1767                 return (B_FALSE);
1768 
1769         cp = &regs;
1770         cp->cp_eax = 0xB;
1771         cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1772 
1773         (void) __cpuid_insn(cp);
1774 
1775         /*
1776          * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which
1777          * indicates that the extended topology enumeration leaf is
1778          * available.
1779          */
1780         if (cp->cp_ebx != 0) {
1781                 uint32_t x2apic_id = 0;
1782                 uint_t coreid_shift = 0;
1783                 uint_t ncpu_per_core = 1;
1784                 uint_t chipid_shift = 0;
1785                 uint_t ncpu_per_chip = 1;
1786                 uint_t i;
1787                 uint_t level;
1788 
1789                 for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
1790                         cp->cp_eax = 0xB;
1791                         cp->cp_ecx = i;
1792 
1793                         (void) __cpuid_insn(cp);
1794                         level = CPI_CPU_LEVEL_TYPE(cp);
1795 
1796                         if (level == 1) {
1797                                 x2apic_id = cp->cp_edx;
1798                                 coreid_shift = BITX(cp->cp_eax, 4, 0);
1799                                 ncpu_per_core = BITX(cp->cp_ebx, 15, 0);
1800                         } else if (level == 2) {
1801                                 x2apic_id = cp->cp_edx;
1802                                 chipid_shift = BITX(cp->cp_eax, 4, 0);
1803                                 ncpu_per_chip = BITX(cp->cp_ebx, 15, 0);
1804                         }
1805                 }
1806 
1807                 /*
1808                  * cpi_apicid is taken care of in cpuid_gather_apicid.
1809                  */
1810                 cpi->cpi_ncpu_per_chip = ncpu_per_chip;
1811                 cpi->cpi_ncore_per_chip = ncpu_per_chip /
1812                     ncpu_per_core;
1813                 cpi->cpi_chipid = x2apic_id >> chipid_shift;
1814                 cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1);
1815                 cpi->cpi_coreid = x2apic_id >> coreid_shift;
1816                 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
1817                 cpi->cpi_procnodeid = cpi->cpi_chipid;
1818                 cpi->cpi_compunitid = cpi->cpi_coreid;
1819 
1820                 if (coreid_shift > 0 && chipid_shift > coreid_shift) {
1821                         cpi->cpi_nthread_bits = coreid_shift;
1822                         cpi->cpi_ncore_bits = chipid_shift - coreid_shift;
1823                 }
1824 
1825                 return (B_TRUE);
1826         } else {
1827                 return (B_FALSE);
1828         }
1829 }
1830 
1831 static void
1832 cpuid_intel_getids(cpu_t *cpu, void *feature)
1833 {
1834         uint_t i;
1835         uint_t chipid_shift = 0;
1836         uint_t coreid_shift = 0;
1837         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1838 
1839         /*
1840          * There are no compute units or processor nodes currently on Intel.
1841          * Always set these to one.
1842          */
1843         cpi->cpi_procnodes_per_pkg = 1;
1844         cpi->cpi_cores_per_compunit = 1;
1845 
1846         /*
1847          * If cpuid Leaf B is present, use that to try and get this information.
1848          * It will be the most accurate for Intel CPUs.
1849          */
1850         if (cpuid_leafB_getids(cpu))
1851                 return;
1852 
1853         /*
1854          * In this case, we have the leaf 1 and leaf 4 values for ncpu_per_chip
1855          * and ncore_per_chip. These represent the largest power of two values
1856          * that we need to cover all of the IDs in the system. Therefore, we use
1857          * those values to seed the number of bits needed to cover information
1858          * in the case when leaf B is not available. These values will probably
1859          * be larger than required, but that's OK.
1860          */
1861         cpi->cpi_nthread_bits = ddi_fls(cpi->cpi_ncpu_per_chip);
1862         cpi->cpi_ncore_bits = ddi_fls(cpi->cpi_ncore_per_chip);
1863 
1864         for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1)
1865                 chipid_shift++;
1866 
1867         cpi->cpi_chipid = cpi->cpi_apicid >> chipid_shift;
1868         cpi->cpi_clogid = cpi->cpi_apicid & ((1 << chipid_shift) - 1);
1869 
1870         if (is_x86_feature(feature, X86FSET_CMP)) {
1871                 /*
1872                  * Multi-core (and possibly multi-threaded)
1873                  * processors.
1874                  */
1875                 uint_t ncpu_per_core;
1876                 if (cpi->cpi_ncore_per_chip == 1)
1877                         ncpu_per_core = cpi->cpi_ncpu_per_chip;
1878                 else if (cpi->cpi_ncore_per_chip > 1)
1879                         ncpu_per_core = cpi->cpi_ncpu_per_chip /
1880                             cpi->cpi_ncore_per_chip;
1881                 /*
                 * 8-bit APIC IDs on dual-core Pentiums
1883                  * look like this:
1884                  *
1885                  * +-----------------------+------+------+
1886                  * | Physical Package ID   |  MC  |  HT  |
1887                  * +-----------------------+------+------+
1888                  * <------- chipid -------->
1889                  * <------- coreid --------------->
1890                  *                         <--- clogid -->
1891                  *                         <------>
1892                  *                         pkgcoreid
1893                  *
1894                  * Where the number of bits necessary to
                 * represent the MC and HT fields together equals
                 * the minimum number of bits necessary to
1897                  * store the value of cpi->cpi_ncpu_per_chip.
1898                  * Of those bits, the MC part uses the number
1899                  * of bits necessary to store the value of
1900                  * cpi->cpi_ncore_per_chip.
1901                  */
1902                 for (i = 1; i < ncpu_per_core; i <<= 1)
1903                         coreid_shift++;
1904                 cpi->cpi_coreid = cpi->cpi_apicid >> coreid_shift;
1905                 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
1906         } else if (is_x86_feature(feature, X86FSET_HTT)) {
1907                 /*
1908                  * Single-core multi-threaded processors.
1909                  */
1910                 cpi->cpi_coreid = cpi->cpi_chipid;
1911                 cpi->cpi_pkgcoreid = 0;
1912         } else {
1913                 /*
1914                  * Single-core single-thread processors.
1915                  */
1916                 cpi->cpi_coreid = cpu->cpu_id;
1917                 cpi->cpi_pkgcoreid = 0;
1918         }
1919         cpi->cpi_procnodeid = cpi->cpi_chipid;
1920         cpi->cpi_compunitid = cpi->cpi_coreid;
1921 }
1922 
1923 /*
1924  * Historically, AMD has had CMP chips with only a single thread per core.
1925  * However, starting in family 17h (Zen), this has changed and they now have
1926  * multiple threads. Our internal core id needs to be a unique value.
1927  *
1928  * To determine the core id of an AMD system, if we're from a family before 17h,
1929  * then we just use the cpu id, as that gives us a good value that will be
1930  * unique for each core. If instead, we're on family 17h or later, then we need
 * to do something more complicated. CPUID leaf 0x8000001e can tell us how
 * many threads share a core. Based on that, we'll shift the APIC ID.
1933  * We can't use the normal core id in that leaf as it's only unique within the
1934  * socket, which is perfect for cpi_pkgcoreid, but not us.
1935  */
1936 static id_t
1937 cpuid_amd_get_coreid(cpu_t *cpu)
1938 {
1939         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1940 
1941         if (cpi->cpi_family >= 0x17 &&
1942             is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1943             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1944                 uint_t nthreads = BITX(cpi->cpi_extd[0x1e].cp_ebx, 15, 8) + 1;
1945                 if (nthreads > 1) {
1946                         VERIFY3U(nthreads, ==, 2);
1947                         return (cpi->cpi_apicid >> 1);
1948                 }
1949         }
1950 
1951         return (cpu->cpu_id);
1952 }
1953 
1954 /*
 * Determining IDs on AMD is more challenging. This is notable because of the
1956  * following two facts:
1957  *
1958  *  1. Before family 0x17 (Zen), there was no support for SMT and there was
1959  *     also no way to get an actual unique core id from the system. As such, we
1960  *     synthesize this case by using cpu->cpu_id.  This scheme does not,
1961  *     however, guarantee that sibling cores of a chip will have sequential
1962  *     coreids starting at a multiple of the number of cores per chip - that is
1963  *     usually the case, but if the ACPI MADT table is presented in a different
1964  *     order then we need to perform a few more gymnastics for the pkgcoreid.
1965  *
 *  2. In families 0x15 and 0x16 (Bulldozer and co.) the cores came in groups
1967  *     called compute units. These compute units share the L1I cache, L2 cache,
1968  *     and the FPU. To deal with this, a new topology leaf was added in
1969  *     0x8000001e. However, parts of this leaf have different meanings
1970  *     once we get to family 0x17.
1971  */
1972 
1973 static void
1974 cpuid_amd_getids(cpu_t *cpu, uchar_t *features)
1975 {
1976         int i, first_half, coreidsz;
1977         uint32_t nb_caps_reg;
1978         uint_t node2_1;
1979         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1980         struct cpuid_regs *cp;
1981 
1982         /*
1983          * Calculate the core id (this comes from hardware in family 0x17 if it
1984          * hasn't been stripped by virtualization). We always set the compute
1985          * unit id to the same value. Also, initialize the default number of
1986          * cores per compute unit and nodes per package. This will be
1987          * overwritten when we know information about a particular family.
1988          */
1989         cpi->cpi_coreid = cpuid_amd_get_coreid(cpu);
1990         cpi->cpi_compunitid = cpi->cpi_coreid;
1991         cpi->cpi_cores_per_compunit = 1;
1992         cpi->cpi_procnodes_per_pkg = 1;
1993 
1994         /*
1995          * To construct the logical ID, we need to determine how many APIC IDs
1996          * are dedicated to the cores and threads. This is provided for us in
1997          * 0x80000008. However, if it's not present (say due to virtualization),
1998          * then we assume it's one. This should be present on all 64-bit AMD
1999          * processors.  It was added in family 0xf (Hammer).
2000          */
2001         if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
2002                 coreidsz = BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12);
2003 
2004                 /*
                 * In AMD parlance, a chip is really a node, while illumos
                 * uses chip as equivalent to socket/package.
2007                  */
2008                 if (coreidsz == 0) {
2009                         /* Use legacy method */
2010                         for (i = 1; i < cpi->cpi_ncore_per_chip; i <<= 1)
2011                                 coreidsz++;
2012                         if (coreidsz == 0)
2013                                 coreidsz = 1;
2014                 }
2015         } else {
2016                 /* Assume single-core part */
2017                 coreidsz = 1;
2018         }
2019         cpi->cpi_clogid = cpi->cpi_apicid & ((1 << coreidsz) - 1);
2020 
2021         /*
2022          * The package core ID varies depending on the family. For family 17h,
2023          * we can get this directly from leaf CPUID_LEAF_EXT_1e. Otherwise, we
2024          * can use the clogid as is. When family 17h is virtualized, the clogid
2025          * should be sufficient as if we don't have valid data in the leaf, then
2026          * we won't think we have SMT, in which case the cpi_clogid should be
2027          * sufficient.
2028          */
2029         if (cpi->cpi_family >= 0x17 &&
2030             is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
2031             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e &&
2032             cpi->cpi_extd[0x1e].cp_ebx != 0) {
2033                 cpi->cpi_pkgcoreid = BITX(cpi->cpi_extd[0x1e].cp_ebx, 7, 0);
2034         } else {
2035                 cpi->cpi_pkgcoreid = cpi->cpi_clogid;
2036         }
2037 
2038         /*
2039          * Obtain the node ID and compute unit IDs. If we're on family 0x15
2040          * (bulldozer) or newer, then we can derive all of this from leaf
2041          * CPUID_LEAF_EXT_1e. Otherwise, the method varies by family.
2042          */
2043         if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
2044             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
2045                 cp = &cpi->cpi_extd[0x1e];
2046 
2047                 cpi->cpi_procnodes_per_pkg = BITX(cp->cp_ecx, 10, 8) + 1;
2048                 cpi->cpi_procnodeid = BITX(cp->cp_ecx, 7, 0);
2049 
2050                 /*
2051                  * For Bulldozer-era CPUs, recalculate the compute unit
2052                  * information.
2053                  */
2054                 if (cpi->cpi_family >= 0x15 && cpi->cpi_family < 0x17) {
2055                         cpi->cpi_cores_per_compunit =
2056                             BITX(cp->cp_ebx, 15, 8) + 1;
2057                         cpi->cpi_compunitid = BITX(cp->cp_ebx, 7, 0) +
2058                             (cpi->cpi_ncore_per_chip /
2059                             cpi->cpi_cores_per_compunit) *
2060                             (cpi->cpi_procnodeid /
2061                             cpi->cpi_procnodes_per_pkg);
2062                 }
2063         } else if (cpi->cpi_family == 0xf || cpi->cpi_family >= 0x11) {
2064                 cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7;
2065         } else if (cpi->cpi_family == 0x10) {
2066                 /*
2067                  * See if we are a multi-node processor.
                 * All processors in the system have the same number of nodes.
2069                  */
2070                 nb_caps_reg =  pci_getl_func(0, 24, 3, 0xe8);
2071                 if ((cpi->cpi_model < 8) || BITX(nb_caps_reg, 29, 29) == 0) {
2072                         /* Single-node */
2073                         cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5,
2074                             coreidsz);
2075                 } else {
2076 
2077                         /*
2078                          * Multi-node revision D (2 nodes per package
2079                          * are supported)
2080                          */
2081                         cpi->cpi_procnodes_per_pkg = 2;
2082 
2083                         first_half = (cpi->cpi_pkgcoreid <=
2084                             (cpi->cpi_ncore_per_chip/2 - 1));
2085 
2086                         if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) {
2087                                 /* We are BSP */
2088                                 cpi->cpi_procnodeid = (first_half ? 0 : 1);
2089                         } else {
2090 
2091                                 /* We are AP */
2092                                 /* NodeId[2:1] bits to use for reading F3xe8 */
2093                                 node2_1 = BITX(cpi->cpi_apicid, 5, 4) << 1;
2094 
2095                                 nb_caps_reg =
2096                                     pci_getl_func(0, 24 + node2_1, 3, 0xe8);
2097 
2098                                 /*
2099                                  * Check IntNodeNum bit (31:30, but bit 31 is
2100                                  * always 0 on dual-node processors)
2101                                  */
2102                                 if (BITX(nb_caps_reg, 30, 30) == 0)
2103                                         cpi->cpi_procnodeid = node2_1 +
2104                                             !first_half;
2105                                 else
2106                                         cpi->cpi_procnodeid = node2_1 +
2107                                             first_half;
2108                         }
2109                 }
2110         } else {
2111                 cpi->cpi_procnodeid = 0;
2112         }
2113 
2114         cpi->cpi_chipid =
2115             cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg;
2116 
2117         cpi->cpi_ncore_bits = coreidsz;
2118         cpi->cpi_nthread_bits = ddi_fls(cpi->cpi_ncpu_per_chip /
2119             cpi->cpi_ncore_per_chip);
2120 }
2121 
2122 static void
2123 cpuid_scan_security(cpu_t *cpu, uchar_t *featureset)
2124 {
2125         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2126 
2127         if (cpi->cpi_vendor == X86_VENDOR_AMD &&
2128             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
2129                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBPB)
2130                         add_x86_feature(featureset, X86FSET_IBPB);
2131                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBRS)
2132                         add_x86_feature(featureset, X86FSET_IBRS);
2133                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_STIBP)
2134                         add_x86_feature(featureset, X86FSET_STIBP);
2135                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBRS_ALL)
2136                         add_x86_feature(featureset, X86FSET_IBRS_ALL);
2137                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_STIBP_ALL)
2138                         add_x86_feature(featureset, X86FSET_STIBP_ALL);
2139                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_PREFER_IBRS)
2140                         add_x86_feature(featureset, X86FSET_RSBA);
2141                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_SSBD)
2142                         add_x86_feature(featureset, X86FSET_SSBD);
2143                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_VIRT_SSBD)
2144                         add_x86_feature(featureset, X86FSET_SSBD_VIRT);
2145                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_SSB_NO)
2146                         add_x86_feature(featureset, X86FSET_SSB_NO);
2147         } else if (cpi->cpi_vendor == X86_VENDOR_Intel &&
2148             cpi->cpi_maxeax >= 7) {
2149                 struct cpuid_regs *ecp;
2150                 ecp = &cpi->cpi_std[7];
2151 
2152                 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_SPEC_CTRL) {
2153                         add_x86_feature(featureset, X86FSET_IBRS);
2154                         add_x86_feature(featureset, X86FSET_IBPB);
2155                 }
2156 
2157                 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_STIBP) {
2158                         add_x86_feature(featureset, X86FSET_STIBP);
2159                 }
2160 
2161                 /*
2162                  * Don't read the arch caps MSR on xpv where we lack the
2163                  * on_trap().
2164                  */
2165 #ifndef __xpv
2166                 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_ARCH_CAPS) {
2167                         on_trap_data_t otd;
2168 
2169                         /*
2170                          * Be paranoid and assume we'll get a #GP.
2171                          */
2172                         if (!on_trap(&otd, OT_DATA_ACCESS)) {
2173                                 uint64_t reg;
2174 
2175                                 reg = rdmsr(MSR_IA32_ARCH_CAPABILITIES);
2176                                 if (reg & IA32_ARCH_CAP_RDCL_NO) {
2177                                         add_x86_feature(featureset,
2178                                             X86FSET_RDCL_NO);
2179                                 }
2180                                 if (reg & IA32_ARCH_CAP_IBRS_ALL) {
2181                                         add_x86_feature(featureset,
2182                                             X86FSET_IBRS_ALL);
2183                                 }
2184                                 if (reg & IA32_ARCH_CAP_RSBA) {
2185                                         add_x86_feature(featureset,
2186                                             X86FSET_RSBA);
2187                                 }
2188                                 if (reg & IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY) {
2189                                         add_x86_feature(featureset,
2190                                             X86FSET_L1D_VM_NO);
2191                                 }
2192                                 if (reg & IA32_ARCH_CAP_SSB_NO) {
2193                                         add_x86_feature(featureset,
2194                                             X86FSET_SSB_NO);
2195                                 }
2196                         }
2197                         no_trap();
2198                 }
2199 #endif  /* !__xpv */
2200 
2201                 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_SSBD)
2202                         add_x86_feature(featureset, X86FSET_SSBD);
2203 
2204                 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_FLUSH_CMD)
2205                         add_x86_feature(featureset, X86FSET_FLUSH_CMD);
2206         }
2207 }
2208 
2209 /*
2210  * Setup XFeature_Enabled_Mask register. Required by xsave feature.
2211  */
2212 void
2213 setup_xfem(void)
2214 {
2215         uint64_t flags = XFEATURE_LEGACY_FP;
2216 
2217         ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
2218 
2219         if (is_x86_feature(x86_featureset, X86FSET_SSE))
2220                 flags |= XFEATURE_SSE;
2221 
2222         if (is_x86_feature(x86_featureset, X86FSET_AVX))
2223                 flags |= XFEATURE_AVX;
2224 
2225         if (is_x86_feature(x86_featureset, X86FSET_AVX512F))
2226                 flags |= XFEATURE_AVX512;
2227 
2228         set_xcr(XFEATURE_ENABLED_MASK, flags);
2229 
2230         xsave_bv_all = flags;
2231 }
2232 
2233 static void
2234 cpuid_pass1_topology(cpu_t *cpu, uchar_t *featureset)
2235 {
2236         struct cpuid_info *cpi;
2237 
2238         cpi = cpu->cpu_m.mcpu_cpi;
2239 
2240         if (cpi->cpi_vendor == X86_VENDOR_AMD) {
2241                 cpuid_gather_amd_topology_leaves(cpu);
2242         }
2243 
2244         cpi->cpi_apicid = cpuid_gather_apicid(cpi);
2245 
2246         /*
2247          * Before we can calculate the IDs that we should assign to this
2248          * processor, we need to understand how many cores and threads it has.
2249          */
2250         switch (cpi->cpi_vendor) {
2251         case X86_VENDOR_Intel:
2252                 cpuid_intel_ncores(cpi, &cpi->cpi_ncpu_per_chip,
2253                     &cpi->cpi_ncore_per_chip);
2254                 break;
2255         case X86_VENDOR_AMD:
2256                 cpuid_amd_ncores(cpi, &cpi->cpi_ncpu_per_chip,
2257                     &cpi->cpi_ncore_per_chip);
2258                 break;
2259         default:
2260                 /*
2261                  * If we have some other x86 compatible chip, it's not clear how
2262                  * they would behave. The most common case is virtualization
2263                  * today, though there are also 64-bit VIA chips. Assume that
2264                  * all we can get is the basic Leaf 1 HTT information.
2265                  */
2266                 if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) {
2267                         cpi->cpi_ncore_per_chip = 1;
2268                         cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi);
2269                 }
2270                 break;
2271         }
2272 
2273         /*
2274          * Based on the calculated number of threads and cores, potentially
2275          * assign the HTT and CMT features.
2276          */
2277         if (cpi->cpi_ncore_per_chip > 1) {
2278                 add_x86_feature(featureset, X86FSET_CMP);
2279         }
2280 
2281         if (cpi->cpi_ncpu_per_chip > 1 &&
2282             cpi->cpi_ncpu_per_chip != cpi->cpi_ncore_per_chip) {
2283                 add_x86_feature(featureset, X86FSET_HTT);
2284         }
2285 
2286         /*
 * Now that the core and thread counts have been set up, we can go through
 * and calculate the rest of the parameters. If we think the CPU doesn't
2289          * have either SMT (HTT) or CMP, then we basically go through and fake
2290          * up information in some way. The most likely case for this is
2291          * virtualization where we have a lot of partial topology information.
2292          */
2293         if (!is_x86_feature(featureset, X86FSET_HTT) &&
2294             !is_x86_feature(featureset, X86FSET_CMP)) {
2295                 /*
2296                  * This is a single core, single-threaded processor.
2297                  */
2298                 cpi->cpi_procnodes_per_pkg = 1;
2299                 cpi->cpi_cores_per_compunit = 1;
2300                 cpi->cpi_compunitid = 0;
2301                 cpi->cpi_chipid = -1;
2302                 cpi->cpi_clogid = 0;
2303                 cpi->cpi_coreid = cpu->cpu_id;
2304                 cpi->cpi_pkgcoreid = 0;
2305                 if (cpi->cpi_vendor == X86_VENDOR_AMD) {
2306                         cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 3, 0);
2307                 } else {
2308                         cpi->cpi_procnodeid = cpi->cpi_chipid;
2309                 }
2310         } else {
2311                 switch (cpi->cpi_vendor) {
2312                 case X86_VENDOR_Intel:
2313                         cpuid_intel_getids(cpu, featureset);
2314                         break;
2315                 case X86_VENDOR_AMD:
2316                         cpuid_amd_getids(cpu, featureset);
2317                         break;
2318                 default:
2319                         /*
2320                          * In this case, it's hard to say what we should do.
                         * We're going to model them to the OS as
                         * single-threaded cores. We don't have a good
                         * identifier for them, so we're just going to use
                         * the cpu id, all on a single chip.
2325                          *
2326                          * This case has historically been different from the
2327                          * case above where we don't have HTT or CMP. While they
2328                          * could be combined, we've opted to keep it separate to
2329                          * minimize the risk of topology changes in weird cases.
2330                          */
2331                         cpi->cpi_procnodes_per_pkg = 1;
2332                         cpi->cpi_cores_per_compunit = 1;
2333                         cpi->cpi_chipid = 0;
2334                         cpi->cpi_coreid = cpu->cpu_id;
2335                         cpi->cpi_clogid = cpu->cpu_id;
2336                         cpi->cpi_pkgcoreid = cpu->cpu_id;
2337                         cpi->cpi_procnodeid = cpi->cpi_chipid;
2338                         cpi->cpi_compunitid = cpi->cpi_coreid;
2339                         break;
2340                 }
2341         }
2342 }
2343 
2344 void
2345 cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
2346 {
2347         uint32_t mask_ecx, mask_edx;
2348         struct cpuid_info *cpi;
2349         struct cpuid_regs *cp;
2350         int xcpuid;
2351 #if !defined(__xpv)
2352         extern int idle_cpu_prefer_mwait;
2353 #endif
2354 
2355         /*
2356          * Space is statically allocated for the BSP; make sure the pointer is set.
2357          */
2358         if (cpu->cpu_id == 0) {
2359                 if (cpu->cpu_m.mcpu_cpi == NULL)
2360                         cpu->cpu_m.mcpu_cpi = &cpuid_info0;
2361         }
2362 
2363         add_x86_feature(featureset, X86FSET_CPUID);
2364 
2365         cpi = cpu->cpu_m.mcpu_cpi;
2366         ASSERT(cpi != NULL);
2367         cp = &cpi->cpi_std[0];
2368         cp->cp_eax = 0;
2369         cpi->cpi_maxeax = __cpuid_insn(cp);
2370         {
2371                 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr;
2372                 *iptr++ = cp->cp_ebx;
2373                 *iptr++ = cp->cp_edx;
2374                 *iptr++ = cp->cp_ecx;
2375                 *(char *)&cpi->cpi_vendorstr[12] = '\0';
2376         }
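
             /*
              * Note the register order: the 12-byte vendor string is spelled
              * out in %ebx, %edx, %ecx -- so e.g. "AuthenticAMD" arrives as
              * "Auth" (%ebx), "enti" (%edx), "cAMD" (%ecx).
              */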
2377 
2378         cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr);
2379         x86_vendor = cpi->cpi_vendor; /* for compatibility */
2380 
2381         /*
2382          * Limit the range in case of weird hardware
2383          */
2384         if (cpi->cpi_maxeax > CPI_MAXEAX_MAX)
2385                 cpi->cpi_maxeax = CPI_MAXEAX_MAX;
2386         if (cpi->cpi_maxeax < 1)
2387                 goto pass1_done;
2388 
2389         cp = &cpi->cpi_std[1];
2390         cp->cp_eax = 1;
2391         (void) __cpuid_insn(cp);
2392 
2393         /*
2394          * Extract identifying constants for easy access.
2395          */
2396         cpi->cpi_model = CPI_MODEL(cpi);
2397         cpi->cpi_family = CPI_FAMILY(cpi);
2398 
2399         if (cpi->cpi_family == 0xf)
2400                 cpi->cpi_family += CPI_FAMILY_XTD(cpi);
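
             /*
              * For example, an AMD Zen part reports base family 0xf and
              * extended family 0x8, yielding cpi_family 0x17.
              */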
2401 
2402         /*
2403          * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf.
2404          * Intel, and presumably everyone else, uses model == 0xf, as
2405          * one would expect (max value means possible overflow).  Sigh.
2406          */
2407 
2408         switch (cpi->cpi_vendor) {
2409         case X86_VENDOR_Intel:
2410                 if (IS_EXTENDED_MODEL_INTEL(cpi))
2411                         cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
2412                 break;
2413         case X86_VENDOR_AMD:
2414                 if (CPI_FAMILY(cpi) == 0xf)
2415                         cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
2416                 break;
2417         default:
2418                 if (cpi->cpi_model == 0xf)
2419                         cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
2420                 break;
2421         }
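
             /*
              * So, for example, an Intel part with base family 0x6, base
              * model 0xa, and extended model 0x2 ends up with cpi_model 0x2a.
              */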
2422 
2423         cpi->cpi_step = CPI_STEP(cpi);
2424         cpi->cpi_brandid = CPI_BRANDID(cpi);
2425 
2426         /*
2427          * *default* assumptions:
2428          * - believe %edx feature word
2429          * - ignore %ecx feature word
2430          * - 32-bit virtual and physical addressing
2431          */
2432         mask_edx = 0xffffffff;
2433         mask_ecx = 0;
2434 
2435         cpi->cpi_pabits = cpi->cpi_vabits = 32;
2436 
2437         switch (cpi->cpi_vendor) {
2438         case X86_VENDOR_Intel:
2439                 if (cpi->cpi_family == 5)
2440                         x86_type = X86_TYPE_P5;
2441                 else if (IS_LEGACY_P6(cpi)) {
2442                         x86_type = X86_TYPE_P6;
2443                         pentiumpro_bug4046376 = 1;
2444                         /*
2445                          * Clear the SEP bit if it was set erroneously
2446                          */
2447                         if (cpi->cpi_model < 3 && cpi->cpi_step < 3)
2448                                 cp->cp_edx &= ~CPUID_INTC_EDX_SEP;
2449                 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) {
2450                         x86_type = X86_TYPE_P4;
2451                         /*
2452                          * We don't currently depend on any of the %ecx
2453                          * features until Prescott, so we'll only check
2454                          * this from P4 onwards.  We might want to revisit
2455                          * that idea later.
2456                          */
2457                         mask_ecx = 0xffffffff;
2458                 } else if (cpi->cpi_family > 0xf)
2459                         mask_ecx = 0xffffffff;
2460                 /*
2461                  * We don't support MONITOR/MWAIT if leaf 5 is not available
2462                  * to obtain the monitor linesize.
2463                  */
2464                 if (cpi->cpi_maxeax < 5)
2465                         mask_ecx &= ~CPUID_INTC_ECX_MON;
2466                 break;
2467         case X86_VENDOR_IntelClone:
2468         default:
2469                 break;
2470         case X86_VENDOR_AMD:
2471 #if defined(OPTERON_ERRATUM_108)
2472                 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) {
2473                         cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0;
2474                         cpi->cpi_model = 0xc;
2475                 } else
2476 #endif
2477                 if (cpi->cpi_family == 5) {
2478                         /*
2479                          * AMD K5 and K6
2480                          *
2481                          * These CPUs have an incomplete implementation
2482                          * of MCA/MCE which we mask away.
2483                          */
2484                         mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA);
2485 
2486                         /*
2487                          * Model 0 uses the wrong (APIC) bit
2488                          * to indicate PGE.  Fix it here.
2489                          */
2490                         if (cpi->cpi_model == 0) {
2491                                 if (cp->cp_edx & 0x200) {
2492                                         cp->cp_edx &= ~0x200;
2493                                         cp->cp_edx |= CPUID_INTC_EDX_PGE;
2494                                 }
2495                         }
2496 
2497                         /*
2498                          * Early models had problems with MMX; disable it.
2499                          */
2500                         if (cpi->cpi_model < 6)
2501                                 mask_edx &= ~CPUID_INTC_EDX_MMX;
2502                 }
2503 
2504                 /*
2505                  * For newer families, SSE3 and CX16, at least, are valid;
2506                  * enable all
2507                  */
2508                 if (cpi->cpi_family >= 0xf)
2509                         mask_ecx = 0xffffffff;
2510                 /*
2511                  * We don't support MONITOR/MWAIT if leaf 5 is not available
2512                  * to obtain the monitor linesize.
2513                  */
2514                 if (cpi->cpi_maxeax < 5)
2515                         mask_ecx &= ~CPUID_INTC_ECX_MON;
2516 
2517 #if !defined(__xpv)
2518                 /*
2519                  * AMD has not historically used MWAIT in the CPU's idle loop.
2520                  * Pre-family-10h Opterons do not have the MWAIT instruction. We
2521                  * know for certain that in at least family 17h, per AMD, mwait
2522                  * is preferred. For the families in between, we are less certain.
2523                  */
2524                 if (cpi->cpi_family < 0x17) {
2525                         idle_cpu_prefer_mwait = 0;
2526                 }
2527 #endif
2528 
2529                 break;
2530         case X86_VENDOR_TM:
2531                 /*
2532                  * work around the NT workaround in CMS 4.1
2533                  */
2534                 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 &&
2535                     (cpi->cpi_step == 2 || cpi->cpi_step == 3))
2536                         cp->cp_edx |= CPUID_INTC_EDX_CX8;
2537                 break;
2538         case X86_VENDOR_Centaur:
2539                 /*
2540                  * work around the NT workarounds again
2541                  */
2542                 if (cpi->cpi_family == 6)
2543                         cp->cp_edx |= CPUID_INTC_EDX_CX8;
2544                 break;
2545         case X86_VENDOR_Cyrix:
2546                 /*
2547                  * We rely heavily on the probing in locore
2548                  * to actually figure out what parts, if any,
2549                  * of the Cyrix cpuid instruction to believe.
2550                  */
2551                 switch (x86_type) {
2552                 case X86_TYPE_CYRIX_486:
2553                         mask_edx = 0;
2554                         break;
2555                 case X86_TYPE_CYRIX_6x86:
2556                         mask_edx = 0;
2557                         break;
2558                 case X86_TYPE_CYRIX_6x86L:
2559                         mask_edx =
2560                             CPUID_INTC_EDX_DE |
2561                             CPUID_INTC_EDX_CX8;
2562                         break;
2563                 case X86_TYPE_CYRIX_6x86MX:
2564                         mask_edx =
2565                             CPUID_INTC_EDX_DE |
2566                             CPUID_INTC_EDX_MSR |
2567                             CPUID_INTC_EDX_CX8 |
2568                             CPUID_INTC_EDX_PGE |
2569                             CPUID_INTC_EDX_CMOV |
2570                             CPUID_INTC_EDX_MMX;
2571                         break;
2572                 case X86_TYPE_CYRIX_GXm:
2573                         mask_edx =
2574                             CPUID_INTC_EDX_MSR |
2575                             CPUID_INTC_EDX_CX8 |
2576                             CPUID_INTC_EDX_CMOV |
2577                             CPUID_INTC_EDX_MMX;
2578                         break;
2579                 case X86_TYPE_CYRIX_MediaGX:
2580                         break;
2581                 case X86_TYPE_CYRIX_MII:
2582                 case X86_TYPE_VIA_CYRIX_III:
2583                         mask_edx =
2584                             CPUID_INTC_EDX_DE |
2585                             CPUID_INTC_EDX_TSC |
2586                             CPUID_INTC_EDX_MSR |
2587                             CPUID_INTC_EDX_CX8 |
2588                             CPUID_INTC_EDX_PGE |
2589                             CPUID_INTC_EDX_CMOV |
2590                             CPUID_INTC_EDX_MMX;
2591                         break;
2592                 default:
2593                         break;
2594                 }
2595                 break;
2596         }
2597 
2598 #if defined(__xpv)
2599         /*
2600          * Do not support MONITOR/MWAIT under a hypervisor
2601          */
2602         mask_ecx &= ~CPUID_INTC_ECX_MON;
2603         /*
2604          * Do not support XSAVE under a hypervisor for now
2605          */
2606         xsave_force_disable = B_TRUE;
2607 
2608 #endif  /* __xpv */
2609 
2610         if (xsave_force_disable) {
2611                 mask_ecx &= ~CPUID_INTC_ECX_XSAVE;
2612                 mask_ecx &= ~CPUID_INTC_ECX_AVX;
2613                 mask_ecx &= ~CPUID_INTC_ECX_F16C;
2614                 mask_ecx &= ~CPUID_INTC_ECX_FMA;
2615         }
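
             /*
              * AVX, F16C, and FMA all operate on state that only XSAVE can
              * save and restore, which is why they are masked out whenever
              * XSAVE itself is disabled.
              */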
2616 
2617         /*
2618          * Now that we've figured out the masks that determine
2619          * which bits we choose to believe, apply them to the
2620          * feature words, then map the kernel's view of these
2621          * feature words into its feature word.
2622          */
2623         cp->cp_edx &= mask_edx;
2624         cp->cp_ecx &= mask_ecx;
2625 
2626         /*
2627          * apply any platform restrictions (we don't call this
2628          * immediately after __cpuid_insn here, because we need the
2629          * workarounds applied above first)
2630          */
2631         platform_cpuid_mangle(cpi->cpi_vendor, 1, cp);
2632 
2633         /*
2634          * In addition to ecx and edx, Intel and AMD store a number of
2635          * instruction set extensions in leaf 7's ebx, ecx, and edx.
2636          */
2637         if (cpi->cpi_maxeax >= 7) {
2638                 struct cpuid_regs *ecp;
2639                 ecp = &cpi->cpi_std[7];
2640                 ecp->cp_eax = 7;
2641                 ecp->cp_ecx = 0;
2642                 (void) __cpuid_insn(ecp);
2643 
2644                 /*
2645                  * If XSAVE has been disabled, just ignore all of the
2646                  * extended-save-area dependent flags here.
2647                  */
2648                 if (xsave_force_disable) {
2649                         ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_BMI1;
2650                         ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_BMI2;
2651                         ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_AVX2;
2652                         ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_MPX;
2653                         ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_ALL_AVX512;
2654                         ecp->cp_ecx &= ~CPUID_INTC_ECX_7_0_ALL_AVX512;
2655                         ecp->cp_edx &= ~CPUID_INTC_EDX_7_0_ALL_AVX512;
2656                 }
2657 
2658                 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_SMEP)
2659                         add_x86_feature(featureset, X86FSET_SMEP);
2660 
2661                 /*
2662                  * We check disable_smap here in addition to in startup_smap()
2663                  * to ensure CPUs that aren't the boot CPU don't accidentally
2664                  * include it in the feature set and thus generate a mismatched
2665                  * x86 feature set across CPUs.
2666                  */
2667                 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_SMAP &&
2668                     disable_smap == 0)
2669                         add_x86_feature(featureset, X86FSET_SMAP);
2670 
2671                 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_RDSEED)
2672                         add_x86_feature(featureset, X86FSET_RDSEED);
2673 
2674                 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_ADX)
2675                         add_x86_feature(featureset, X86FSET_ADX);
2676 
2677                 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_FSGSBASE)
2678                         add_x86_feature(featureset, X86FSET_FSGSBASE);
2679 
2680                 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_CLFLUSHOPT)
2681                         add_x86_feature(featureset, X86FSET_CLFLUSHOPT);
2682 
2683                 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
2684                         if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_INVPCID)
2685                                 add_x86_feature(featureset, X86FSET_INVPCID);
2686 
2687                         if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_MPX)
2688                                 add_x86_feature(featureset, X86FSET_MPX);
2689 
2690                         if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_CLWB)
2691                                 add_x86_feature(featureset, X86FSET_CLWB);
2692                 }
2693         }
2694 
2695         /*
2696          * fold in overrides from the "eeprom" mechanism
2697          */
2698         cp->cp_edx |= cpuid_feature_edx_include;
2699         cp->cp_edx &= ~cpuid_feature_edx_exclude;
2700 
2701         cp->cp_ecx |= cpuid_feature_ecx_include;
2702         cp->cp_ecx &= ~cpuid_feature_ecx_exclude;
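
             /*
              * These include/exclude words are ordinary kernel globals, so a
              * feature bit can be forced off from e.g. /etc/system. A
              * hypothetical example that would mask the XSAVE bit (bit 26 of
              * %ecx):
              *
              *      set cpuid_feature_ecx_exclude = 0x04000000
              */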
2703 
2704         if (cp->cp_edx & CPUID_INTC_EDX_PSE) {
2705                 add_x86_feature(featureset, X86FSET_LARGEPAGE);
2706         }
2707         if (cp->cp_edx & CPUID_INTC_EDX_TSC) {
2708                 add_x86_feature(featureset, X86FSET_TSC);
2709         }
2710         if (cp->cp_edx & CPUID_INTC_EDX_MSR) {
2711                 add_x86_feature(featureset, X86FSET_MSR);
2712         }
2713         if (cp->cp_edx & CPUID_INTC_EDX_MTRR) {
2714                 add_x86_feature(featureset, X86FSET_MTRR);
2715         }
2716         if (cp->cp_edx & CPUID_INTC_EDX_PGE) {
2717                 add_x86_feature(featureset, X86FSET_PGE);
2718         }
2719         if (cp->cp_edx & CPUID_INTC_EDX_CMOV) {
2720                 add_x86_feature(featureset, X86FSET_CMOV);
2721         }
2722         if (cp->cp_edx & CPUID_INTC_EDX_MMX) {
2723                 add_x86_feature(featureset, X86FSET_MMX);
2724         }
2725         if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 &&
2726             (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) {
2727                 add_x86_feature(featureset, X86FSET_MCA);
2728         }
2729         if (cp->cp_edx & CPUID_INTC_EDX_PAE) {
2730                 add_x86_feature(featureset, X86FSET_PAE);
2731         }
2732         if (cp->cp_edx & CPUID_INTC_EDX_CX8) {
2733                 add_x86_feature(featureset, X86FSET_CX8);
2734         }
2735         if (cp->cp_ecx & CPUID_INTC_ECX_CX16) {
2736                 add_x86_feature(featureset, X86FSET_CX16);
2737         }
2738         if (cp->cp_edx & CPUID_INTC_EDX_PAT) {
2739                 add_x86_feature(featureset, X86FSET_PAT);
2740         }
2741         if (cp->cp_edx & CPUID_INTC_EDX_SEP) {
2742                 add_x86_feature(featureset, X86FSET_SEP);
2743         }
2744         if (cp->cp_edx & CPUID_INTC_EDX_FXSR) {
2745                 /*
2746                  * In our implementation, fxsave/fxrstor
2747                  * are prerequisites before we'll even
2748                  * try to do SSE things.
2749                  */
2750                 if (cp->cp_edx & CPUID_INTC_EDX_SSE) {
2751                         add_x86_feature(featureset, X86FSET_SSE);
2752                 }
2753                 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) {
2754                         add_x86_feature(featureset, X86FSET_SSE2);
2755                 }
2756                 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) {
2757                         add_x86_feature(featureset, X86FSET_SSE3);
2758                 }
2759                 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) {
2760                         add_x86_feature(featureset, X86FSET_SSSE3);
2761                 }
2762                 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) {
2763                         add_x86_feature(featureset, X86FSET_SSE4_1);
2764                 }
2765                 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) {
2766                         add_x86_feature(featureset, X86FSET_SSE4_2);
2767                 }
2768                 if (cp->cp_ecx & CPUID_INTC_ECX_AES) {
2769                         add_x86_feature(featureset, X86FSET_AES);
2770                 }
2771                 if (cp->cp_ecx & CPUID_INTC_ECX_PCLMULQDQ) {
2772                         add_x86_feature(featureset, X86FSET_PCLMULQDQ);
2773                 }
2774 
2775                 if (cpi->cpi_std[7].cp_ebx & CPUID_INTC_EBX_7_0_SHA)
2776                         add_x86_feature(featureset, X86FSET_SHA);
2777 
2778                 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_UMIP)
2779                         add_x86_feature(featureset, X86FSET_UMIP);
2780                 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_PKU)
2781                         add_x86_feature(featureset, X86FSET_PKU);
2782                 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_OSPKE)
2783                         add_x86_feature(featureset, X86FSET_OSPKE);
2784 
2785                 if (cp->cp_ecx & CPUID_INTC_ECX_XSAVE) {
2786                         add_x86_feature(featureset, X86FSET_XSAVE);
2787 
2788                         /* We only test AVX & AVX512 when there is XSAVE */
2789 
2790                         if (cp->cp_ecx & CPUID_INTC_ECX_AVX) {
2791                                 add_x86_feature(featureset,
2792                                     X86FSET_AVX);
2793 
2794                                 /*
2795                                  * Intel says we can't check these without also
2796                                  * checking AVX.
2797                                  */
2798                                 if (cp->cp_ecx & CPUID_INTC_ECX_F16C)
2799                                         add_x86_feature(featureset,
2800                                             X86FSET_F16C);
2801 
2802                                 if (cp->cp_ecx & CPUID_INTC_ECX_FMA)
2803                                         add_x86_feature(featureset,
2804                                             X86FSET_FMA);
2805 
2806                                 if (cpi->cpi_std[7].cp_ebx &
2807                                     CPUID_INTC_EBX_7_0_BMI1)
2808                                         add_x86_feature(featureset,
2809                                             X86FSET_BMI1);
2810 
2811                                 if (cpi->cpi_std[7].cp_ebx &
2812                                     CPUID_INTC_EBX_7_0_BMI2)
2813                                         add_x86_feature(featureset,
2814                                             X86FSET_BMI2);
2815 
2816                                 if (cpi->cpi_std[7].cp_ebx &
2817                                     CPUID_INTC_EBX_7_0_AVX2)
2818                                         add_x86_feature(featureset,
2819                                             X86FSET_AVX2);
2820                         }
2821 
2822                         if (cpi->cpi_vendor == X86_VENDOR_Intel &&
2823                             (cpi->cpi_std[7].cp_ebx &
2824                             CPUID_INTC_EBX_7_0_AVX512F) != 0) {
2825                                 add_x86_feature(featureset, X86FSET_AVX512F);
2826 
2827                                 if (cpi->cpi_std[7].cp_ebx &
2828                                     CPUID_INTC_EBX_7_0_AVX512DQ)
2829                                         add_x86_feature(featureset,
2830                                             X86FSET_AVX512DQ);
2831                                 if (cpi->cpi_std[7].cp_ebx &
2832                                     CPUID_INTC_EBX_7_0_AVX512IFMA)
2833                                         add_x86_feature(featureset,
2834                                             X86FSET_AVX512FMA);
2835                                 if (cpi->cpi_std[7].cp_ebx &
2836                                     CPUID_INTC_EBX_7_0_AVX512PF)
2837                                         add_x86_feature(featureset,
2838                                             X86FSET_AVX512PF);
2839                                 if (cpi->cpi_std[7].cp_ebx &
2840                                     CPUID_INTC_EBX_7_0_AVX512ER)
2841                                         add_x86_feature(featureset,
2842                                             X86FSET_AVX512ER);
2843                                 if (cpi->cpi_std[7].cp_ebx &
2844                                     CPUID_INTC_EBX_7_0_AVX512CD)
2845                                         add_x86_feature(featureset,
2846                                             X86FSET_AVX512CD);
2847                                 if (cpi->cpi_std[7].cp_ebx &
2848                                     CPUID_INTC_EBX_7_0_AVX512BW)
2849                                         add_x86_feature(featureset,
2850                                             X86FSET_AVX512BW);
2851                                 if (cpi->cpi_std[7].cp_ebx &
2852                                     CPUID_INTC_EBX_7_0_AVX512VL)
2853                                         add_x86_feature(featureset,
2854                                             X86FSET_AVX512VL);
2855 
2856                                 if (cpi->cpi_std[7].cp_ecx &
2857                                     CPUID_INTC_ECX_7_0_AVX512VBMI)
2858                                         add_x86_feature(featureset,
2859                                             X86FSET_AVX512VBMI);
2860                                 if (cpi->cpi_std[7].cp_ecx &
2861                                     CPUID_INTC_ECX_7_0_AVX512VNNI)
2862                                         add_x86_feature(featureset,
2863                                             X86FSET_AVX512VNNI);
2864                                 if (cpi->cpi_std[7].cp_ecx &
2865                                     CPUID_INTC_ECX_7_0_AVX512VPOPCDQ)
2866                                         add_x86_feature(featureset,
2867                                             X86FSET_AVX512VPOPCDQ);
2868 
2869                                 if (cpi->cpi_std[7].cp_edx &
2870                                     CPUID_INTC_EDX_7_0_AVX5124NNIW)
2871                                         add_x86_feature(featureset,
2872                                             X86FSET_AVX512NNIW);
2873                                 if (cpi->cpi_std[7].cp_edx &
2874                                     CPUID_INTC_EDX_7_0_AVX5124FMAPS)
2875                                         add_x86_feature(featureset,
2876                                             X86FSET_AVX512FMAPS);
2877                         }
2878                 }
2879         }
2880 
2881         if (cpi->cpi_vendor == X86_VENDOR_Intel) {
2882                 if (cp->cp_ecx & CPUID_INTC_ECX_PCID) {
2883                         add_x86_feature(featureset, X86FSET_PCID);
2884                 }
2885         }
2886 
2887         if (cp->cp_ecx & CPUID_INTC_ECX_X2APIC) {
2888                 add_x86_feature(featureset, X86FSET_X2APIC);
2889         }
2890         if (cp->cp_edx & CPUID_INTC_EDX_DE) {
2891                 add_x86_feature(featureset, X86FSET_DE);
2892         }
2893 #if !defined(__xpv)
2894         if (cp->cp_ecx & CPUID_INTC_ECX_MON) {
2895 
2896                 /*
2897                  * We require the CLFLUSH instruction for an erratum workaround
2898                  * in order to use MONITOR/MWAIT.
2899                  */
2900                 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
2901                         cpi->cpi_mwait.support |= MWAIT_SUPPORT;
2902                         add_x86_feature(featureset, X86FSET_MWAIT);
2903                 } else {
2904                         extern int idle_cpu_assert_cflush_monitor;
2905 
2906                         /*
2907                          * All processors we are aware of which have
2908                          * MONITOR/MWAIT also have CLFLUSH.
2909                          */
2910                         if (idle_cpu_assert_cflush_monitor) {
2911                                 ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) &&
2912                                     (cp->cp_edx & CPUID_INTC_EDX_CLFSH));
2913                         }
2914                 }
2915         }
2916 #endif  /* __xpv */
2917 
2918         if (cp->cp_ecx & CPUID_INTC_ECX_VMX) {
2919                 add_x86_feature(featureset, X86FSET_VMX);
2920         }
2921 
2922         if (cp->cp_ecx & CPUID_INTC_ECX_RDRAND)
2923                 add_x86_feature(featureset, X86FSET_RDRAND);
2924 
2925         /*
2926          * We only need to capture this once; the rest of the CPUs
2927          * follow suit. This is only recorded for the boot CPU.
2928          */
2929         if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
2930                 add_x86_feature(featureset, X86FSET_CLFSH);
2931                 x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8);
2932         }
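             /*
              * The CLFLUSH line size in %ebx bits 15:8 is reported in units
              * of 8 bytes, so e.g. a value of 8 there means a 64-byte line,
              * hence the multiplication above.
              */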
2933         if (is_x86_feature(featureset, X86FSET_PAE))
2934                 cpi->cpi_pabits = 36;
2935 
2936         if (cpi->cpi_maxeax >= 0xD && !xsave_force_disable) {
2937                 struct cpuid_regs r, *ecp;
2938 
2939                 ecp = &r;
2940                 ecp->cp_eax = 0xD;
2941                 ecp->cp_ecx = 1;
2942                 ecp->cp_edx = ecp->cp_ebx = 0;
2943                 (void) __cpuid_insn(ecp);
2944 
2945                 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVEOPT)
2946                         add_x86_feature(featureset, X86FSET_XSAVEOPT);
2947                 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVEC)
2948                         add_x86_feature(featureset, X86FSET_XSAVEC);
2949                 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVES)
2950                         add_x86_feature(featureset, X86FSET_XSAVES);
2951         }
2952 
2953         /*
2954          * Work on the "extended" feature information, doing
2955          * some basic initialization for cpuid_pass2()
2956          */
2957         xcpuid = 0;
2958         switch (cpi->cpi_vendor) {
2959         case X86_VENDOR_Intel:
2960                 /*
2961                  * On KVM we know we will have proper support for extended
2962                  * cpuid.
2963                  */
2964                 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf ||
2965                     (get_hwenv() == HW_KVM && cpi->cpi_family == 6 &&
2966                     (cpi->cpi_model == 6 || cpi->cpi_model == 2)))
2967                         xcpuid++;
2968                 break;
2969         case X86_VENDOR_AMD:
2970                 if (cpi->cpi_family > 5 ||
2971                     (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
2972                         xcpuid++;
2973                 break;
2974         case X86_VENDOR_Cyrix:
2975                 /*
2976                  * Only these Cyrix CPUs are -known- to support
2977                  * extended cpuid operations.
2978                  */
2979                 if (x86_type == X86_TYPE_VIA_CYRIX_III ||
2980                     x86_type == X86_TYPE_CYRIX_GXm)
2981                         xcpuid++;
2982                 break;
2983         case X86_VENDOR_Centaur:
2984         case X86_VENDOR_TM:
2985         default:
2986                 xcpuid++;
2987                 break;
2988         }
2989 
2990         if (xcpuid) {
2991                 cp = &cpi->cpi_extd[0];
2992                 cp->cp_eax = CPUID_LEAF_EXT_0;
2993                 cpi->cpi_xmaxeax = __cpuid_insn(cp);
2994         }
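
             /*
              * If extended cpuid is supported, the maximum returned in %eax
              * has bit 31 (CPUID_LEAF_EXT_0, 0x80000000) set; on parts
              * without extended leaves the returned value typically lacks
              * that bit, so the check below doubles as a validity test.
              */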
2995 
2996         if (cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) {
2997 
2998                 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX)
2999                         cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX;
3000 
3001                 switch (cpi->cpi_vendor) {
3002                 case X86_VENDOR_Intel:
3003                 case X86_VENDOR_AMD:
3004                         if (cpi->cpi_xmaxeax < 0x80000001)
3005                                 break;
3006                         cp = &cpi->cpi_extd[1];
3007                         cp->cp_eax = 0x80000001;
3008                         (void) __cpuid_insn(cp);
3009 
3010                         if (cpi->cpi_vendor == X86_VENDOR_AMD &&
3011                             cpi->cpi_family == 5 &&
3012                             cpi->cpi_model == 6 &&
3013                             cpi->cpi_step == 6) {
3014                                 /*
3015                                  * K6 model 6 uses bit 10 to indicate SYSC.
3016                                  * Later models use bit 11. Fix it here.
3017                                  */
3018                                 if (cp->cp_edx & 0x400) {
3019                                         cp->cp_edx &= ~0x400;
3020                                         cp->cp_edx |= CPUID_AMD_EDX_SYSC;
3021                                 }
3022                         }
3023 
3024                         platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp);
3025 
3026                         /*
3027                          * Compute the additions to the kernel's feature word.
3028                          */
3029                         if (cp->cp_edx & CPUID_AMD_EDX_NX) {
3030                                 add_x86_feature(featureset, X86FSET_NX);
3031                         }
3032 
3033                         /*
3034                          * Regardless of whether or not we boot 64-bit,
3035                          * we should have a way to identify whether
3036                          * the CPU is capable of running 64-bit.
3037                          */
3038                         if (cp->cp_edx & CPUID_AMD_EDX_LM) {
3039                                 add_x86_feature(featureset, X86FSET_64);
3040                         }
3041 
3042                         /* 1 GB large page - enable only for the 64-bit kernel */
3043                         if (cp->cp_edx & CPUID_AMD_EDX_1GPG) {
3044                                 add_x86_feature(featureset, X86FSET_1GPG);
3045                         }
3046 
3047                         if ((cpi->cpi_vendor == X86_VENDOR_AMD) &&
3048                             (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) &&
3049                             (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) {
3050                                 add_x86_feature(featureset, X86FSET_SSE4A);
3051                         }
3052 
3053                         /*
3054                          * It's really tricky to support syscall/sysret in
3055                          * the i386 kernel; we rely on sysenter/sysexit
3056                          * instead.  In the amd64 kernel, things are -way-
3057                          * better.
3058                          */
3059                         if (cp->cp_edx & CPUID_AMD_EDX_SYSC) {
3060                                 add_x86_feature(featureset, X86FSET_ASYSC);
3061                         }
3062 
3063                         /*
3064                          * While we're thinking about system calls, note
3065                          * that AMD processors don't support sysenter
3066                          * in long mode at all, so don't try to program them.
3067                          */
3068                         if (x86_vendor == X86_VENDOR_AMD) {
3069                                 remove_x86_feature(featureset, X86FSET_SEP);
3070                         }
3071 
3072                         if (cp->cp_edx & CPUID_AMD_EDX_TSCP) {
3073                                 add_x86_feature(featureset, X86FSET_TSCP);
3074                         }
3075 
3076                         if (cp->cp_ecx & CPUID_AMD_ECX_SVM) {
3077                                 add_x86_feature(featureset, X86FSET_SVM);
3078                         }
3079 
3080                         if (cp->cp_ecx & CPUID_AMD_ECX_TOPOEXT) {
3081                                 add_x86_feature(featureset, X86FSET_TOPOEXT);
3082                         }
3083 
3084                         if (cp->cp_ecx & CPUID_AMD_ECX_XOP) {
3085                                 add_x86_feature(featureset, X86FSET_XOP);
3086                         }
3087 
3088                         if (cp->cp_ecx & CPUID_AMD_ECX_FMA4) {
3089                                 add_x86_feature(featureset, X86FSET_FMA4);
3090                         }
3091 
3092                         if (cp->cp_ecx & CPUID_AMD_ECX_TBM) {
3093                                 add_x86_feature(featureset, X86FSET_TBM);
3094                         }
3095 
3096                         if (cp->cp_ecx & CPUID_AMD_ECX_MONITORX) {
3097                                 add_x86_feature(featureset, X86FSET_MONITORX);
3098                         }
3099                         break;
3100                 default:
3101                         break;
3102                 }
3103 
3104                 /*
3105                  * Get CPUID data about processor cores and hyperthreads.
3106                  */
3107                 switch (cpi->cpi_vendor) {
3108                 case X86_VENDOR_Intel:
3109                         if (cpi->cpi_maxeax >= 4) {
3110                                 cp = &cpi->cpi_std[4];
3111                                 cp->cp_eax = 4;
3112                                 cp->cp_ecx = 0;
3113                                 (void) __cpuid_insn(cp);
3114                                 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp);
3115                         }
3116                         /*FALLTHROUGH*/
3117                 case X86_VENDOR_AMD:
3118                         if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8)
3119                                 break;
3120                         cp = &cpi->cpi_extd[8];
3121                         cp->cp_eax = CPUID_LEAF_EXT_8;
3122                         (void) __cpuid_insn(cp);
3123                         platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8,
3124                             cp);
3125 
3126                         /*
3127                          * AMD uses ebx for some extended functions.
3128                          */
3129                         if (cpi->cpi_vendor == X86_VENDOR_AMD) {
3130                                 /*
3131                                  * While we're here, check for the AMD "Error
3132                                  * Pointer Zero/Restore" feature. This can be
3133                                  * used to set up the FP save handlers
3134                                  * appropriately.
3135                                  */
3136                                 if (cp->cp_ebx & CPUID_AMD_EBX_ERR_PTR_ZERO) {
3137                                         cpi->cpi_fp_amd_save = 0;
3138                                 } else {
3139                                         cpi->cpi_fp_amd_save = 1;
3140                                 }
3141 
3142                                 if (cp->cp_ebx & CPUID_AMD_EBX_CLZERO) {
3143                                         add_x86_feature(featureset,
3144                                             X86FSET_CLZERO);
3145                                 }
3146                         }
3147 
3148                         /*
3149                          * Virtual and physical address limits from
3150                          * cpuid override previously guessed values.
3151                          */
3152                         cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0);
3153                         cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8);
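                             /*
                              * For example, a part reporting 0x3028 in %ax
                              * here has 48 virtual (0x30) and 40 physical
                              * (0x28) address bits.
                              */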
3154                         break;
3155                 default:
3156                         break;
3157                 }
3158 
3159                 /*
3160                  * Get CPUID data about TSC Invariance in Deep C-State.
3161                  */
3162                 switch (cpi->cpi_vendor) {
3163                 case X86_VENDOR_Intel:
3164                 case X86_VENDOR_AMD:
3165                         if (cpi->cpi_xmaxeax >= 0x80000007) {
3166                                 cp = &cpi->cpi_extd[7];
3167                                 cp->cp_eax = 0x80000007;
3168                                 cp->cp_ecx = 0;
3169                                 (void) __cpuid_insn(cp);
3170                         }
3171                         break;
3172                 default:
3173                         break;
3174                 }
3175         }
3176 
3177         cpuid_pass1_topology(cpu, featureset);
3178 
3179         /*
3180          * Synthesize chip "revision" and socket type
3181          */
3182         cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family,
3183             cpi->cpi_model, cpi->cpi_step);
3184         cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor,
3185             cpi->cpi_family, cpi->cpi_model, cpi->cpi_step);
3186         cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family,
3187             cpi->cpi_model, cpi->cpi_step);
3188 
3189         if (cpi->cpi_vendor == X86_VENDOR_AMD) {
3190                 if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8 &&
3191                     cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_ERR_PTR_ZERO) {
3192                         /* Special handling for AMD FP not necessary. */
3193                         cpi->cpi_fp_amd_save = 0;
3194                 } else {
3195                         cpi->cpi_fp_amd_save = 1;
3196                 }
3197         }
3198 
3199         /*
3200          * Check the processor leaves that are used for security features.
3201          */
3202         cpuid_scan_security(cpu, featureset);
3203 
3204 pass1_done:
3205         cpi->cpi_pass = 1;
3206 }
3207 
3208 /*
3209  * Make copies of the cpuid table entries we depend on, in
3210  * part for ease of parsing now, in part so that we have only
3211  * one place to correct any of it, in part for ease of
3212  * later export to userland, and in part so we can look at
3213  * this stuff in a crash dump.
3214  */
3215 
3216 /*ARGSUSED*/
3217 void
3218 cpuid_pass2(cpu_t *cpu)
3219 {
3220         uint_t n, nmax;
3221         int i;
3222         struct cpuid_regs *cp;
3223         uint8_t *dp;
3224         uint32_t *iptr;
3225         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
3226 
3227         ASSERT(cpi->cpi_pass == 1);
3228 
3229         if (cpi->cpi_maxeax < 1)
3230                 goto pass2_done;
3231 
3232         if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD)
3233                 nmax = NMAX_CPI_STD;
3234         /*
3235          * (We already handled n == 0 and n == 1 in pass 1)
3236          */
3237         for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) {
3238                 cp->cp_eax = n;
3239 
3240                 /*
3241                  * n == 7 was handled in pass 1
3242                  */
3243                 if (n == 7)
3244                         continue;
3245 
3246                 /*
3247                  * CPUID function 4 expects %ecx to be initialized
3248                  * with an index which indicates which cache to return
3249                  * information about. The OS is expected to call function 4
3250                  * with %ecx set to 0, 1, 2, ... until it returns with
3251                  * EAX[4:0] set to 0, which indicates there are no more
3252                  * caches.
3253                  *
3254                  * Here, populate cpi_std[4] with the information returned by
3255                  * function 4 when %ecx == 0, and do the rest in cpuid_pass3()
3256                  * when dynamic memory allocation becomes available.
3257                  *
3258                  * Note: we need to explicitly initialize %ecx here, since
3259                  * function 4 may have been previously invoked.
3260                  */
3261                 if (n == 4)
3262                         cp->cp_ecx = 0;
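
                     /*
                      * A sketch of the full sub-leaf enumeration deferred to
                      * cpuid_pass3(), roughly:
                      *
                      *      for (i = 0; ; i++) {
                      *              regs.cp_eax = 4;
                      *              regs.cp_ecx = i;
                      *              (void) __cpuid_insn(&regs);
                      *              if (BITX(regs.cp_eax, 4, 0) == 0)
                      *                      break;          (no more caches)
                      *      }
                      */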
3263 
3264                 (void) __cpuid_insn(cp);
3265                 platform_cpuid_mangle(cpi->cpi_vendor, n, cp);
3266                 switch (n) {
3267                 case 2:
3268                         /*
3269                          * "the lower 8 bits of the %eax register
3270                          * contain a value that identifies the number
3271                          * of times the cpuid [instruction] has to be
3272                          * executed to obtain a complete image of the
3273                          * processor's caching systems."
3274                          *
3275                          * How *do* they make this stuff up?
3276                          */
3277                         cpi->cpi_ncache = sizeof (*cp) *
3278                             BITX(cp->cp_eax, 7, 0);
3279                         if (cpi->cpi_ncache == 0)
3280                                 break;
3281                         cpi->cpi_ncache--;   /* skip count byte */
3282 
3283                         /*
3284                          * Well, for now, rather than attempt to implement
3285                          * this slightly dubious algorithm, we just look
3286                          * at the first 15 descriptors ...
3287                          */
3288                         if (cpi->cpi_ncache > (sizeof (*cp) - 1))
3289                                 cpi->cpi_ncache = sizeof (*cp) - 1;
3290 
3291                         dp = cpi->cpi_cacheinfo;
3292                         if (BITX(cp->cp_eax, 31, 31) == 0) {
3293                                 uint8_t *p = (void *)&cp->cp_eax;
3294                                 for (i = 1; i < 4; i++)
3295                                         if (p[i] != 0)
3296                                                 *dp++ = p[i];
3297                         }
3298                         if (BITX(cp->cp_ebx, 31, 31) == 0) {
3299                                 uint8_t *p = (void *)&cp->cp_ebx;
3300                                 for (i = 0; i < 4; i++)
3301                                         if (p[i] != 0)
3302                                                 *dp++ = p[i];
3303                         }
3304                         if (BITX(cp->cp_ecx, 31, 31) == 0) {
3305                                 uint8_t *p = (void *)&cp->cp_ecx;
3306                                 for (i = 0; i < 4; i++)
3307                                         if (p[i] != 0)
3308                                                 *dp++ = p[i];
3309                         }
3310                         if (BITX(cp->cp_edx, 31, 31) == 0) {
3311                                 uint8_t *p = (void *)&cp->cp_edx;
3312                                 for (i = 0; i < 4; i++)
3313                                         if (p[i] != 0)
3314                                                 *dp++ = p[i];
3315                         }
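                             /*
                              * Each valid register thus yields up to four
                              * 1-byte cache descriptors; a register with
                              * bit 31 set carries no descriptors, and the
                              * low byte of %eax is the iteration count we
                              * already skipped above.
                              */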
3316                         break;
3317 
3318                 case 3: /* Processor serial number, if PSN supported */
3319                         break;
3320 
3321                 case 4: /* Deterministic cache parameters */
3322                         break;
3323 
3324                 case 5: /* Monitor/Mwait parameters */
3325                 {
3326                         size_t mwait_size;
3327 
3328                         /*
3329                          * check cpi_mwait.support which was set in cpuid_pass1
3330                          */
3331                         if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT))
3332                                 break;
3333 
3334                         /*
3335                          * Protect ourselves from an insane mwait line size,
3336                          * working around incomplete hardware emulator(s).
3337                          */
3338                         mwait_size = (size_t)MWAIT_SIZE_MAX(cpi);
3339                         if (mwait_size < sizeof (uint32_t) ||
3340                             !ISP2(mwait_size)) {
3341 #if DEBUG
3342                                 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait "
3343                                     "size %ld", cpu->cpu_id, (long)mwait_size);
3344 #endif
3345                                 break;
3346                         }
3347 
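                             /*
                              * The minimum and maximum monitor line sizes come
                              * from the low 16 bits of leaf 5's %eax and %ebx,
                              * respectively, which is what the MWAIT_SIZE_MIN/
                              * MWAIT_SIZE_MAX macros extract.
                              */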
3348                         cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi);
3349                         cpi->cpi_mwait.mon_max = mwait_size;
3350                         if (MWAIT_EXTENSION(cpi)) {
3351                                 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS;
3352                                 if (MWAIT_INT_ENABLE(cpi))
3353                                         cpi->cpi_mwait.support |=
3354                                             MWAIT_ECX_INT_ENABLE;
3355                         }
3356                         break;
3357                 }
3358                 default:
3359                         break;
3360                 }
3361         }
3362 
3363         /*
3364          * XSAVE enumeration
3365          */
3366         if (cpi->cpi_maxeax >= 0xD) {
3367                 struct cpuid_regs regs;
3368                 boolean_t cpuid_d_valid = B_TRUE;
3369 
3370                 cp = &regs;
3371                 cp->cp_eax = 0xD;
3372                 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
3373 
3374                 (void) __cpuid_insn(cp);
3375 
3376                 /*
3377                  * Sanity checks for debug
3378                  */
3379                 if ((cp->cp_eax & XFEATURE_LEGACY_FP) == 0 ||
3380                     (cp->cp_eax & XFEATURE_SSE) == 0) {
3381                         cpuid_d_valid = B_FALSE;
3382                 }
3383 
3384                 cpi->cpi_xsave.xsav_hw_features_low = cp->cp_eax;
3385                 cpi->cpi_xsave.xsav_hw_features_high = cp->cp_edx;
3386                 cpi->cpi_xsave.xsav_max_size = cp->cp_ecx;
3387 
3388                 /*
3389                  * If the hw supports AVX, get the size and offset in the save
3390                  * area for the ymm state.
3391                  */
3392                 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX) {
3393                         cp->cp_eax = 0xD;
3394                         cp->cp_ecx = 2;
3395                         cp->cp_edx = cp->cp_ebx = 0;
3396 
3397                         (void) __cpuid_insn(cp);
3398 
3399                         if (cp->cp_ebx != CPUID_LEAFD_2_YMM_OFFSET ||
3400                             cp->cp_eax != CPUID_LEAFD_2_YMM_SIZE) {
3401                                 cpuid_d_valid = B_FALSE;
3402                         }
3403 
3404                         cpi->cpi_xsave.ymm_size = cp->cp_eax;
3405                         cpi->cpi_xsave.ymm_offset = cp->cp_ebx;
3406                 }
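
                     /*
                      * The architectural values here are an offset of 576
                      * bytes (the 512-byte legacy region plus the 64-byte
                      * XSAVE header) and a size of 256 bytes (the upper
                      * halves of the 16 %ymm registers), which is what the
                      * CPUID_LEAFD_2_YMM_* definitions above encode.
                      */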
3407 
3408                 /*
3409                  * If the hw supports MPX, get the size and offset in the
3410                  * save area for BNDREGS and BNDCSR.
3411                  */
3412                 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_MPX) {
3413                         cp->cp_eax = 0xD;
3414                         cp->cp_ecx = 3;
3415                         cp->cp_edx = cp->cp_ebx = 0;
3416 
3417                         (void) __cpuid_insn(cp);
3418 
3419                         cpi->cpi_xsave.bndregs_size = cp->cp_eax;
3420                         cpi->cpi_xsave.bndregs_offset = cp->cp_ebx;
3421 
3422                         cp->cp_eax = 0xD;
3423                         cp->cp_ecx = 4;
3424                         cp->cp_edx = cp->cp_ebx = 0;
3425 
3426                         (void) __cpuid_insn(cp);
3427 
3428                         cpi->cpi_xsave.bndcsr_size = cp->cp_eax;
3429                         cpi->cpi_xsave.bndcsr_offset = cp->cp_ebx;
3430                 }
3431 
3432                 /*
3433                  * If the hw supports AVX512, get the size and offset in the
3434                  * save area for the opmask registers and zmm state.
3435                  */
3436                 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX512) {
3437                         cp->cp_eax = 0xD;
3438                         cp->cp_ecx = 5;
3439                         cp->cp_edx = cp->cp_ebx = 0;
3440 
3441                         (void) __cpuid_insn(cp);
3442 
3443                         cpi->cpi_xsave.opmask_size = cp->cp_eax;
3444                         cpi->cpi_xsave.opmask_offset = cp->cp_ebx;
3445 
3446                         cp->cp_eax = 0xD;
3447                         cp->cp_ecx = 6;
3448                         cp->cp_edx = cp->cp_ebx = 0;
3449 
3450                         (void) __cpuid_insn(cp);
3451 
3452                         cpi->cpi_xsave.zmmlo_size = cp->cp_eax;
3453                         cpi->cpi_xsave.zmmlo_offset = cp->cp_ebx;
3454 
3455                         cp->cp_eax = 0xD;
3456                         cp->cp_ecx = 7;
3457                         cp->cp_edx = cp->cp_ebx = 0;
3458 
3459                         (void) __cpuid_insn(cp);
3460 
3461                         cpi->cpi_xsave.zmmhi_size = cp->cp_eax;
3462                         cpi->cpi_xsave.zmmhi_offset = cp->cp_ebx;
3463                 }
3464 
3465                 if (!is_x86_feature(x86_featureset, X86FSET_XSAVE)) {
3466                         xsave_state_size = 0;
3467                 } else if (cpuid_d_valid) {
3468                         xsave_state_size = cpi->cpi_xsave.xsav_max_size;
3469                 } else {
3470                         /* Broken CPUID 0xD, probably in HVM */
3471                         cmn_err(CE_WARN, "cpu%d: CPUID.0xD returns invalid "
3472                             "value: hw_low = %d, hw_high = %d, xsave_size = %d"
3473                             ", ymm_size = %d, ymm_offset = %d\n",
3474                             cpu->cpu_id, cpi->cpi_xsave.xsav_hw_features_low,
3475                             cpi->cpi_xsave.xsav_hw_features_high,
3476                             (int)cpi->cpi_xsave.xsav_max_size,
3477                             (int)cpi->cpi_xsave.ymm_size,
3478                             (int)cpi->cpi_xsave.ymm_offset);
3479 
3480                         if (xsave_state_size != 0) {
3481                                 /*
3482                                  * This must be a non-boot CPU. We cannot
3483                                  * continue, because the boot CPU has already
3484                                  * enabled XSAVE.
3485                                  */
3486                                 ASSERT(cpu->cpu_id != 0);
3487                                 cmn_err(CE_PANIC, "cpu%d: we have already "
3488                                     "enabled XSAVE on boot cpu, cannot "
3489                                     "continue.", cpu->cpu_id);
3490                         } else {
3491                                 /*
3492                                  * If we reached here on the boot CPU, it's also
3493                                  * almost certain that we'll reach here on the
3494                                  * non-boot CPUs. When we're here on the boot CPU
3495                                  * we should disable the feature; on a non-boot
3496                                  * CPU we need to confirm that it already has been.
3497                                  */
3498                                 if (cpu->cpu_id == 0) {
3499                                         remove_x86_feature(x86_featureset,
3500                                             X86FSET_XSAVE);
3501                                         remove_x86_feature(x86_featureset,
3502                                             X86FSET_AVX);
3503                                         remove_x86_feature(x86_featureset,
3504                                             X86FSET_F16C);
3505                                         remove_x86_feature(x86_featureset,
3506                                             X86FSET_BMI1);
3507                                         remove_x86_feature(x86_featureset,
3508                                             X86FSET_BMI2);
3509                                         remove_x86_feature(x86_featureset,
3510                                             X86FSET_FMA);
3511                                         remove_x86_feature(x86_featureset,
3512                                             X86FSET_AVX2);
3513                                         remove_x86_feature(x86_featureset,
3514                                             X86FSET_MPX);
3515                                         remove_x86_feature(x86_featureset,
3516                                             X86FSET_AVX512F);
3517                                         remove_x86_feature(x86_featureset,
3518                                             X86FSET_AVX512DQ);
3519                                         remove_x86_feature(x86_featureset,
3520                                             X86FSET_AVX512PF);
3521                                         remove_x86_feature(x86_featureset,
3522                                             X86FSET_AVX512ER);
3523                                         remove_x86_feature(x86_featureset,
3524                                             X86FSET_AVX512CD);
3525                                         remove_x86_feature(x86_featureset,
3526                                             X86FSET_AVX512BW);
3527                                         remove_x86_feature(x86_featureset,
3528                                             X86FSET_AVX512VL);
3529                                         remove_x86_feature(x86_featureset,
3530                                             X86FSET_AVX512FMA);
3531                                         remove_x86_feature(x86_featureset,
3532                                             X86FSET_AVX512VBMI);
3533                                         remove_x86_feature(x86_featureset,
3534                                             X86FSET_AVX512VNNI);
3535                                         remove_x86_feature(x86_featureset,
3536                                             X86FSET_AVX512VPOPCDQ);
3537                                         remove_x86_feature(x86_featureset,
3538                                             X86FSET_AVX512NNIW);
3539                                         remove_x86_feature(x86_featureset,
3540                                             X86FSET_AVX512FMAPS);
3541 
3542                                         CPI_FEATURES_ECX(cpi) &=
3543                                             ~CPUID_INTC_ECX_XSAVE;
3544                                         CPI_FEATURES_ECX(cpi) &=
3545                                             ~CPUID_INTC_ECX_AVX;
3546                                         CPI_FEATURES_ECX(cpi) &=
3547                                             ~CPUID_INTC_ECX_F16C;
3548                                         CPI_FEATURES_ECX(cpi) &=
3549                                             ~CPUID_INTC_ECX_FMA;
3550                                         CPI_FEATURES_7_0_EBX(cpi) &=
3551                                             ~CPUID_INTC_EBX_7_0_BMI1;
3552                                         CPI_FEATURES_7_0_EBX(cpi) &=
3553                                             ~CPUID_INTC_EBX_7_0_BMI2;
3554                                         CPI_FEATURES_7_0_EBX(cpi) &=
3555                                             ~CPUID_INTC_EBX_7_0_AVX2;
3556                                         CPI_FEATURES_7_0_EBX(cpi) &=
3557                                             ~CPUID_INTC_EBX_7_0_MPX;
3558                                         CPI_FEATURES_7_0_EBX(cpi) &=
3559                                             ~CPUID_INTC_EBX_7_0_ALL_AVX512;
3560 
3561                                         CPI_FEATURES_7_0_ECX(cpi) &=
3562                                             ~CPUID_INTC_ECX_7_0_ALL_AVX512;
3563 
3564                                         CPI_FEATURES_7_0_EDX(cpi) &=
3565                                             ~CPUID_INTC_EDX_7_0_ALL_AVX512;
3566 
3567                                         xsave_force_disable = B_TRUE;
3568                                 } else {
3569                                         VERIFY(is_x86_feature(x86_featureset,
3570                                             X86FSET_XSAVE) == B_FALSE);
3571                                 }
3572                         }
3573                 }
3574         }
3575 
3576 
3577         if ((cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) == 0)
3578                 goto pass2_done;
3579 
3580         if ((nmax = cpi->cpi_xmaxeax - CPUID_LEAF_EXT_0 + 1) > NMAX_CPI_EXTD)
3581                 nmax = NMAX_CPI_EXTD;
3582         /*
3583          * Copy the extended properties, fixing them as we go.
3584          * (We already handled n == 0 and n == 1 in pass 1)
3585          */
3586         iptr = (void *)cpi->cpi_brandstr;
3587         for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) {
3588                 cp->cp_eax = CPUID_LEAF_EXT_0 + n;
3589                 (void) __cpuid_insn(cp);
3590                 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_0 + n,
3591                     cp);
3592                 switch (n) {
3593                 case 2:
3594                 case 3:
3595                 case 4:
3596                         /*
3597                          * Extract the brand string (16 bytes per leaf)
3598                          */
3599                         *iptr++ = cp->cp_eax;
3600                         *iptr++ = cp->cp_ebx;
3601                         *iptr++ = cp->cp_ecx;
3602                         *iptr++ = cp->cp_edx;
3603                         break;
3604                 case 5:
3605                         switch (cpi->cpi_vendor) {
3606                         case X86_VENDOR_AMD:
3607                                 /*
3608                                  * The Athlon and Duron were the first
3609                                  * parts to report the sizes of the
3610                                  * TLB for large pages. On earlier parts,
3611                                  * we don't trust the data.
3612                                  */
3613                                 if (cpi->cpi_family < 6 ||
3614                                     (cpi->cpi_family == 6 &&
3615                                     cpi->cpi_model < 1))
3616                                         cp->cp_eax = 0;
3617                                 break;
3618                         default:
3619                                 break;
3620                         }
3621                         break;
3622                 case 6:
3623                         switch (cpi->cpi_vendor) {
3624                         case X86_VENDOR_AMD:
3625                                 /*
3626                                  * The Athlon and Duron were the first
3627                                  * AMD parts with L2 TLBs. On earlier
3628                                  * parts, don't trust the data.
3629                                  */
3630                                 if (cpi->cpi_family < 6 ||
3631                                     (cpi->cpi_family == 6 &&
3632                                     cpi->cpi_model < 1))
3633                                         cp->cp_eax = cp->cp_ebx = 0;
3634                                 /*
3635                                  * AMD Duron rev A0 reports its L2 size as
3636                                  * 1K when it is really 64K; the size in KB
3637                                  * sits in %ecx bits 31:16 (64 << 16 below).
3638                                  */
3639                                 if (cpi->cpi_family == 6 &&
3640                                     cpi->cpi_model == 3 &&
3641                                     cpi->cpi_step == 0) {
3642                                         cp->cp_ecx &= 0xffff;
3643                                         cp->cp_ecx |= 0x400000;
3644                                 }
3645                                 break;
3646                         case X86_VENDOR_Cyrix:  /* VIA C3 */
3647                                 /*
3648                                  * VIA C3 processors are a bit messed
3649                                  * up w.r.t. encoding cache sizes in %ecx
3650                                  */
3651                                 if (cpi->cpi_family != 6)
3652                                         break;
3653                                 /*
3654                                  * models 7 and 8 were incorrectly encoded
3655                                  *
3656                                  * xxx is model 8 really broken?
3657                                  */
3658                                 if (cpi->cpi_model == 7 ||
3659                                     cpi->cpi_model == 8)
3660                                         cp->cp_ecx =
3661                                             BITX(cp->cp_ecx, 31, 24) << 16 |
3662                                             BITX(cp->cp_ecx, 23, 16) << 12 |
3663                                             BITX(cp->cp_ecx, 15, 8) << 8 |
3664                                             BITX(cp->cp_ecx, 7, 0);
3665                                 /*
3666                                  * model 9 stepping 1 has wrong associativity
3667                                  */
3668                                 if (cpi->cpi_model == 9 && cpi->cpi_step == 1)
3669                                         cp->cp_ecx |= 8 << 12;
3670                                 break;
3671                         case X86_VENDOR_Intel:
3672                                 /*
3673                                  * Extended L2 Cache features function.
3674                                  * First appeared on Prescott.
3675                                  */
3676                         default:
3677                                 break;
3678                         }
3679                         break;
3680                 default:
3681                         break;
3682                 }
3683         }
3684 
3685 pass2_done:
3686         cpi->cpi_pass = 2;
3687 }
3688 
3689 static const char *
3690 intel_cpubrand(const struct cpuid_info *cpi)
3691 {
3692         int i;
3693 
3694         if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
3695             cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
3696                 return ("i486");
3697 
3698         switch (cpi->cpi_family) {
3699         case 5:
3700                 return ("Intel Pentium(r)");
3701         case 6:
3702                 switch (cpi->cpi_model) {
3703                         uint_t celeron, xeon;
3704                         const struct cpuid_regs *cp;
3705                 case 0:
3706                 case 1:
3707                 case 2:
3708                         return ("Intel Pentium(r) Pro");
3709                 case 3:
3710                 case 4:
3711                         return ("Intel Pentium(r) II");
3712                 case 6:
3713                         return ("Intel Celeron(r)");
3714                 case 5:
3715                 case 7:
3716                         celeron = xeon = 0;
3717                         cp = &cpi->cpi_std[2];   /* cache info */
3718 
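                        /*
                         * Leaf 2 packs one cache/TLB descriptor per byte of
                         * %eax..%edx; byte 0 of %eax is an iteration count,
                         * not a descriptor, so the %eax scan starts at byte
                         * 1. Descriptor 0x40 means "no L2 cache" (Celeron-
                         * class), while 0x44 and 0x45 denote a 1MB or 2MB
                         * L2 (Xeon-class).
                         */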
3719                         for (i = 1; i < 4; i++) {
3720                                 uint_t tmp;
3721 
3722                                 tmp = (cp->cp_eax >> (8 * i)) & 0xff;
3723                                 if (tmp == 0x40)
3724                                         celeron++;
3725                                 if (tmp >= 0x44 && tmp <= 0x45)
3726                                         xeon++;
3727                         }
3728 
3729                         for (i = 0; i < 2; i++) {
3730                                 uint_t tmp;
3731 
3732                                 tmp = (cp->cp_ebx >> (8 * i)) & 0xff;
3733                                 if (tmp == 0x40)
3734                                         celeron++;
3735                                 else if (tmp >= 0x44 && tmp <= 0x45)
3736                                         xeon++;
3737                         }
3738 
3739                         for (i = 0; i < 4; i++) {
3740                                 uint_t tmp;
3741 
3742                                 tmp = (cp->cp_ecx >> (8 * i)) & 0xff;
3743                                 if (tmp == 0x40)
3744                                         celeron++;
3745                                 else if (tmp >= 0x44 && tmp <= 0x45)
3746                                         xeon++;
3747                         }
3748 
3749                         for (i = 0; i < 4; i++) {
3750                                 uint_t tmp;
3751 
3752                                 tmp = (cp->cp_edx >> (8 * i)) & 0xff;
3753                                 if (tmp == 0x40)
3754                                         celeron++;
3755                                 else if (tmp >= 0x44 && tmp <= 0x45)
3756                                         xeon++;
3757                         }
3758 
3759                         if (celeron)
3760                                 return ("Intel Celeron(r)");
3761                         if (xeon)
3762                                 return (cpi->cpi_model == 5 ?
3763                                     "Intel Pentium(r) II Xeon(tm)" :
3764                                     "Intel Pentium(r) III Xeon(tm)");
3765                         return (cpi->cpi_model == 5 ?
3766                             "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" :
3767                             "Intel Pentium(r) III or Pentium(r) III Xeon(tm)");
3768                 default:
3769                         break;
3770                 }
3771         default:
3772                 break;
3773         }
3774 
3775         /* BrandID is present if the field is nonzero */
3776         if (cpi->cpi_brandid != 0) {
3777                 static const struct {
3778                         uint_t bt_bid;
3779                         const char *bt_str;
3780                 } brand_tbl[] = {
3781                         { 0x1,  "Intel(r) Celeron(r)" },
3782                         { 0x2,  "Intel(r) Pentium(r) III" },
3783                         { 0x3,  "Intel(r) Pentium(r) III Xeon(tm)" },
3784                         { 0x4,  "Intel(r) Pentium(r) III" },
3785                         { 0x6,  "Mobile Intel(r) Pentium(r) III" },
3786                         { 0x7,  "Mobile Intel(r) Celeron(r)" },
3787                         { 0x8,  "Intel(r) Pentium(r) 4" },
3788                         { 0x9,  "Intel(r) Pentium(r) 4" },
3789                         { 0xa,  "Intel(r) Celeron(r)" },
3790                         { 0xb,  "Intel(r) Xeon(tm)" },
3791                         { 0xc,  "Intel(r) Xeon(tm) MP" },
3792                         { 0xe,  "Mobile Intel(r) Pentium(r) 4" },
3793                         { 0xf,  "Mobile Intel(r) Celeron(r)" },
3794                         { 0x11, "Mobile Genuine Intel(r)" },
3795                         { 0x12, "Intel(r) Celeron(r) M" },
3796                         { 0x13, "Mobile Intel(r) Celeron(r)" },
3797                         { 0x14, "Intel(r) Celeron(r)" },
3798                         { 0x15, "Mobile Genuine Intel(r)" },
3799                         { 0x16, "Intel(r) Pentium(r) M" },
3800                         { 0x17, "Mobile Intel(r) Celeron(r)" }
3801                 };
3802                 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]);
3803                 uint_t sgn;
3804 
3805                 sgn = (cpi->cpi_family << 8) |
3806                     (cpi->cpi_model << 4) | cpi->cpi_step;
3807 
3808                 for (i = 0; i < btblmax; i++)
3809                         if (brand_tbl[i].bt_bid == cpi->cpi_brandid)
3810                                 break;
3811                 if (i < btblmax) {
3812                         if (sgn == 0x6b1 && cpi->cpi_brandid == 3)
3813                                 return ("Intel(r) Celeron(r)");
3814                         if (sgn < 0xf13 && cpi->cpi_brandid == 0xb)
3815                                 return ("Intel(r) Xeon(tm) MP");
3816                         if (sgn < 0xf13 && cpi->cpi_brandid == 0xe)
3817                                 return ("Intel(r) Xeon(tm)");
3818                         return (brand_tbl[i].bt_str);
3819                 }
3820         }
3821 
3822         return (NULL);
3823 }
3824 
3825 static const char *
3826 amd_cpubrand(const struct cpuid_info *cpi)
3827 {
3828         if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
3829             cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
3830                 return ("i486 compatible");
3831 
3832         switch (cpi->cpi_family) {
3833         case 5:
3834                 switch (cpi->cpi_model) {
3835                 case 0:
3836                 case 1:
3837                 case 2:
3838                 case 3:
3839                 case 4:
3840                 case 5:
3841                         return ("AMD-K5(r)");
3842                 case 6:
3843                 case 7:
3844                         return ("AMD-K6(r)");
3845                 case 8:
3846                         return ("AMD-K6(r)-2");
3847                 case 9:
3848                         return ("AMD-K6(r)-III");
3849                 default:
3850                         return ("AMD (family 5)");
3851                 }
3852         case 6:
3853                 switch (cpi->cpi_model) {
3854                 case 1:
3855                         return ("AMD-K7(tm)");
3856                 case 0:
3857                 case 2:
3858                 case 4:
3859                         return ("AMD Athlon(tm)");
3860                 case 3:
3861                 case 7:
3862                         return ("AMD Duron(tm)");
3863                 case 6:
3864                 case 8:
3865                 case 10:
3866                         /*
3867                          * Use the L2 cache size to distinguish
3868                          */
3869                         return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ?
3870                             "AMD Athlon(tm)" : "AMD Duron(tm)");
3871                 default:
3872                         return ("AMD (family 6)");
3873                 }
3874         default:
3875                 break;
3876         }
3877 
3878         if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 &&
3879             cpi->cpi_brandid != 0) {
3880                 switch (BITX(cpi->cpi_brandid, 7, 5)) {
3881                 case 3:
3882                         return ("AMD Opteron(tm) UP 1xx");
3883                 case 4:
3884                         return ("AMD Opteron(tm) DP 2xx");
3885                 case 5:
3886                         return ("AMD Opteron(tm) MP 8xx");
3887                 default:
3888                         return ("AMD Opteron(tm)");
3889                 }
3890         }
3891 
3892         return (NULL);
3893 }
3894 
3895 static const char *
3896 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type)
3897 {
3898         if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
3899             cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 ||
3900             type == X86_TYPE_CYRIX_486)
3901                 return ("i486 compatible");
3902 
3903         switch (type) {
3904         case X86_TYPE_CYRIX_6x86:
3905                 return ("Cyrix 6x86");
3906         case X86_TYPE_CYRIX_6x86L:
3907                 return ("Cyrix 6x86L");
3908         case X86_TYPE_CYRIX_6x86MX:
3909                 return ("Cyrix 6x86MX");
3910         case X86_TYPE_CYRIX_GXm:
3911                 return ("Cyrix GXm");
3912         case X86_TYPE_CYRIX_MediaGX:
3913                 return ("Cyrix MediaGX");
3914         case X86_TYPE_CYRIX_MII:
3915                 return ("Cyrix M2");
3916         case X86_TYPE_VIA_CYRIX_III:
3917                 return ("VIA Cyrix M3");
3918         default:
3919                 /*
3920                  * Have another wild guess ...
3921                  */
3922                 if (cpi->cpi_family == 4 && cpi->cpi_model == 9)
3923                         return ("Cyrix 5x86");
3924                 else if (cpi->cpi_family == 5) {
3925                         switch (cpi->cpi_model) {
3926                         case 2:
3927                                 return ("Cyrix 6x86");  /* Cyrix M1 */
3928                         case 4:
3929                                 return ("Cyrix MediaGX");
3930                         default:
3931                                 break;
3932                         }
3933                 } else if (cpi->cpi_family == 6) {
3934                         switch (cpi->cpi_model) {
3935                         case 0:
3936                                 return ("Cyrix 6x86MX"); /* Cyrix M2? */
3937                         case 5:
3938                         case 6:
3939                         case 7:
3940                         case 8:
3941                         case 9:
3942                                 return ("VIA C3");
3943                         default:
3944                                 break;
3945                         }
3946                 }
3947                 break;
3948         }
3949         return (NULL);
3950 }
3951 
3952 /*
3953  * This only gets called when the CPU extended feature brand
3954  * string leaves (0x80000002, 0x80000003, 0x80000004) aren't
3955  * available, or contain null bytes for some reason.
3956  */
3957 static void
3958 fabricate_brandstr(struct cpuid_info *cpi)
3959 {
3960         const char *brand = NULL;
3961 
3962         switch (cpi->cpi_vendor) {
3963         case X86_VENDOR_Intel:
3964                 brand = intel_cpubrand(cpi);
3965                 break;
3966         case X86_VENDOR_AMD:
3967                 brand = amd_cpubrand(cpi);
3968                 break;
3969         case X86_VENDOR_Cyrix:
3970                 brand = cyrix_cpubrand(cpi, x86_type);
3971                 break;
3972         case X86_VENDOR_NexGen:
3973                 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
3974                         brand = "NexGen Nx586";
3975                 break;
3976         case X86_VENDOR_Centaur:
3977                 if (cpi->cpi_family == 5)
3978                         switch (cpi->cpi_model) {
3979                         case 4:
3980                                 brand = "Centaur C6";
3981                                 break;
3982                         case 8:
3983                                 brand = "Centaur C2";
3984                                 break;
3985                         case 9:
3986                                 brand = "Centaur C3";
3987                                 break;
3988                         default:
3989                                 break;
3990                         }
3991                 break;
3992         case X86_VENDOR_Rise:
3993                 if (cpi->cpi_family == 5 &&
3994                     (cpi->cpi_model == 0 || cpi->cpi_model == 2))
3995                         brand = "Rise mP6";
3996                 break;
3997         case X86_VENDOR_SiS:
3998                 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
3999                         brand = "SiS 55x";
4000                 break;
4001         case X86_VENDOR_TM:
4002                 if (cpi->cpi_family == 5 && cpi->cpi_model == 4)
4003                         brand = "Transmeta Crusoe TM3x00 or TM5x00";
4004                 break;
4005         case X86_VENDOR_NSC:
4006         case X86_VENDOR_UMC:
4007         default:
4008                 break;
4009         }
4010         if (brand) {
4011                 (void) strcpy((char *)cpi->cpi_brandstr, brand);
4012                 return;
4013         }
4014 
4015         /*
4016          * If all else fails, fall back to something like "GenuineIntel 6.15.2"
4017          */
4018         (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr),
4019             "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family,
4020             cpi->cpi_model, cpi->cpi_step);
4021 }
4022 
4023 /*
4024  * This routine is called just after kernel memory allocation
4025  * becomes available on cpu0, and as part of mp_startup() on
4026  * the other cpus.
4027  *
4028  * Fixup the brand string, and collect any information from cpuid
4029  * that requires dynamically allocated storage to represent.
4030  */
4031 /*ARGSUSED*/
4032 void
4033 cpuid_pass3(cpu_t *cpu)
4034 {
4035         int     i, max, shft, level, size;
4036         struct cpuid_regs regs;
4037         struct cpuid_regs *cp;
4038         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4039 
4040         ASSERT(cpi->cpi_pass == 2);
4041 
4042         /*
4043          * Deterministic cache parameters
4044          *
4045          * Intel uses leaf 0x4 for this, while AMD uses leaf 0x8000001d. The
4046          * values that are present are currently defined to be the same. This
4047          * means we can use the same logic to parse it as long as we use the
4048          * appropriate leaf to get the data. If you're updating this, make sure
4049          * you're careful about which vendor supports which aspect.
4050          *
4051          * Take this opportunity to detect the number of threads sharing the
4052          * last level cache, and construct a corresponding cache id. The
4053          * respective cpuid_info members are initialized to the default case of
4054          * "no last level cache sharing".
4055          */
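        /*
         * For reference: in leaf 4 (and leaf 0x8000001d), %eax bits 4:0
         * give the cache type (0 means no further caches), bits 7:5 give
         * the cache level, and bits 25:14 give the maximum number of
         * threads sharing the cache, minus one (hence the "+ 1" applied
         * to CPI_NTHR_SHR_CACHE() below).
         */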
4056         cpi->cpi_ncpu_shr_last_cache = 1;
4057         cpi->cpi_last_lvl_cacheid = cpu->cpu_id;
4058 
4059         if ((cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) ||
4060             (cpi->cpi_vendor == X86_VENDOR_AMD &&
4061             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1d &&
4062             is_x86_feature(x86_featureset, X86FSET_TOPOEXT))) {
4063                 uint32_t leaf;
4064 
4065                 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
4066                         leaf = 4;
4067                 } else {
4068                         leaf = CPUID_LEAF_EXT_1d;
4069                 }
4070 
4071                 /*
4072                  * Find the # of elements (size) returned by the leaf and along
4073                  * the way detect last level cache sharing details.
4074                  */
4075                 bzero(&regs, sizeof (regs));
4076                 cp = &regs;
4077                 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) {
4078                         cp->cp_eax = leaf;
4079                         cp->cp_ecx = i;
4080 
4081                         (void) __cpuid_insn(cp);
4082 
4083                         if (CPI_CACHE_TYPE(cp) == 0)
4084                                 break;
4085                         level = CPI_CACHE_LVL(cp);
4086                         if (level > max) {
4087                                 max = level;
4088                                 cpi->cpi_ncpu_shr_last_cache =
4089                                     CPI_NTHR_SHR_CACHE(cp) + 1;
4090                         }
4091                 }
4092                 cpi->cpi_cache_leaf_size = size = i;
4093 
4094                 /*
4095                  * Allocate the cpi_cache_leaves array. The first element
4096                  * references the regs for the corresponding leaf with %ecx set
4097                  * to 0. This was gathered in cpuid_pass2().
4098                  */
4099                 if (size > 0) {
4100                         cpi->cpi_cache_leaves =
4101                             kmem_alloc(size * sizeof (cp), KM_SLEEP);
4102                         if (cpi->cpi_vendor == X86_VENDOR_Intel) {
4103                                 cpi->cpi_cache_leaves[0] = &cpi->cpi_std[4];
4104                         } else {
4105                                 cpi->cpi_cache_leaves[0] = &cpi->cpi_extd[0x1d];
4106                         }
4107 
4108                         /*
4109                          * Allocate storage to hold the additional regs
4110                          * for the leaf, %ecx == 1 .. cpi_cache_leaf_size - 1.
4111                          *
4112                          * The regs for the leaf, %ecx == 0, have already
4113                          * been allocated as indicated above.
4114                          */
4115                         for (i = 1; i < size; i++) {
4116                                 cp = cpi->cpi_cache_leaves[i] =
4117                                     kmem_zalloc(sizeof (regs), KM_SLEEP);
4118                                 cp->cp_eax = leaf;
4119                                 cp->cp_ecx = i;
4120 
4121                                 (void) __cpuid_insn(cp);
4122                         }
4123                 }
4124                 /*
4125                  * Determine the number of bits needed to represent
4126                  * the number of CPUs sharing the last level cache.
4127                  *
4128                  * Shift off that number of bits from the APIC id to
4129                  * derive the cache id.
4130                  */
4131                 shft = 0;
4132                 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1)
4133                         shft++;
4134                 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft;
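                /*
                 * Worked example: with 8 threads sharing the last level
                 * cache, the loop above computes shft == 3, so APIC ids
                 * 0x08 through 0x0f all map to cache id 1.
                 */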
4135         }
4136 
4137         /*
4138          * Now fixup the brand string
4139          */
4140         if ((cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) == 0) {
4141                 fabricate_brandstr(cpi);
4142         } else {
4143 
4144                 /*
4145                  * If we successfully extracted a brand string from the cpuid
4146                  * instruction, clean it up by removing leading spaces and
4147                  * similar junk.
4148                  */
4149                 if (cpi->cpi_brandstr[0]) {
4150                         size_t maxlen = sizeof (cpi->cpi_brandstr);
4151                         char *src, *dst;
4152 
4153                         dst = src = (char *)cpi->cpi_brandstr;
4154                         src[maxlen - 1] = '\0';
4155                         /*
4156                          * strip leading spaces
4157                          */
4158                         while (*src == ' ')
4159                                 src++;
4160                         /*
4161                          * Remove any "Genuine" or "Authentic" prefixes
4162                          */
4163                         if (strncmp(src, "Genuine ", 8) == 0)
4164                                 src += 8;
4165                         if (strncmp(src, "Authentic ", 10) == 0)
4166                                 src += 10;
4167 
4168                         /*
4169                          * Now do an in-place copy.
4170                          * Map (R) to (r) and (TM) to (tm).
4171                          * The era of teletypes is long gone, and there's
4172                          * -really- no need to shout.
4173                          */
4174                         while (*src != '\0') {
4175                                 if (src[0] == '(') {
4176                                         if (strncmp(src + 1, "R)", 2) == 0) {
4177                                                 (void) strncpy(dst, "(r)", 3);
4178                                                 src += 3;
4179                                                 dst += 3;
4180                                                 continue;
4181                                         }
4182                                         if (strncmp(src + 1, "TM)", 3) == 0) {
4183                                                 (void) strncpy(dst, "(tm)", 4);
4184                                                 src += 4;
4185                                                 dst += 4;
4186                                                 continue;
4187                                         }
4188                                 }
4189                                 *dst++ = *src++;
4190                         }
4191                         *dst = '\0';
4192 
4193                         /*
4194                          * Finally, remove any trailing spaces
4195                          */
4196                         while (--dst > cpi->cpi_brandstr)
4197                                 if (*dst == ' ')
4198                                         *dst = '\0';
4199                                 else
4200                                         break;
4201                 } else
4202                         fabricate_brandstr(cpi);
4203         }
4204         cpi->cpi_pass = 3;
4205 }
4206 
4207 /*
4208  * This routine is called out of bind_hwcap() much later in the life
4209  * of the kernel (post_startup()).  The job of this routine is to resolve
4210  * the hardware feature support and kernel support for those features into
4211  * what we're actually going to tell applications via the aux vector.
4212  */
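/*
 * An illustrative sketch of the contract (bind_hwcap() is the real caller;
 * a NULL cpu argument selects the current CPU):
 *
 *	uint_t hwcaps[2];
 *
 *	cpuid_pass4(NULL, hwcaps);
 *
 * On return, the AV_386_* bits are in hwcaps[0] and the AV_386_2_* bits
 * are in hwcaps[1].
 */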
4213 void
4214 cpuid_pass4(cpu_t *cpu, uint_t *hwcap_out)
4215 {
4216         struct cpuid_info *cpi;
4217         uint_t hwcap_flags = 0, hwcap_flags_2 = 0;
4218 
4219         if (cpu == NULL)
4220                 cpu = CPU;
4221         cpi = cpu->cpu_m.mcpu_cpi;
4222 
4223         ASSERT(cpi->cpi_pass == 3);
4224 
4225         if (cpi->cpi_maxeax >= 1) {
4226                 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES];
4227                 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES];
4228                 uint32_t *ebx = &cpi->cpi_support[STD_EBX_FEATURES];
4229 
4230                 *edx = CPI_FEATURES_EDX(cpi);
4231                 *ecx = CPI_FEATURES_ECX(cpi);
4232                 *ebx = CPI_FEATURES_7_0_EBX(cpi);
4233 
4234                 /*
4235                  * [these require explicit kernel support]
4236                  */
4237                 if (!is_x86_feature(x86_featureset, X86FSET_SEP))
4238                         *edx &= ~CPUID_INTC_EDX_SEP;
4239 
4240                 if (!is_x86_feature(x86_featureset, X86FSET_SSE))
4241                         *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE);
4242                 if (!is_x86_feature(x86_featureset, X86FSET_SSE2))
4243                         *edx &= ~CPUID_INTC_EDX_SSE2;
4244 
4245                 if (!is_x86_feature(x86_featureset, X86FSET_HTT))
4246                         *edx &= ~CPUID_INTC_EDX_HTT;
4247 
4248                 if (!is_x86_feature(x86_featureset, X86FSET_SSE3))
4249                         *ecx &= ~CPUID_INTC_ECX_SSE3;
4250 
4251                 if (!is_x86_feature(x86_featureset, X86FSET_SSSE3))
4252                         *ecx &= ~CPUID_INTC_ECX_SSSE3;
4253                 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_1))
4254                         *ecx &= ~CPUID_INTC_ECX_SSE4_1;
4255                 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_2))
4256                         *ecx &= ~CPUID_INTC_ECX_SSE4_2;
4257                 if (!is_x86_feature(x86_featureset, X86FSET_AES))
4258                         *ecx &= ~CPUID_INTC_ECX_AES;
4259                 if (!is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ))
4260                         *ecx &= ~CPUID_INTC_ECX_PCLMULQDQ;
4261                 if (!is_x86_feature(x86_featureset, X86FSET_XSAVE))
4262                         *ecx &= ~(CPUID_INTC_ECX_XSAVE |
4263                             CPUID_INTC_ECX_OSXSAVE);
4264                 if (!is_x86_feature(x86_featureset, X86FSET_AVX))
4265                         *ecx &= ~CPUID_INTC_ECX_AVX;
4266                 if (!is_x86_feature(x86_featureset, X86FSET_F16C))
4267                         *ecx &= ~CPUID_INTC_ECX_F16C;
4268                 if (!is_x86_feature(x86_featureset, X86FSET_FMA))
4269                         *ecx &= ~CPUID_INTC_ECX_FMA;
4270                 if (!is_x86_feature(x86_featureset, X86FSET_BMI1))
4271                         *ebx &= ~CPUID_INTC_EBX_7_0_BMI1;
4272                 if (!is_x86_feature(x86_featureset, X86FSET_BMI2))
4273                         *ebx &= ~CPUID_INTC_EBX_7_0_BMI2;
4274                 if (!is_x86_feature(x86_featureset, X86FSET_AVX2))
4275                         *ebx &= ~CPUID_INTC_EBX_7_0_AVX2;
4276                 if (!is_x86_feature(x86_featureset, X86FSET_RDSEED))
4277                         *ebx &= ~CPUID_INTC_EBX_7_0_RDSEED;
4278                 if (!is_x86_feature(x86_featureset, X86FSET_ADX))
4279                         *ebx &= ~CPUID_INTC_EBX_7_0_ADX;
4280 
4281                 /*
4282                  * [no explicit support required beyond x87 fp context]
4283                  */
4284                 if (!fpu_exists)
4285                         *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX);
4286 
4287                 /*
4288                  * Now map the supported feature vector to things that we
4289                  * think userland will care about.
4290                  */
4291                 if (*edx & CPUID_INTC_EDX_SEP)
4292                         hwcap_flags |= AV_386_SEP;
4293                 if (*edx & CPUID_INTC_EDX_SSE)
4294                         hwcap_flags |= AV_386_FXSR | AV_386_SSE;
4295                 if (*edx & CPUID_INTC_EDX_SSE2)
4296                         hwcap_flags |= AV_386_SSE2;
4297                 if (*ecx & CPUID_INTC_ECX_SSE3)
4298                         hwcap_flags |= AV_386_SSE3;
4299                 if (*ecx & CPUID_INTC_ECX_SSSE3)
4300                         hwcap_flags |= AV_386_SSSE3;
4301                 if (*ecx & CPUID_INTC_ECX_SSE4_1)
4302                         hwcap_flags |= AV_386_SSE4_1;
4303                 if (*ecx & CPUID_INTC_ECX_SSE4_2)
4304                         hwcap_flags |= AV_386_SSE4_2;
4305                 if (*ecx & CPUID_INTC_ECX_MOVBE)
4306                         hwcap_flags |= AV_386_MOVBE;
4307                 if (*ecx & CPUID_INTC_ECX_AES)
4308                         hwcap_flags |= AV_386_AES;
4309                 if (*ecx & CPUID_INTC_ECX_PCLMULQDQ)
4310                         hwcap_flags |= AV_386_PCLMULQDQ;
4311                 if ((*ecx & CPUID_INTC_ECX_XSAVE) &&
4312                     (*ecx & CPUID_INTC_ECX_OSXSAVE)) {
4313                         hwcap_flags |= AV_386_XSAVE;
4314 
4315                         if (*ecx & CPUID_INTC_ECX_AVX) {
4316                                 uint32_t *ecx_7 = &CPI_FEATURES_7_0_ECX(cpi);
4317                                 uint32_t *edx_7 = &CPI_FEATURES_7_0_EDX(cpi);
4318 
4319                                 hwcap_flags |= AV_386_AVX;
4320                                 if (*ecx & CPUID_INTC_ECX_F16C)
4321                                         hwcap_flags_2 |= AV_386_2_F16C;
4322                                 if (*ecx & CPUID_INTC_ECX_FMA)
4323                                         hwcap_flags_2 |= AV_386_2_FMA;
4324 
4325                                 if (*ebx & CPUID_INTC_EBX_7_0_BMI1)
4326                                         hwcap_flags_2 |= AV_386_2_BMI1;
4327                                 if (*ebx & CPUID_INTC_EBX_7_0_BMI2)
4328                                         hwcap_flags_2 |= AV_386_2_BMI2;
4329                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX2)
4330                                         hwcap_flags_2 |= AV_386_2_AVX2;
4331                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512F)
4332                                         hwcap_flags_2 |= AV_386_2_AVX512F;
4333                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512DQ)
4334                                         hwcap_flags_2 |= AV_386_2_AVX512DQ;
4335                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512IFMA)
4336                                         hwcap_flags_2 |= AV_386_2_AVX512IFMA;
4337                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512PF)
4338                                         hwcap_flags_2 |= AV_386_2_AVX512PF;
4339                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512ER)
4340                                         hwcap_flags_2 |= AV_386_2_AVX512ER;
4341                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512CD)
4342                                         hwcap_flags_2 |= AV_386_2_AVX512CD;
4343                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512BW)
4344                                         hwcap_flags_2 |= AV_386_2_AVX512BW;
4345                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512VL)
4346                                         hwcap_flags_2 |= AV_386_2_AVX512VL;
4347 
4348                                 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VBMI)
4349                                         hwcap_flags_2 |= AV_386_2_AVX512VBMI;
4350                                 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VNNI)
4351                                         hwcap_flags_2 |= AV_386_2_AVX512_VNNI;
4352                                 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VPOPCDQ)
4353                                         hwcap_flags_2 |= AV_386_2_AVX512VPOPCDQ;
4354 
4355                                 if (*edx_7 & CPUID_INTC_EDX_7_0_AVX5124NNIW)
4356                                         hwcap_flags_2 |= AV_386_2_AVX512_4NNIW;
4357                                 if (*edx_7 & CPUID_INTC_EDX_7_0_AVX5124FMAPS)
4358                                         hwcap_flags_2 |= AV_386_2_AVX512_4FMAPS;
4359                         }
4360                 }
4361                 if (*ecx & CPUID_INTC_ECX_VMX)
4362                         hwcap_flags |= AV_386_VMX;
4363                 if (*ecx & CPUID_INTC_ECX_POPCNT)
4364                         hwcap_flags |= AV_386_POPCNT;
4365                 if (*edx & CPUID_INTC_EDX_FPU)
4366                         hwcap_flags |= AV_386_FPU;
4367                 if (*edx & CPUID_INTC_EDX_MMX)
4368                         hwcap_flags |= AV_386_MMX;
4369 
4370                 if (*edx & CPUID_INTC_EDX_TSC)
4371                         hwcap_flags |= AV_386_TSC;
4372                 if (*edx & CPUID_INTC_EDX_CX8)
4373                         hwcap_flags |= AV_386_CX8;
4374                 if (*edx & CPUID_INTC_EDX_CMOV)
4375                         hwcap_flags |= AV_386_CMOV;
4376                 if (*ecx & CPUID_INTC_ECX_CX16)
4377                         hwcap_flags |= AV_386_CX16;
4378 
4379                 if (*ecx & CPUID_INTC_ECX_RDRAND)
4380                         hwcap_flags_2 |= AV_386_2_RDRAND;
4381                 if (*ebx & CPUID_INTC_EBX_7_0_ADX)
4382                         hwcap_flags_2 |= AV_386_2_ADX;
4383                 if (*ebx & CPUID_INTC_EBX_7_0_RDSEED)
4384                         hwcap_flags_2 |= AV_386_2_RDSEED;
4385                 if (*ebx & CPUID_INTC_EBX_7_0_SHA)
4386                         hwcap_flags_2 |= AV_386_2_SHA;
4387                 if (*ebx & CPUID_INTC_EBX_7_0_FSGSBASE)
4388                         hwcap_flags_2 |= AV_386_2_FSGSBASE;
4389                 if (*ebx & CPUID_INTC_EBX_7_0_CLWB)
4390                         hwcap_flags_2 |= AV_386_2_CLWB;
4391                 if (*ebx & CPUID_INTC_EBX_7_0_CLFLUSHOPT)
4392                         hwcap_flags_2 |= AV_386_2_CLFLUSHOPT;
4393 
4394         }
4395         /*
4396          * Check a few miscellaneous features.
4397          */
4398         if (is_x86_feature(x86_featureset, X86FSET_CLZERO))
4399                 hwcap_flags_2 |= AV_386_2_CLZERO;
4400 
4401         if (cpi->cpi_xmaxeax < 0x80000001)
4402                 goto pass4_done;
4403 
4404         switch (cpi->cpi_vendor) {
4405                 struct cpuid_regs cp;
4406                 uint32_t *edx, *ecx;
4407 
4408         case X86_VENDOR_Intel:
4409                 /*
4410                  * Seems like Intel duplicated what was necessary
4411                  * here to make the initial crop of 64-bit OSes work.
4412                  * Hopefully, those are the only "extended" bits
4413                  * they'll add.
4414                  */
4415                 /*FALLTHROUGH*/
4416 
4417         case X86_VENDOR_AMD:
4418                 edx = &cpi->cpi_support[AMD_EDX_FEATURES];
4419                 ecx = &cpi->cpi_support[AMD_ECX_FEATURES];
4420 
4421                 *edx = CPI_FEATURES_XTD_EDX(cpi);
4422                 *ecx = CPI_FEATURES_XTD_ECX(cpi);
4423 
4424                 /*
4425                  * [these features require explicit kernel support]
4426                  */
4427                 switch (cpi->cpi_vendor) {
4428                 case X86_VENDOR_Intel:
4429                         if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
4430                                 *edx &= ~CPUID_AMD_EDX_TSCP;
4431                         break;
4432 
4433                 case X86_VENDOR_AMD:
4434                         if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
4435                                 *edx &= ~CPUID_AMD_EDX_TSCP;
4436                         if (!is_x86_feature(x86_featureset, X86FSET_SSE4A))
4437                                 *ecx &= ~CPUID_AMD_ECX_SSE4A;
4438                         break;
4439 
4440                 default:
4441                         break;
4442                 }
4443 
4444                 /*
4445                  * [no explicit support required beyond
4446                  * x87 fp context and exception handlers]
4447                  */
4448                 if (!fpu_exists)
4449                         *edx &= ~(CPUID_AMD_EDX_MMXamd |
4450                             CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx);
4451 
4452                 if (!is_x86_feature(x86_featureset, X86FSET_NX))
4453                         *edx &= ~CPUID_AMD_EDX_NX;
4454 #if !defined(__amd64)
4455                 *edx &= ~CPUID_AMD_EDX_LM;
4456 #endif
4457                 /*
4458                  * Now map the supported feature vector to
4459                  * things that we think userland will care about.
4460                  */
4461 #if defined(__amd64)
4462                 if (*edx & CPUID_AMD_EDX_SYSC)
4463                         hwcap_flags |= AV_386_AMD_SYSC;
4464 #endif
4465                 if (*edx & CPUID_AMD_EDX_MMXamd)
4466                         hwcap_flags |= AV_386_AMD_MMX;
4467                 if (*edx & CPUID_AMD_EDX_3DNow)
4468                         hwcap_flags |= AV_386_AMD_3DNow;
4469                 if (*edx & CPUID_AMD_EDX_3DNowx)
4470                         hwcap_flags |= AV_386_AMD_3DNowx;
4471                 if (*ecx & CPUID_AMD_ECX_SVM)
4472                         hwcap_flags |= AV_386_AMD_SVM;
4473 
4474                 switch (cpi->cpi_vendor) {
4475                 case X86_VENDOR_AMD:
4476                         if (*edx & CPUID_AMD_EDX_TSCP)
4477                                 hwcap_flags |= AV_386_TSCP;
4478                         if (*ecx & CPUID_AMD_ECX_AHF64)
4479                                 hwcap_flags |= AV_386_AHF;
4480                         if (*ecx & CPUID_AMD_ECX_SSE4A)
4481                                 hwcap_flags |= AV_386_AMD_SSE4A;
4482                         if (*ecx & CPUID_AMD_ECX_LZCNT)
4483                                 hwcap_flags |= AV_386_AMD_LZCNT;
4484                         if (*ecx & CPUID_AMD_ECX_MONITORX)
4485                                 hwcap_flags_2 |= AV_386_2_MONITORX;
4486                         break;
4487 
4488                 case X86_VENDOR_Intel:
4489                         if (*edx & CPUID_AMD_EDX_TSCP)
4490                                 hwcap_flags |= AV_386_TSCP;
4491                         /*
4492                          * Aarrgh.
4493                          * Intel uses a different bit in the same word.
4494                          */
4495                         if (*ecx & CPUID_INTC_ECX_AHF64)
4496                                 hwcap_flags |= AV_386_AHF;
4497                         break;
4498 
4499                 default:
4500                         break;
4501                 }
4502                 break;
4503 
4504         case X86_VENDOR_TM:
4505                 cp.cp_eax = 0x80860001;
4506                 (void) __cpuid_insn(&cp);
4507                 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx;
4508                 break;
4509 
4510         default:
4511                 break;
4512         }
4513 
4514 pass4_done:
4515         cpi->cpi_pass = 4;
4516         if (hwcap_out != NULL) {
4517                 hwcap_out[0] = hwcap_flags;
4518                 hwcap_out[1] = hwcap_flags_2;
4519         }
4520 }
4521 
4522 
4523 /*
4524  * Simulate the cpuid instruction using the data we previously
4525  * captured about this CPU.  We try our best to return the truth
4526  * about the hardware, independently of kernel support.
4527  */
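/*
 * For example (illustrative), fetching the cached standard feature leaf:
 *
 *	struct cpuid_regs cp = { 0 };
 *
 *	cp.cp_eax = 1;
 *	(void) cpuid_insn(NULL, &cp);
 *
 * Leaf 1 falls within cpi_std, so this returns the copy captured in the
 * earlier passes instead of re-executing the cpuid instruction.
 */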
4528 uint32_t
4529 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp)
4530 {
4531         struct cpuid_info *cpi;
4532         struct cpuid_regs *xcp;
4533 
4534         if (cpu == NULL)
4535                 cpu = CPU;
4536         cpi = cpu->cpu_m.mcpu_cpi;
4537 
4538         ASSERT(cpuid_checkpass(cpu, 3));
4539 
4540         /*
4541          * CPUID data is cached in two separate places: cpi_std for standard
4542          * CPUID leaves, and cpi_extd for extended CPUID leaves.
4543          */
4544         if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD) {
4545                 xcp = &cpi->cpi_std[cp->cp_eax];
4546         } else if (cp->cp_eax >= CPUID_LEAF_EXT_0 &&
4547             cp->cp_eax <= cpi->cpi_xmaxeax &&
4548             cp->cp_eax < CPUID_LEAF_EXT_0 + NMAX_CPI_EXTD) {
4549                 xcp = &cpi->cpi_extd[cp->cp_eax - CPUID_LEAF_EXT_0];
4550         } else {
4551                 /*
4552                  * The caller is asking for data from an input parameter which
4553                  * the kernel has not cached.  In this case we go fetch from
4554                  * the hardware and return the data directly to the user.
4555                  */
4556                 return (__cpuid_insn(cp));
4557         }
4558 
4559         cp->cp_eax = xcp->cp_eax;
4560         cp->cp_ebx = xcp->cp_ebx;
4561         cp->cp_ecx = xcp->cp_ecx;
4562         cp->cp_edx = xcp->cp_edx;
4563         return (cp->cp_eax);
4564 }
4565 
4566 int
4567 cpuid_checkpass(cpu_t *cpu, int pass)
4568 {
4569         return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL &&
4570             cpu->cpu_m.mcpu_cpi->cpi_pass >= pass);
4571 }
4572 
4573 int
4574 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n)
4575 {
4576         ASSERT(cpuid_checkpass(cpu, 3));
4577 
4578         return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr));
4579 }
4580 
4581 int
4582 cpuid_is_cmt(cpu_t *cpu)
4583 {
4584         if (cpu == NULL)
4585                 cpu = CPU;
4586 
4587         ASSERT(cpuid_checkpass(cpu, 1));
4588 
4589         return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0);
4590 }
4591 
4592 /*
4593  * AMD and Intel both implement the 64-bit variant of the syscall
4594  * instruction (syscallq), so if there's -any- support for syscall,
4595  * cpuid currently says "yes, we support this".
4596  *
4597  * However, Intel decided to -not- implement the 32-bit variant of the
4598  * syscall instruction, so we provide a predicate to allow our caller
4599  * to test that subtlety here.
4600  *
4601  * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor,
4602  *      even in the case where the hardware would in fact support them.
4603  */
4604 /*ARGSUSED*/
4605 int
4606 cpuid_syscall32_insn(cpu_t *cpu)
4607 {
4608         ASSERT(cpuid_checkpass((cpu == NULL ? CPU : cpu), 1));
4609 
4610 #if !defined(__xpv)
4611         if (cpu == NULL)
4612                 cpu = CPU;
4613 
4614         /*CSTYLED*/
4615         {
4616                 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4617 
4618                 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
4619                     cpi->cpi_xmaxeax >= 0x80000001 &&
4620                     (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC))
4621                         return (1);
4622         }
4623 #endif
4624         return (0);
4625 }
4626 
4627 int
4628 cpuid_getidstr(cpu_t *cpu, char *s, size_t n)
4629 {
4630         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4631 
4632         static const char fmt[] =
4633             "x86 (%s %X family %d model %d step %d clock %d MHz)";
4634         static const char fmt_ht[] =
4635             "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)";
4636 
4637         ASSERT(cpuid_checkpass(cpu, 1));
4638 
4639         if (cpuid_is_cmt(cpu))
4640                 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid,
4641                     cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
4642                     cpi->cpi_family, cpi->cpi_model,
4643                     cpi->cpi_step, cpu->cpu_type_info.pi_clock));
4644         return (snprintf(s, n, fmt,
4645             cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
4646             cpi->cpi_family, cpi->cpi_model,
4647             cpi->cpi_step, cpu->cpu_type_info.pi_clock));
4648 }
4649 
4650 const char *
4651 cpuid_getvendorstr(cpu_t *cpu)
4652 {
4653         ASSERT(cpuid_checkpass(cpu, 1));
4654         return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr);
4655 }
4656 
4657 uint_t
4658 cpuid_getvendor(cpu_t *cpu)
4659 {
4660         ASSERT(cpuid_checkpass(cpu, 1));
4661         return (cpu->cpu_m.mcpu_cpi->cpi_vendor);
4662 }
4663 
4664 uint_t
4665 cpuid_getfamily(cpu_t *cpu)
4666 {
4667         ASSERT(cpuid_checkpass(cpu, 1));
4668         return (cpu->cpu_m.mcpu_cpi->cpi_family);
4669 }
4670 
4671 uint_t
4672 cpuid_getmodel(cpu_t *cpu)
4673 {
4674         ASSERT(cpuid_checkpass(cpu, 1));
4675         return (cpu->cpu_m.mcpu_cpi->cpi_model);
4676 }
4677 
4678 uint_t
4679 cpuid_get_ncpu_per_chip(cpu_t *cpu)
4680 {
4681         ASSERT(cpuid_checkpass(cpu, 1));
4682         return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip);
4683 }
4684 
4685 uint_t
4686 cpuid_get_ncore_per_chip(cpu_t *cpu)
4687 {
4688         ASSERT(cpuid_checkpass(cpu, 1));
4689         return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip);
4690 }
4691 
4692 uint_t
4693 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu)
4694 {
4695         ASSERT(cpuid_checkpass(cpu, 2));
4696         return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache);
4697 }
4698 
4699 id_t
4700 cpuid_get_last_lvl_cacheid(cpu_t *cpu)
4701 {
4702         ASSERT(cpuid_checkpass(cpu, 2));
4703         return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
4704 }
4705 
4706 uint_t
4707 cpuid_getstep(cpu_t *cpu)
4708 {
4709         ASSERT(cpuid_checkpass(cpu, 1));
4710         return (cpu->cpu_m.mcpu_cpi->cpi_step);
4711 }
4712 
4713 uint_t
4714 cpuid_getsig(struct cpu *cpu)
4715 {
4716         ASSERT(cpuid_checkpass(cpu, 1));
4717         return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax);
4718 }
4719 
4720 uint32_t
4721 cpuid_getchiprev(struct cpu *cpu)
4722 {
4723         ASSERT(cpuid_checkpass(cpu, 1));
4724         return (cpu->cpu_m.mcpu_cpi->cpi_chiprev);
4725 }
4726 
4727 const char *
4728 cpuid_getchiprevstr(struct cpu *cpu)
4729 {
4730         ASSERT(cpuid_checkpass(cpu, 1));
4731         return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr);
4732 }
4733 
4734 uint32_t
4735 cpuid_getsockettype(struct cpu *cpu)
4736 {
4737         ASSERT(cpuid_checkpass(cpu, 1));
4738         return (cpu->cpu_m.mcpu_cpi->cpi_socket);
4739 }
4740 
4741 const char *
4742 cpuid_getsocketstr(cpu_t *cpu)
4743 {
4744         static const char *socketstr = NULL;
4745         struct cpuid_info *cpi;
4746 
4747         ASSERT(cpuid_checkpass(cpu, 1));
4748         cpi = cpu->cpu_m.mcpu_cpi;
4749 
4750         /* Assume that socket types are the same across the system */
4751         if (socketstr == NULL)
4752                 socketstr = _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family,
4753                     cpi->cpi_model, cpi->cpi_step);
4754 
4755 
4756         return (socketstr);
4757 }
4758 
4759 int
4760 cpuid_get_chipid(cpu_t *cpu)
4761 {
4762         ASSERT(cpuid_checkpass(cpu, 1));
4763 
4764         if (cpuid_is_cmt(cpu))
4765                 return (cpu->cpu_m.mcpu_cpi->cpi_chipid);
4766         return (cpu->cpu_id);
4767 }
4768 
4769 id_t
4770 cpuid_get_coreid(cpu_t *cpu)
4771 {
4772         ASSERT(cpuid_checkpass(cpu, 1));
4773         return (cpu->cpu_m.mcpu_cpi->cpi_coreid);
4774 }
4775 
4776 int
4777 cpuid_get_pkgcoreid(cpu_t *cpu)
4778 {
4779         ASSERT(cpuid_checkpass(cpu, 1));
4780         return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid);
4781 }
4782 
4783 int
4784 cpuid_get_clogid(cpu_t *cpu)
4785 {
4786         ASSERT(cpuid_checkpass(cpu, 1));
4787         return (cpu->cpu_m.mcpu_cpi->cpi_clogid);
4788 }
4789 
4790 int
4791 cpuid_get_cacheid(cpu_t *cpu)
4792 {
4793         ASSERT(cpuid_checkpass(cpu, 1));
4794         return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
4795 }
4796 
4797 uint_t
4798 cpuid_get_procnodeid(cpu_t *cpu)
4799 {
4800         ASSERT(cpuid_checkpass(cpu, 1));
4801         return (cpu->cpu_m.mcpu_cpi->cpi_procnodeid);
4802 }
4803 
4804 uint_t
4805 cpuid_get_procnodes_per_pkg(cpu_t *cpu)
4806 {
4807         ASSERT(cpuid_checkpass(cpu, 1));
4808         return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg);
4809 }
4810 
4811 uint_t
4812 cpuid_get_compunitid(cpu_t *cpu)
4813 {
4814         ASSERT(cpuid_checkpass(cpu, 1));
4815         return (cpu->cpu_m.mcpu_cpi->cpi_compunitid);
4816 }
4817 
4818 uint_t
4819 cpuid_get_cores_per_compunit(cpu_t *cpu)
4820 {
4821         ASSERT(cpuid_checkpass(cpu, 1));
4822         return (cpu->cpu_m.mcpu_cpi->cpi_cores_per_compunit);
4823 }
4824 
4825 /*ARGSUSED*/
4826 int
4827 cpuid_have_cr8access(cpu_t *cpu)
4828 {
4829 #if defined(__amd64)
4830         return (1);
4831 #else
4832         struct cpuid_info *cpi;
4833 
4834         ASSERT(cpu != NULL);
4835         cpi = cpu->cpu_m.mcpu_cpi;
4836         if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 &&
4837             (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0)
4838                 return (1);
4839         return (0);
4840 #endif
4841 }
4842 
4843 uint32_t
4844 cpuid_get_apicid(cpu_t *cpu)
4845 {
4846         ASSERT(cpuid_checkpass(cpu, 1));
4847         if (cpu->cpu_m.mcpu_cpi->cpi_maxeax < 1) {
4848                 return (UINT32_MAX);
4849         } else {
4850                 return (cpu->cpu_m.mcpu_cpi->cpi_apicid);
4851         }
4852 }
4853 
4854 void
4855 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits)
4856 {
4857         struct cpuid_info *cpi;
4858 
4859         if (cpu == NULL)
4860                 cpu = CPU;
4861         cpi = cpu->cpu_m.mcpu_cpi;
4862 
4863         ASSERT(cpuid_checkpass(cpu, 1));
4864 
4865         if (pabits)
4866                 *pabits = cpi->cpi_pabits;
4867         if (vabits)
4868                 *vabits = cpi->cpi_vabits;
4869 }
4870 
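/*
 * The size returned below is the larger of the hardware-reported maximum
 * save area size (from CPUID leaf 0xd) and the legacy struct xsave_state,
 * so callers always get a buffer large enough for either layout.
 */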
4871 size_t
4872 cpuid_get_xsave_size()
4873 {
4874         return (MAX(cpuid_info0.cpi_xsave.xsav_max_size,
4875             sizeof (struct xsave_state)));
4876 }
4877 
4878 /*
4879  * Return true if the CPUs on this system require 'pointer clearing' for the
4880  * floating point error pointer exception handling. In the past, this has been
4881  * true for all AMD K7 & K8 CPUs, although newer AMD CPUs have been changed to
4882  * behave the same as Intel. This is checked via the CPUID_AMD_EBX_ERR_PTR_ZERO
4883  * feature bit and is reflected in the cpi_fp_amd_save member.
4884  */
4885 boolean_t
4886 cpuid_need_fp_excp_handling()
4887 {
4888         return (cpuid_info0.cpi_vendor == X86_VENDOR_AMD &&
4889             cpuid_info0.cpi_fp_amd_save != 0);
4890 }
4891 
4892 /*
4893  * Returns the number of data TLB entries for the given
4894  * pagesize.  If it can't be computed, or isn't known, the
4895  * routine returns zero.  If you ask about an architecturally
4896  * impossible pagesize, the routine will panic (so that the
4897  * hat implementor knows that things are inconsistent.)
4898  */
4899 uint_t
4900 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize)
4901 {
4902         struct cpuid_info *cpi;
4903         uint_t dtlb_nent = 0;
4904 
4905         if (cpu == NULL)
4906                 cpu = CPU;
4907         cpi = cpu->cpu_m.mcpu_cpi;
4908 
4909         ASSERT(cpuid_checkpass(cpu, 1));
4910 
4911         /*
4912          * Check the L2 TLB info
4913          */
4914         if (cpi->cpi_xmaxeax >= 0x80000006) {
4915                 struct cpuid_regs *cp = &cpi->cpi_extd[6];
4916 
4917                 switch (pagesize) {
4918 
4919                 case 4 * 1024:
4920                         /*
4921                          * All zero in the top 16 bits of the register
4922                          * indicates a unified TLB. Size is in low 16 bits.
4923                          */
4924                         if ((cp->cp_ebx & 0xffff0000) == 0)
4925                                 dtlb_nent = cp->cp_ebx & 0x0000ffff;
4926                         else
4927                                 dtlb_nent = BITX(cp->cp_ebx, 27, 16);
4928                         break;
4929 
4930                 case 2 * 1024 * 1024:
4931                         if ((cp->cp_eax & 0xffff0000) == 0)
4932                                 dtlb_nent = cp->cp_eax & 0x0000ffff;
4933                         else
4934                                 dtlb_nent = BITX(cp->cp_eax, 27, 16);
4935                         break;
4936 
4937                 default:
4938                         panic("unknown L2 pagesize");
4939                         /*NOTREACHED*/
4940                 }
4941         }
4942 
4943         if (dtlb_nent != 0)
4944                 return (dtlb_nent);
4945 
4946         /*
4947          * No L2 TLB support for this size, try L1.
4948          */
4949         if (cpi->cpi_xmaxeax >= 0x80000005) {
4950                 struct cpuid_regs *cp = &cpi->cpi_extd[5];
4951 
4952                 switch (pagesize) {
4953                 case 4 * 1024:
4954                         dtlb_nent = BITX(cp->cp_ebx, 23, 16);
4955                         break;
4956                 case 2 * 1024 * 1024:
4957                         dtlb_nent = BITX(cp->cp_eax, 23, 16);
4958                         break;
4959                 default:
4960                         panic("unknown L1 d-TLB pagesize");
4961                         /*NOTREACHED*/
4962                 }
4963         }
4964 
4965         return (dtlb_nent);
4966 }
4967 
4968 /*
4969  * Return 0 if the erratum is not present or not applicable, positive
4970  * if it is, and negative if the status of the erratum is unknown.
4971  *
4972  * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm)
4973  * Processors" #25759, Rev 3.57, August 2005
4974  */
4975 int
4976 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum)
4977 {
4978         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4979         uint_t eax;
4980 
4981         /*
4982          * Bail out if this CPU isn't an AMD CPU, or if it's
4983          * a legacy (32-bit) AMD CPU.
4984          */
4985         if (cpi->cpi_vendor != X86_VENDOR_AMD ||
4986             cpi->cpi_family == 4 || cpi->cpi_family == 5 ||
4987             cpi->cpi_family == 6) {
4988                 return (0);
4989         }
4990 
4991         eax = cpi->cpi_std[1].cp_eax;
4992 
4993 #define SH_B0(eax)      (eax == 0xf40 || eax == 0xf50)
4994 #define SH_B3(eax)      (eax == 0xf51)
4995 #define B(eax)          (SH_B0(eax) || SH_B3(eax))
4996 
4997 #define SH_C0(eax)      (eax == 0xf48 || eax == 0xf58)
4998 
4999 #define SH_CG(eax)      (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a)
5000 #define DH_CG(eax)      (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0)
5001 #define CH_CG(eax)      (eax == 0xf82 || eax == 0xfb2)
5002 #define CG(eax)         (SH_CG(eax) || DH_CG(eax) || CH_CG(eax))
5003 
5004 #define SH_D0(eax)      (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70)
5005 #define DH_D0(eax)      (eax == 0x10fc0 || eax == 0x10ff0)
5006 #define CH_D0(eax)      (eax == 0x10f80 || eax == 0x10fb0)
5007 #define D0(eax)         (SH_D0(eax) || DH_D0(eax) || CH_D0(eax))
5008 
5009 #define SH_E0(eax)      (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70)
5010 #define JH_E1(eax)      (eax == 0x20f10)        /* JH8_E0 had 0x20f30 */
5011 #define DH_E3(eax)      (eax == 0x20fc0 || eax == 0x20ff0)
5012 #define SH_E4(eax)      (eax == 0x20f51 || eax == 0x20f71)
5013 #define BH_E4(eax)      (eax == 0x20fb1)
5014 #define SH_E5(eax)      (eax == 0x20f42)
5015 #define DH_E6(eax)      (eax == 0x20ff2 || eax == 0x20fc2)
5016 #define JH_E6(eax)      (eax == 0x20f12 || eax == 0x20f32)
5017 #define EX(eax)         (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \
5018                             SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \
5019                             DH_E6(eax) || JH_E6(eax))
5020 
5021 #define DR_AX(eax)      (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02)
5022 #define DR_B0(eax)      (eax == 0x100f20)
5023 #define DR_B1(eax)      (eax == 0x100f21)
5024 #define DR_BA(eax)      (eax == 0x100f2a)
5025 #define DR_B2(eax)      (eax == 0x100f22)
5026 #define DR_B3(eax)      (eax == 0x100f23)
5027 #define RB_C0(eax)      (eax == 0x100f40)
5028 
5029         switch (erratum) {
5030         case 1:
5031                 return (cpi->cpi_family < 0x10);
5032         case 51:        /* what does the asterisk mean? */
5033                 return (B(eax) || SH_C0(eax) || CG(eax));
5034         case 52:
5035                 return (B(eax));
5036         case 57:
5037                 return (cpi->cpi_family <= 0x11);
5038         case 58:
5039                 return (B(eax));
5040         case 60:
5041                 return (cpi->cpi_family <= 0x11);
5042         case 61:
5043         case 62:
5044         case 63:
5045         case 64:
5046         case 65:
5047         case 66:
5048         case 68:
5049         case 69:
5050         case 70:
5051         case 71:
5052                 return (B(eax));
5053         case 72:
5054                 return (SH_B0(eax));
5055         case 74:
5056                 return (B(eax));
5057         case 75:
5058                 return (cpi->cpi_family < 0x10);
5059         case 76:
5060                 return (B(eax));
5061         case 77:
5062                 return (cpi->cpi_family <= 0x11);
5063         case 78:
5064                 return (B(eax) || SH_C0(eax));
5065         case 79:
5066                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
5067         case 80:
5068         case 81:
5069         case 82:
5070                 return (B(eax));
5071         case 83:
5072                 return (B(eax) || SH_C0(eax) || CG(eax));
5073         case 85:
5074                 return (cpi->cpi_family < 0x10);
5075         case 86:
5076                 return (SH_C0(eax) || CG(eax));
5077         case 88:
5078 #if !defined(__amd64)
5079                 return (0);
5080 #else
5081                 return (B(eax) || SH_C0(eax));
5082 #endif
5083         case 89:
5084                 return (cpi->cpi_family < 0x10);
5085         case 90:
5086                 return (B(eax) || SH_C0(eax) || CG(eax));
5087         case 91:
5088         case 92:
5089                 return (B(eax) || SH_C0(eax));
5090         case 93:
5091                 return (SH_C0(eax));
5092         case 94:
5093                 return (B(eax) || SH_C0(eax) || CG(eax));
5094         case 95:
5095 #if !defined(__amd64)
5096                 return (0);
5097 #else
5098                 return (B(eax) || SH_C0(eax));
5099 #endif
5100         case 96:
5101                 return (B(eax) || SH_C0(eax) || CG(eax));
5102         case 97:
5103         case 98:
5104                 return (SH_C0(eax) || CG(eax));
5105         case 99:
5106                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5107         case 100:
5108                 return (B(eax) || SH_C0(eax));
5109         case 101:
5110         case 103:
5111                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5112         case 104:
5113                 return (SH_C0(eax) || CG(eax) || D0(eax));
5114         case 105:
5115         case 106:
5116         case 107:
5117                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5118         case 108:
5119                 return (DH_CG(eax));
5120         case 109:
5121                 return (SH_C0(eax) || CG(eax) || D0(eax));
5122         case 110:
5123                 return (D0(eax) || EX(eax));
5124         case 111:
5125                 return (CG(eax));
5126         case 112:
5127                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
5128         case 113:
5129                 return (eax == 0x20fc0);
5130         case 114:
5131                 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
5132         case 115:
5133                 return (SH_E0(eax) || JH_E1(eax));
5134         case 116:
5135                 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
5136         case 117:
5137                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5138         case 118:
5139                 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) ||
5140                     JH_E6(eax));
5141         case 121:
5142                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
5143         case 122:
5144                 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11);
5145         case 123:
5146                 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax));
5147         case 131:
5148                 return (cpi->cpi_family < 0x10);
5149         case 6336786:
5150 
5151                 /*
5152                  * Test for AdvPowerMgmtInfo.TscPStateInvariant
5153                  * if this is a K8 family or newer processor. We're testing for
5154                  * this 'erratum' to determine whether or not we have a constant
5155                  * TSC.
5156                  *
5157                  * Our current fix for this is to disable the C1-Clock ramping.
5158                  * However, this doesn't work on newer processor families nor
5159                  * does it work when virtualized as those devices don't exist.
5160                  */
5161                 if (cpi->cpi_family >= 0x12 || get_hwenv() != HW_NATIVE) {
5162                         return (0);
5163                 }
5164 
5165                 if (CPI_FAMILY(cpi) == 0xf) {
5166                         struct cpuid_regs regs;
5167                         regs.cp_eax = 0x80000007;
5168                         (void) __cpuid_insn(&regs);
5169                         return (!(regs.cp_edx & 0x100));
5170                 }
5171                 return (0);
5172         case 6323525:
5173                 /*
5174                  * This erratum (K8 #147) is not present on family 10 and newer.
5175                  */
5176                 if (cpi->cpi_family >= 0x10) {
5177                         return (0);
5178                 }
5179                 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) |
5180                     (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40);
5181 
5182         case 6671130:
5183                 /*
5184                  * Check for processors (pre-Shanghai) that do not provide
5185                  * optimal management of 1GB PTEs in their TLBs.
5186                  */
5187                 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4);
5188 
5189         case 298:
5190                 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) ||
5191                     DR_B2(eax) || RB_C0(eax));
5192 
5193         case 721:
5194 #if defined(__amd64)
5195                 return (cpi->cpi_family == 0x10 || cpi->cpi_family == 0x12);
5196 #else
5197                 return (0);
5198 #endif
5199 
5200         default:
5201                 return (-1);
5202 
5203         }
5204 }
5205 
5206 /*
5207  * Determine if specified erratum is present via OSVW (OS Visible Workaround).
5208  * Return 1 if erratum is present, 0 if not present and -1 if indeterminate.
5209  */
5210 int
5211 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum)
5212 {
5213         struct cpuid_info       *cpi;
5214         uint_t                  osvwid;
5215         static int              osvwfeature = -1;
5216         uint64_t                osvwlength;
5217 
5219         cpi = cpu->cpu_m.mcpu_cpi;
5220 
5221         /* confirm OSVW supported */
5222         if (osvwfeature == -1) {
5223                 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW;
5224         } else {
5225                 /* assert that osvw feature setting is consistent on all cpus */
5226                 ASSERT(osvwfeature ==
5227                     (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW));
5228         }
5229         if (!osvwfeature)
5230                 return (-1);
5231 
5232         osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK;
5233 
5234         switch (erratum) {
5235         case 298:       /* osvwid is 0 */
5236                 osvwid = 0;
5237                 if (osvwlength <= (uint64_t)osvwid) {
5238                         /* osvwid 0 is unknown */
5239                         return (-1);
5240                 }
5241 
5242                 /*
5243                  * Check the OSVW STATUS MSR to determine the state
5244                  * of the erratum where:
5245                  *   0 - fixed by HW
5246                  *   1 - BIOS has applied the workaround when BIOS
5247                  *   workaround is available. (Or for other errata,
5248                  *   OS workaround is required.)
5249                  * For a value of 1, caller will confirm that the
5250                  * erratum 298 workaround has indeed been applied by BIOS.
5251                  *
5252                  * A 1 may be set in cpus that have a HW fix
5253                  * in a mixed cpu system. Regarding erratum 298:
5254                  *   In a multiprocessor platform, the workaround above
5255                  *   should be applied to all processors regardless of
5256                  *   silicon revision when an affected processor is
5257                  *   present.
5258                  */
5259 
5260                 return (rdmsr(MSR_AMD_OSVW_STATUS +
5261                     (osvwid / OSVW_ID_CNT_PER_MSR)) &
5262                     (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR)));
5263 
5264         default:
5265                 return (-1);
5266         }
5267 }
5268 
5269 static const char assoc_str[] = "associativity";
5270 static const char line_str[] = "line-size";
5271 static const char size_str[] = "size";
5272 
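     /*
      * Attach an integer property named "<label>-<type>" to a devinfo
      * node; e.g. add_cache_prop(devi, l2_cache_str, size_str, 512*1024)
      * creates an "l2-cache-size" property with value 524288.
      */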
5273 static void
5274 add_cache_prop(dev_info_t *devi, const char *label, const char *type,
5275     uint32_t val)
5276 {
5277         char buf[128];
5278 
5279         /*
5280          * ndi_prop_update_int() is used because it is desirable for
5281          * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set.
5282          */
5283         if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf))
5284                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val);
5285 }
5286 
5287 /*
5288  * Intel-style cache/tlb description
5289  *
5290  * Standard cpuid level 2 gives a randomly ordered
5291  * selection of tags that index into a table that describes
5292  * cache and tlb properties.
5293  */
5294 
5295 static const char l1_icache_str[] = "l1-icache";
5296 static const char l1_dcache_str[] = "l1-dcache";
5297 static const char l2_cache_str[] = "l2-cache";
5298 static const char l3_cache_str[] = "l3-cache";
5299 static const char itlb4k_str[] = "itlb-4K";
5300 static const char dtlb4k_str[] = "dtlb-4K";
5301 static const char itlb2M_str[] = "itlb-2M";
5302 static const char itlb4M_str[] = "itlb-4M";
5303 static const char dtlb4M_str[] = "dtlb-4M";
5304 static const char dtlb24_str[] = "dtlb0-2M-4M";
5305 static const char itlb424_str[] = "itlb-4K-2M-4M";
5306 static const char itlb24_str[] = "itlb-2M-4M";
5307 static const char dtlb44_str[] = "dtlb-4K-4M";
5308 static const char sl1_dcache_str[] = "sectored-l1-dcache";
5309 static const char sl2_cache_str[] = "sectored-l2-cache";
5310 static const char itrace_str[] = "itrace-cache";
5311 static const char sl3_cache_str[] = "sectored-l3-cache";
5312 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k";
5313 
5314 static const struct cachetab {
5315         uint8_t         ct_code;
5316         uint8_t         ct_assoc;
5317         uint16_t        ct_line_size;
5318         size_t          ct_size;
5319         const char      *ct_label;
5320 } intel_ctab[] = {
5321         /*
5322          * maintain descending order!
5323          *
5324          * Codes ignored - Reason
5325          * ----------------------
5326          * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache
5327          * f0H/f1H - Currently we do not interpret prefetch size by design
5328          */
5329         { 0xe4, 16, 64, 8*1024*1024, l3_cache_str},
5330         { 0xe3, 16, 64, 4*1024*1024, l3_cache_str},
5331         { 0xe2, 16, 64, 2*1024*1024, l3_cache_str},
5332         { 0xde, 12, 64, 6*1024*1024, l3_cache_str},
5333         { 0xdd, 12, 64, 3*1024*1024, l3_cache_str},
5334         { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str},
5335         { 0xd8, 8, 64, 4*1024*1024, l3_cache_str},
5336         { 0xd7, 8, 64, 2*1024*1024, l3_cache_str},
5337         { 0xd6, 8, 64, 1*1024*1024, l3_cache_str},
5338         { 0xd2, 4, 64, 2*1024*1024, l3_cache_str},
5339         { 0xd1, 4, 64, 1*1024*1024, l3_cache_str},
5340         { 0xd0, 4, 64, 512*1024, l3_cache_str},
5341         { 0xca, 4, 0, 512, sh_l2_tlb4k_str},
5342         { 0xc0, 4, 0, 8, dtlb44_str },
5343         { 0xba, 4, 0, 64, dtlb4k_str },
5344         { 0xb4, 4, 0, 256, dtlb4k_str },
5345         { 0xb3, 4, 0, 128, dtlb4k_str },
5346         { 0xb2, 4, 0, 64, itlb4k_str },
5347         { 0xb0, 4, 0, 128, itlb4k_str },
5348         { 0x87, 8, 64, 1024*1024, l2_cache_str},
5349         { 0x86, 4, 64, 512*1024, l2_cache_str},
5350         { 0x85, 8, 32, 2*1024*1024, l2_cache_str},
5351         { 0x84, 8, 32, 1024*1024, l2_cache_str},
5352         { 0x83, 8, 32, 512*1024, l2_cache_str},
5353         { 0x82, 8, 32, 256*1024, l2_cache_str},
5354         { 0x80, 8, 64, 512*1024, l2_cache_str},
5355         { 0x7f, 2, 64, 512*1024, l2_cache_str},
5356         { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str},
5357         { 0x7c, 8, 64, 1024*1024, sl2_cache_str},
5358         { 0x7b, 8, 64, 512*1024, sl2_cache_str},
5359         { 0x7a, 8, 64, 256*1024, sl2_cache_str},
5360         { 0x79, 8, 64, 128*1024, sl2_cache_str},
5361         { 0x78, 8, 64, 1024*1024, l2_cache_str},
5362         { 0x73, 8, 0, 64*1024, itrace_str},
5363         { 0x72, 8, 0, 32*1024, itrace_str},
5364         { 0x71, 8, 0, 16*1024, itrace_str},
5365         { 0x70, 8, 0, 12*1024, itrace_str},
5366         { 0x68, 4, 64, 32*1024, sl1_dcache_str},
5367         { 0x67, 4, 64, 16*1024, sl1_dcache_str},
5368         { 0x66, 4, 64, 8*1024, sl1_dcache_str},
5369         { 0x60, 8, 64, 16*1024, sl1_dcache_str},
5370         { 0x5d, 0, 0, 256, dtlb44_str},
5371         { 0x5c, 0, 0, 128, dtlb44_str},
5372         { 0x5b, 0, 0, 64, dtlb44_str},
5373         { 0x5a, 4, 0, 32, dtlb24_str},
5374         { 0x59, 0, 0, 16, dtlb4k_str},
5375         { 0x57, 4, 0, 16, dtlb4k_str},
5376         { 0x56, 4, 0, 16, dtlb4M_str},
5377         { 0x55, 0, 0, 7, itlb24_str},
5378         { 0x52, 0, 0, 256, itlb424_str},
5379         { 0x51, 0, 0, 128, itlb424_str},
5380         { 0x50, 0, 0, 64, itlb424_str},
5381         { 0x4f, 0, 0, 32, itlb4k_str},
5382         { 0x4e, 24, 64, 6*1024*1024, l2_cache_str},
5383         { 0x4d, 16, 64, 16*1024*1024, l3_cache_str},
5384         { 0x4c, 12, 64, 12*1024*1024, l3_cache_str},
5385         { 0x4b, 16, 64, 8*1024*1024, l3_cache_str},
5386         { 0x4a, 12, 64, 6*1024*1024, l3_cache_str},
5387         { 0x49, 16, 64, 4*1024*1024, l3_cache_str},
5388         { 0x48, 12, 64, 3*1024*1024, l2_cache_str},
5389         { 0x47, 8, 64, 8*1024*1024, l3_cache_str},
5390         { 0x46, 4, 64, 4*1024*1024, l3_cache_str},
5391         { 0x45, 4, 32, 2*1024*1024, l2_cache_str},
5392         { 0x44, 4, 32, 1024*1024, l2_cache_str},
5393         { 0x43, 4, 32, 512*1024, l2_cache_str},
5394         { 0x42, 4, 32, 256*1024, l2_cache_str},
5395         { 0x41, 4, 32, 128*1024, l2_cache_str},
5396         { 0x3e, 4, 64, 512*1024, sl2_cache_str},
5397         { 0x3d, 6, 64, 384*1024, sl2_cache_str},
5398         { 0x3c, 4, 64, 256*1024, sl2_cache_str},
5399         { 0x3b, 2, 64, 128*1024, sl2_cache_str},
5400         { 0x3a, 6, 64, 192*1024, sl2_cache_str},
5401         { 0x39, 4, 64, 128*1024, sl2_cache_str},
5402         { 0x30, 8, 64, 32*1024, l1_icache_str},
5403         { 0x2c, 8, 64, 32*1024, l1_dcache_str},
5404         { 0x29, 8, 64, 4096*1024, sl3_cache_str},
5405         { 0x25, 8, 64, 2048*1024, sl3_cache_str},
5406         { 0x23, 8, 64, 1024*1024, sl3_cache_str},
5407         { 0x22, 4, 64, 512*1024, sl3_cache_str},
5408         { 0x0e, 6, 64, 24*1024, l1_dcache_str},
5409         { 0x0d, 4, 32, 16*1024, l1_dcache_str},
5410         { 0x0c, 4, 32, 16*1024, l1_dcache_str},
5411         { 0x0b, 4, 0, 4, itlb4M_str},
5412         { 0x0a, 2, 32, 8*1024, l1_dcache_str},
5413         { 0x08, 4, 32, 16*1024, l1_icache_str},
5414         { 0x06, 4, 32, 8*1024, l1_icache_str},
5415         { 0x05, 4, 0, 32, dtlb4M_str},
5416         { 0x04, 4, 0, 8, dtlb4M_str},
5417         { 0x03, 4, 0, 64, dtlb4k_str},
5418         { 0x02, 4, 0, 2, itlb4M_str},
5419         { 0x01, 4, 0, 32, itlb4k_str},
5420         { 0 }
5421 };
5422 
5423 static const struct cachetab cyrix_ctab[] = {
5424         { 0x70, 4, 0, 32, "tlb-4K" },
5425         { 0x80, 4, 16, 16*1024, "l1-cache" },
5426         { 0 }
5427 };
5428 
5429 /*
5430  * Search a cache table (sorted in descending ct_code order) for a match
5431  */
5432 static const struct cachetab *
5433 find_cacheent(const struct cachetab *ct, uint_t code)
5434 {
5435         if (code != 0) {
5436                 for (; ct->ct_code != 0; ct++)
5437                         if (ct->ct_code <= code)
5438                                 break;
5439                 if (ct->ct_code == code)
5440                         return (ct);
5441         }
5442         return (NULL);
5443 }
5444 
5445 /*
5446  * Populate cachetab entry with L2 or L3 cache-information using
5447  * cpuid function 4. This function is called from intel_walk_cacheinfo()
5448  * when descriptor 0x49 is encountered. It returns 0 if no such cache
5449  * information is found.
5450  */
5451 static int
5452 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi)
5453 {
5454         uint32_t level, i;
5455         int ret = 0;
5456 
5457         for (i = 0; i < cpi->cpi_cache_leaf_size; i++) {
5458                 level = CPI_CACHE_LVL(cpi->cpi_cache_leaves[i]);
5459 
5460                 if (level == 2 || level == 3) {
5461                         ct->ct_assoc =
5462                             CPI_CACHE_WAYS(cpi->cpi_cache_leaves[i]) + 1;
5463                         ct->ct_line_size =
5464                             CPI_CACHE_COH_LN_SZ(cpi->cpi_cache_leaves[i]) + 1;
5465                         ct->ct_size = ct->ct_assoc *
5466                             (CPI_CACHE_PARTS(cpi->cpi_cache_leaves[i]) + 1) *
5467                             ct->ct_line_size *
5468                             (cpi->cpi_cache_leaves[i]->cp_ecx + 1);
5469 
5470                         if (level == 2) {
5471                                 ct->ct_label = l2_cache_str;
5472                         } else if (level == 3) {
5473                                 ct->ct_label = l3_cache_str;
5474                         }
5475                         ret = 1;
5476                 }
5477         }
5478 
5479         return (ret);
5480 }
5481 
5482 /*
5483  * Walk the cacheinfo descriptor, applying 'func' to every valid element
5484  * The walk is terminated if the walker returns non-zero.
5485  */
5486 static void
5487 intel_walk_cacheinfo(struct cpuid_info *cpi,
5488     void *arg, int (*func)(void *, const struct cachetab *))
5489 {
5490         const struct cachetab *ct;
5491         struct cachetab des_49_ct, des_b1_ct;
5492         uint8_t *dp;
5493         int i;
5494 
5495         if ((dp = cpi->cpi_cacheinfo) == NULL)
5496                 return;
5497         for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
5498                 /*
5499                  * For overloaded descriptor 0x49 we use cpuid function 4
5500                  * if supported by the current processor, to create
5501                  * cache information.
5502                  * For overloaded descriptor 0xb1 we use X86_PAE flag
5503                  * to disambiguate the cache information.
5504                  */
5505                 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 &&
5506                     intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) {
5507                         ct = &des_49_ct;
5508                 } else if (*dp == 0xb1) {
5509                         des_b1_ct.ct_code = 0xb1;
5510                         des_b1_ct.ct_assoc = 4;
5511                         des_b1_ct.ct_line_size = 0;
5512                         if (is_x86_feature(x86_featureset, X86FSET_PAE)) {
5513                                 des_b1_ct.ct_size = 8;
5514                                 des_b1_ct.ct_label = itlb2M_str;
5515                         } else {
5516                                 des_b1_ct.ct_size = 4;
5517                                 des_b1_ct.ct_label = itlb4M_str;
5518                         }
5519                         ct = &des_b1_ct;
5520                 } else {
5521                         if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) {
5522                                 continue;
5523                         }
5524                 }
5525 
5526                 if (func(arg, ct) != 0) {
5527                         break;
5528                 }
5529         }
5530 }
5531 
5532 /*
5533  * (Like the Intel one, except for Cyrix CPUs)
5534  */
5535 static void
5536 cyrix_walk_cacheinfo(struct cpuid_info *cpi,
5537     void *arg, int (*func)(void *, const struct cachetab *))
5538 {
5539         const struct cachetab *ct;
5540         uint8_t *dp;
5541         int i;
5542 
5543         if ((dp = cpi->cpi_cacheinfo) == NULL)
5544                 return;
5545         for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
5546                 /*
5547                  * Search Cyrix-specific descriptor table first ..
5548                  */
5549                 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) {
5550                         if (func(arg, ct) != 0)
5551                                 break;
5552                         continue;
5553                 }
5554                 /*
5555                  * .. else fall back to the Intel one
5556                  */
5557                 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) {
5558                         if (func(arg, ct) != 0)
5559                                 break;
5560                         continue;
5561                 }
5562         }
5563 }
5564 
5565 /*
5566  * A cacheinfo walker that adds associativity, line-size, and size properties
5567  * to the devinfo node it is passed as an argument.
5568  */
5569 static int
5570 add_cacheent_props(void *arg, const struct cachetab *ct)
5571 {
5572         dev_info_t *devi = arg;
5573 
5574         add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc);
5575         if (ct->ct_line_size != 0)
5576                 add_cache_prop(devi, ct->ct_label, line_str,
5577                     ct->ct_line_size);
5578         add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size);
5579         return (0);
5580 }
5581 
5582 
5583 static const char fully_assoc[] = "fully-associative?";
5584 
5585 /*
5586  * AMD style cache/tlb description
5587  *
5588  * Extended functions 5 and 6 directly describe properties of
5589  * tlbs and various cache levels.
5590  */
5591 static void
5592 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc)
5593 {
5594         switch (assoc) {
5595         case 0: /* reserved; ignore */
5596                 break;
5597         case 0xff:
5598                 add_cache_prop(devi, label, fully_assoc, 1);
5599                 break;
5600         default:
5601                 add_cache_prop(devi, label, assoc_str, assoc);
5602                 break;
5603         }
5604 }
5605 
5606 static void
5607 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
5608 {
5609         if (size == 0)
5610                 return;
5611         add_cache_prop(devi, label, size_str, size);
5612         add_amd_assoc(devi, label, assoc);
5613 }
5614 
5615 static void
5616 add_amd_cache(dev_info_t *devi, const char *label,
5617     uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
5618 {
5619         if (size == 0 || line_size == 0)
5620                 return;
5621         add_amd_assoc(devi, label, assoc);
5622         /*
5623          * Most AMD parts have a sectored cache. Multiple cache lines are
5624          * associated with each tag. A sector consists of all cache lines
5625          * associated with a tag. For example, the AMD K6-III has a sector
5626          * size of 2 cache lines per tag.
5627          */
5628         if (lines_per_tag != 0)
5629                 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
5630         add_cache_prop(devi, label, line_str, line_size);
5631         add_cache_prop(devi, label, size_str, size * 1024);
5632 }
5633 
5634 static void
5635 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc)
5636 {
5637         switch (assoc) {
5638         case 0: /* off */
5639                 break;
5640         case 1:
5641         case 2:
5642         case 4:
5643                 add_cache_prop(devi, label, assoc_str, assoc);
5644                 break;
5645         case 6:
5646                 add_cache_prop(devi, label, assoc_str, 8);
5647                 break;
5648         case 8:
5649                 add_cache_prop(devi, label, assoc_str, 16);
5650                 break;
5651         case 0xf:
5652                 add_cache_prop(devi, label, fully_assoc, 1);
5653                 break;
5654         default: /* reserved; ignore */
5655                 break;
5656         }
5657 }
5658 
5659 static void
5660 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
5661 {
5662         if (size == 0 || assoc == 0)
5663                 return;
5664         add_amd_l2_assoc(devi, label, assoc);
5665         add_cache_prop(devi, label, size_str, size);
5666 }
5667 
5668 static void
5669 add_amd_l2_cache(dev_info_t *devi, const char *label,
5670     uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
5671 {
5672         if (size == 0 || assoc == 0 || line_size == 0)
5673                 return;
5674         add_amd_l2_assoc(devi, label, assoc);
5675         if (lines_per_tag != 0)
5676                 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
5677         add_cache_prop(devi, label, line_str, line_size);
5678         add_cache_prop(devi, label, size_str, size * 1024);
5679 }
5680 
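     /*
      * Decode the AMD-style cache/tlb description for a devinfo node.
      * Extended function 0x80000005 describes the L1 TLBs (%eax: 2M/4M
      * pages, %ebx: 4K pages) and the L1 caches (%ecx: dcache, %edx:
      * icache); extended function 0x80000006 describes the L2 TLBs and
      * the L2 cache.
      */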
5681 static void
5682 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi)
5683 {
5684         struct cpuid_regs *cp;
5685 
5686         if (cpi->cpi_xmaxeax < 0x80000005)
5687                 return;
5688         cp = &cpi->cpi_extd[5];
5689 
5690         /*
5691          * 4M/2M L1 TLB configuration
5692          *
5693          * We report the size for 2M pages because AMD uses two
5694          * TLB entries for one 4M page.
5695          */
5696         add_amd_tlb(devi, "dtlb-2M",
5697             BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16));
5698         add_amd_tlb(devi, "itlb-2M",
5699             BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0));
5700 
5701         /*
5702          * 4K L1 TLB configuration
5703          */
5704 
5705         switch (cpi->cpi_vendor) {
5706                 uint_t nentries;
5707         case X86_VENDOR_TM:
5708                 if (cpi->cpi_family >= 5) {
5709                         /*
5710                          * Crusoe processors have 256 TLB entries, but
5711                          * cpuid data format constrains them to only
5712                          * reporting 255 of them.
5713                          */
5714                         if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255)
5715                                 nentries = 256;
5716                         /*
5717                          * Crusoe processors also have a unified TLB
5718                          */
5719                         add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24),
5720                             nentries);
5721                         break;
5722                 }
5723                 /*FALLTHROUGH*/
5724         default:
5725                 add_amd_tlb(devi, itlb4k_str,
5726                     BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16));
5727                 add_amd_tlb(devi, dtlb4k_str,
5728                     BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0));
5729                 break;
5730         }
5731 
5732         /*
5733          * data L1 cache configuration
5734          */
5735 
5736         add_amd_cache(devi, l1_dcache_str,
5737             BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16),
5738             BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0));
5739 
5740         /*
5741          * code L1 cache configuration
5742          */
5743 
5744         add_amd_cache(devi, l1_icache_str,
5745             BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16),
5746             BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0));
5747 
5748         if (cpi->cpi_xmaxeax < 0x80000006)
5749                 return;
5750         cp = &cpi->cpi_extd[6];
5751 
5752         /* Check for a unified L2 TLB for large pages */
5753 
5754         if (BITX(cp->cp_eax, 31, 16) == 0)
5755                 add_amd_l2_tlb(devi, "l2-tlb-2M",
5756                     BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
5757         else {
5758                 add_amd_l2_tlb(devi, "l2-dtlb-2M",
5759                     BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
5760                 add_amd_l2_tlb(devi, "l2-itlb-2M",
5761                     BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
5762         }
5763 
5764         /* Check for a unified L2 TLB for 4K pages */
5765 
5766         if (BITX(cp->cp_ebx, 31, 16) == 0) {
5767                 add_amd_l2_tlb(devi, "l2-tlb-4K",
5768                     BITX(cp->cp_ebx, 15, 12), BITX(cp->cp_ebx, 11, 0));
5769         } else {
5770                 add_amd_l2_tlb(devi, "l2-dtlb-4K",
5771                     BITX(cp->cp_ebx, 31, 28), BITX(cp->cp_ebx, 27, 16));
5772                 add_amd_l2_tlb(devi, "l2-itlb-4K",
5773                     BITX(cp->cp_ebx, 15, 12), BITX(cp->cp_ebx, 11, 0));
5774         }
5775 
5776         add_amd_l2_cache(devi, l2_cache_str,
5777             BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12),
5778             BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0));
5779 }
5780 
5781 /*
5782  * There are two basic ways that the x86 world describes its cache
5783  * and tlb architecture - Intel's way and AMD's way.
5784  *
5785  * Return which flavor of cache architecture we should use.
5786  */
5787 static int
5788 x86_which_cacheinfo(struct cpuid_info *cpi)
5789 {
5790         switch (cpi->cpi_vendor) {
5791         case X86_VENDOR_Intel:
5792                 if (cpi->cpi_maxeax >= 2)
5793                         return (X86_VENDOR_Intel);
5794                 break;
5795         case X86_VENDOR_AMD:
5796                 /*
5797                  * The K5 model 1 was the first part from AMD that reported
5798                  * cache sizes via extended cpuid functions.
5799                  */
5800                 if (cpi->cpi_family > 5 ||
5801                     (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
5802                         return (X86_VENDOR_AMD);
5803                 break;
5804         case X86_VENDOR_TM:
5805                 if (cpi->cpi_family >= 5)
5806                         return (X86_VENDOR_AMD);
5807                 /*FALLTHROUGH*/
5808         default:
5809                 /*
5810                  * If they have extended CPU data for 0x80000005
5811                  * then we assume they have AMD-format cache
5812                  * information.
5813                  *
5814                  * If not, and the vendor happens to be Cyrix,
5815                  * then try our Cyrix-specific handler.
5816                  *
5817                  * If we're not Cyrix, then assume we're using Intel's
5818                  * table-driven format instead.
5819                  */
5820                 if (cpi->cpi_xmaxeax >= 0x80000005)
5821                         return (X86_VENDOR_AMD);
5822                 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix)
5823                         return (X86_VENDOR_Cyrix);
5824                 else if (cpi->cpi_maxeax >= 2)
5825                         return (X86_VENDOR_Intel);
5826                 break;
5827         }
5828         return (-1);
5829 }
5830 
5831 void
5832 cpuid_set_cpu_properties(void *dip, processorid_t cpu_id,
5833     struct cpuid_info *cpi)
5834 {
5835         dev_info_t *cpu_devi;
5836         int create;
5837 
5838         cpu_devi = (dev_info_t *)dip;
5839 
5840         /* device_type */
5841         (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
5842             "device_type", "cpu");
5843 
5844         /* reg */
5845         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5846             "reg", cpu_id);
5847 
5848         /* cpu-mhz, and clock-frequency */
5849         if (cpu_freq > 0) {
5850                 long long mul;
5851 
5852                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5853                     "cpu-mhz", cpu_freq);
5854                 if ((mul = cpu_freq * 1000000LL) <= INT_MAX)
5855                         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5856                             "clock-frequency", (int)mul);
5857         }
5858 
5859         if (!is_x86_feature(x86_featureset, X86FSET_CPUID)) {
5860                 return;
5861         }
5862 
5863         /* vendor-id */
5864         (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
5865             "vendor-id", cpi->cpi_vendorstr);
5866 
5867         if (cpi->cpi_maxeax == 0) {
5868                 return;
5869         }
5870 
5871         /*
5872          * family, model, and step
5873          */
5874         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5875             "family", CPI_FAMILY(cpi));
5876         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5877             "cpu-model", CPI_MODEL(cpi));
5878         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5879             "stepping-id", CPI_STEP(cpi));
5880 
5881         /* type */
5882         switch (cpi->cpi_vendor) {
5883         case X86_VENDOR_Intel:
5884                 create = 1;
5885                 break;
5886         default:
5887                 create = 0;
5888                 break;
5889         }
5890         if (create)
5891                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5892                     "type", CPI_TYPE(cpi));
5893 
5894         /* ext-family */
5895         switch (cpi->cpi_vendor) {
5896         case X86_VENDOR_Intel:
5897         case X86_VENDOR_AMD:
5898                 create = cpi->cpi_family >= 0xf;
5899                 break;
5900         default:
5901                 create = 0;
5902                 break;
5903         }
5904         if (create)
5905                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5906                     "ext-family", CPI_FAMILY_XTD(cpi));
5907 
5908         /* ext-model */
5909         switch (cpi->cpi_vendor) {
5910         case X86_VENDOR_Intel:
5911                 create = IS_EXTENDED_MODEL_INTEL(cpi);
5912                 break;
5913         case X86_VENDOR_AMD:
5914                 create = CPI_FAMILY(cpi) == 0xf;
5915                 break;
5916         default:
5917                 create = 0;
5918                 break;
5919         }
5920         if (create)
5921                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5922                     "ext-model", CPI_MODEL_XTD(cpi));
5923 
5924         /* generation */
5925         switch (cpi->cpi_vendor) {
5926         case X86_VENDOR_AMD:
5927                 /*
5928                  * AMD K5 model 1 was the first part to support this
5929                  */
5930                 create = cpi->cpi_xmaxeax >= 0x80000001;
5931                 break;
5932         default:
5933                 create = 0;
5934                 break;
5935         }
5936         if (create)
5937                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5938                     "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8));
5939 
5940         /* brand-id */
5941         switch (cpi->cpi_vendor) {
5942         case X86_VENDOR_Intel:
5943                 /*
5944                  * brand id first appeared on Pentium III Xeon model 8 and
5945                  * Celeron model 8 processors, and on Opteron
5946                  */
5947                 create = cpi->cpi_family > 6 ||
5948                     (cpi->cpi_family == 6 && cpi->cpi_model >= 8);
5949                 break;
5950         case X86_VENDOR_AMD:
5951                 create = cpi->cpi_family >= 0xf;
5952                 break;
5953         default:
5954                 create = 0;
5955                 break;
5956         }
5957         if (create && cpi->cpi_brandid != 0) {
5958                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5959                     "brand-id", cpi->cpi_brandid);
5960         }
5961 
5962         /* chunks, and apic-id */
5963         switch (cpi->cpi_vendor) {
5964                 /*
5965                  * first available on Pentium IV and Opteron (K8)
5966                  */
5967         case X86_VENDOR_Intel:
5968                 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
5969                 break;
5970         case X86_VENDOR_AMD:
5971                 create = cpi->cpi_family >= 0xf;
5972                 break;
5973         default:
5974                 create = 0;
5975                 break;
5976         }
5977         if (create) {
5978                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5979                     "chunks", CPI_CHUNKS(cpi));
5980                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5981                     "apic-id", cpi->cpi_apicid);
5982                 if (cpi->cpi_chipid >= 0) {
5983                         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5984                             "chip#", cpi->cpi_chipid);
5985                         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5986                             "clog#", cpi->cpi_clogid);
5987                 }
5988         }
5989 
5990         /* cpuid-features */
5991         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5992             "cpuid-features", CPI_FEATURES_EDX(cpi));
5993 
5995         /* cpuid-features-ecx */
5996         switch (cpi->cpi_vendor) {
5997         case X86_VENDOR_Intel:
5998                 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
5999                 break;
6000         case X86_VENDOR_AMD:
6001                 create = cpi->cpi_family >= 0xf;
6002                 break;
6003         default:
6004                 create = 0;
6005                 break;
6006         }
6007         if (create)
6008                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6009                     "cpuid-features-ecx", CPI_FEATURES_ECX(cpi));
6010 
6011         /* ext-cpuid-features */
6012         switch (cpi->cpi_vendor) {
6013         case X86_VENDOR_Intel:
6014         case X86_VENDOR_AMD:
6015         case X86_VENDOR_Cyrix:
6016         case X86_VENDOR_TM:
6017         case X86_VENDOR_Centaur:
6018                 create = cpi->cpi_xmaxeax >= 0x80000001;
6019                 break;
6020         default:
6021                 create = 0;
6022                 break;
6023         }
6024         if (create) {
6025                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6026                     "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi));
6027                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6028                     "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi));
6029         }
6030 
6031         /*
6032          * Brand String first appeared in Intel Pentium IV, AMD K5
6033          * model 1, and Cyrix GXm.  On earlier models we try and
6034          * simulate something similar .. so this string should always
6035          * say -something- about the processor, however lame.
6036          */
6037         (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
6038             "brand-string", cpi->cpi_brandstr);
6039 
6040         /*
6041          * Finally, cache and tlb information
6042          */
6043         switch (x86_which_cacheinfo(cpi)) {
6044         case X86_VENDOR_Intel:
6045                 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
6046                 break;
6047         case X86_VENDOR_Cyrix:
6048                 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
6049                 break;
6050         case X86_VENDOR_AMD:
6051                 amd_cache_info(cpi, cpu_devi);
6052                 break;
6053         default:
6054                 break;
6055         }
6056 }
6057 
6058 struct l2info {
6059         int *l2i_csz;
6060         int *l2i_lsz;
6061         int *l2i_assoc;
6062         int l2i_ret;
6063 };
6064 
6065 /*
6066  * A cacheinfo walker that fetches the size, line-size and associativity
6067  * of the L2 cache
6068  */
6069 static int
6070 intel_l2cinfo(void *arg, const struct cachetab *ct)
6071 {
6072         struct l2info *l2i = arg;
6073         int *ip;
6074 
6075         if (ct->ct_label != l2_cache_str &&
6076             ct->ct_label != sl2_cache_str)
6077                 return (0);     /* not an L2 -- keep walking */
6078 
6079         if ((ip = l2i->l2i_csz) != NULL)
6080                 *ip = ct->ct_size;
6081         if ((ip = l2i->l2i_lsz) != NULL)
6082                 *ip = ct->ct_line_size;
6083         if ((ip = l2i->l2i_assoc) != NULL)
6084                 *ip = ct->ct_assoc;
6085         l2i->l2i_ret = ct->ct_size;
6086         return (1);             /* was an L2 -- terminate walk */
6087 }
6088 
6089 /*
6090  * AMD L2/L3 Cache and TLB Associativity Field Definition:
6091  *
6092  *      Unlike the associativity for the L1 cache and tlb where the 8 bit
6093  *      value is the associativity, the associativity for the L2 cache and
6094  *      tlb is encoded in the following table. The 4 bit L2 value serves as
6095  *      an index into the amd_afd[] array to determine the associativity.
6096  *      -1 is undefined. 0 is fully associative.
6097  */
6098 
6099 static int amd_afd[] =
6100         {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0};
6101 
6102 static void
6103 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i)
6104 {
6105         struct cpuid_regs *cp;
6106         uint_t size, assoc;
6107         int i;
6108         int *ip;
6109 
6110         if (cpi->cpi_xmaxeax < 0x80000006)
6111                 return;
6112         cp = &cpi->cpi_extd[6];
6113 
6114         if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 &&
6115             (size = BITX(cp->cp_ecx, 31, 16)) != 0) {
6116                 uint_t cachesz = size * 1024;
6117                 assoc = amd_afd[i];
6118 
6119                 ASSERT(assoc != -1);
6120 
6121                 if ((ip = l2i->l2i_csz) != NULL)
6122                         *ip = cachesz;
6123                 if ((ip = l2i->l2i_lsz) != NULL)
6124                         *ip = BITX(cp->cp_ecx, 7, 0);
6125                 if ((ip = l2i->l2i_assoc) != NULL)
6126                         *ip = assoc;
6127                 l2i->l2i_ret = cachesz;
6128         }
6129 }
6130 
6131 int
6132 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc)
6133 {
6134         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
6135         struct l2info __l2info, *l2i = &__l2info;
6136 
6137         l2i->l2i_csz = csz;
6138         l2i->l2i_lsz = lsz;
6139         l2i->l2i_assoc = assoc;
6140         l2i->l2i_ret = -1;
6141 
6142         switch (x86_which_cacheinfo(cpi)) {
6143         case X86_VENDOR_Intel:
6144                 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
6145                 break;
6146         case X86_VENDOR_Cyrix:
6147                 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
6148                 break;
6149         case X86_VENDOR_AMD:
6150                 amd_l2cacheinfo(cpi, l2i);
6151                 break;
6152         default:
6153                 break;
6154         }
6155         return (l2i->l2i_ret);
6156 }
6157 
6158 #if !defined(__xpv)
6159 
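     /*
      * Allocate the monitor/mwait wakeup target for a CPU. The returned
      * buffer is aligned to (and at least as large as) the biggest
      * monitor line size, mon_max, reported by cpuid, since the hardware
      * watches an entire monitor line.
      */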
6160 uint32_t *
6161 cpuid_mwait_alloc(cpu_t *cpu)
6162 {
6163         uint32_t        *ret;
6164         size_t          mwait_size;
6165 
6166         ASSERT(cpuid_checkpass(CPU, 2));
6167 
6168         mwait_size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max;
6169         if (mwait_size == 0)
6170                 return (NULL);
6171 
6172         /*
6173          * kmem_alloc() returns cache line size aligned data for mwait_size
6174          * allocations.  mwait_size is currently cache line sized.  Neither
6175          * of these implementation details is guaranteed to be true in the
6176          * future.
6177          *
6178          * First try allocating mwait_size as kmem_alloc() currently returns
6179          * correctly aligned memory.  If kmem_alloc() does not return
6180          * mwait_size aligned memory, then use mwait_size ROUNDUP.
6181          *
6182          * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we
6183          * decide to free this memory.
6184          */
6185         ret = kmem_zalloc(mwait_size, KM_SLEEP);
6186         if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) {
6187                 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
6188                 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size;
6189                 *ret = MWAIT_RUNNING;
6190                 return (ret);
6191         } else {
6192                 kmem_free(ret, mwait_size);
6193                 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP);
6194                 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
6195                 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2;
6196                 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size);
6197                 *ret = MWAIT_RUNNING;
6198                 return (ret);
6199         }
6200 }
6201 
6202 void
6203 cpuid_mwait_free(cpu_t *cpu)
6204 {
6205         if (cpu->cpu_m.mcpu_cpi == NULL) {
6206                 return;
6207         }
6208 
6209         if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL &&
6210             cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) {
6211                 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual,
6212                     cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual);
6213         }
6214 
6215         cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL;
6216         cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0;
6217 }
6218 
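     /*
      * Patch the kernel's tsc_read() routine at boot by copying in
      * whichever pre-assembled implementation (fenced or unfenced rdtsc,
      * rdtscp, or a stub for CPUs with no usable TSC) matches the
      * capabilities cpuid discovered.
      */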
6219 void
6220 patch_tsc_read(int flag)
6221 {
6222         size_t cnt;
6223 
6224         switch (flag) {
6225         case TSC_NONE:
6226                 cnt = &_no_rdtsc_end - &_no_rdtsc_start;
6227                 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt);
6228                 break;
6229         case TSC_RDTSC_MFENCE:
6230                 cnt = &_tsc_mfence_end - &_tsc_mfence_start;
6231                 (void) memcpy((void *)tsc_read,
6232                     (void *)&_tsc_mfence_start, cnt);
6233                 break;
6234         case TSC_RDTSC_LFENCE:
6235                 cnt = &_tsc_lfence_end - &_tsc_lfence_start;
6236                 (void) memcpy((void *)tsc_read,
6237                     (void *)&_tsc_lfence_start, cnt);
6238                 break;
6239         case TSC_TSCP:
6240                 cnt = &_tscp_end - &_tscp_start;
6241                 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt);
6242                 break;
6243         default:
6244                 /* Bail for unexpected TSC types. (TSC_NONE covers 0) */
6245                 cmn_err(CE_PANIC, "Unrecognized TSC type: %d", flag);
6246                 break;
6247         }
6248         tsc_type = flag;
6249 }
6250 
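     /*
      * Report whether deep ACPI C-states may be used; we require an
      * Intel CPU whose TSC is invariant across C-states, as indicated
      * by cpuid extended function 0x80000007 %edx.
      */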
6251 int
6252 cpuid_deep_cstates_supported(void)
6253 {
6254         struct cpuid_info *cpi;
6255         struct cpuid_regs regs;
6256 
6257         ASSERT(cpuid_checkpass(CPU, 1));
6258 
6259         cpi = CPU->cpu_m.mcpu_cpi;
6260 
6261         if (!is_x86_feature(x86_featureset, X86FSET_CPUID))
6262                 return (0);
6263 
6264         switch (cpi->cpi_vendor) {
6265         case X86_VENDOR_Intel:
6266                 if (cpi->cpi_xmaxeax < 0x80000007)
6267                         return (0);
6268 
6269                 /*
6270                  * Does the TSC run at a constant rate in all ACPI C-states?
6271                  */
6272                 regs.cp_eax = 0x80000007;
6273                 (void) __cpuid_insn(&regs);
6274                 return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE);
6275 
6276         default:
6277                 return (0);
6278         }
6279 }
6280 
6281 #endif  /* !__xpv */
6282 
6283 void
6284 post_startup_cpu_fixups(void)
6285 {
6286 #ifndef __xpv
6287         /*
6288          * Some AMD processors support C1E state. Entering this state will
6289          * cause the local APIC timer to stop, which we can't deal with at
6290          * this time.
6291          */
6292         if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) {
6293                 on_trap_data_t otd;
6294                 uint64_t reg;
6295 
6296                 if (!on_trap(&otd, OT_DATA_ACCESS)) {
6297                         reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT);
6298                         /* Disable C1E state if it is enabled by BIOS */
6299                         if ((reg >> AMD_ACTONCMPHALT_SHIFT) &
6300                             AMD_ACTONCMPHALT_MASK) {
6301                                 reg &= ~(AMD_ACTONCMPHALT_MASK <<
6302                                     AMD_ACTONCMPHALT_SHIFT);
6303                                 wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg);
6304                         }
6305                 }
6306                 no_trap();
6307         }
6308 #endif  /* !__xpv */
6309 }
6310 
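     /*
      * Enable process-context identifiers (CR4.PCIDE) if the PCID (and,
      * independently, INVPCID) features were detected, unless the
      * x86_use_pcid/x86_use_invpcid tunables were already overridden
      * (e.g. via /etc/system).
      */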
6311 void
6312 enable_pcid(void)
6313 {
6314         if (x86_use_pcid == -1)
6315                 x86_use_pcid = is_x86_feature(x86_featureset, X86FSET_PCID);
6316 
6317         if (x86_use_invpcid == -1) {
6318                 x86_use_invpcid = is_x86_feature(x86_featureset,
6319                     X86FSET_INVPCID);
6320         }
6321 
6322         if (!x86_use_pcid)
6323                 return;
6324 
6325         /*
6326          * Intel says that on setting PCIDE, the CPU immediately starts
6327          * using the PCID bits; better make sure there's nothing there.
6328          */
6329         ASSERT((getcr3() & MMU_PAGEOFFSET) == PCID_NONE);
6330 
6331         setcr4(getcr4() | CR4_PCIDE);
6332 }
6333 
6334 /*
6335  * Set up the necessary registers to enable the XSAVE feature on this
6336  * processor. This must be called early enough that no xsave/xrstor
6337  * instructions execute before the MSRs are properly set up. The current
6338  * implementation makes the following assumptions (an ordering sketch
6339  * follows the function below):
6340  * - cpuid_pass1() has been run, so the x86 features are known.
6341  * - fpu_probe() has been run, so fp_save_mech has been chosen.
6342  */
6343 void
6344 xsave_setup_msr(cpu_t *cpu)
6345 {
6346         ASSERT(fp_save_mech == FP_XSAVE);
6347         ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
6348 
6349         /* Enable OSXSAVE in CR4. */
6350         setcr4(getcr4() | CR4_OSXSAVE);
6351         /*
6352          * Update SW copy of ECX, so that /dev/cpu/self/cpuid will report
6353          * correct value.
6354          */
6355         cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_ecx |= CPUID_INTC_ECX_OSXSAVE;
6356         setup_xfem();
6357 }
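
     /*
      * Illustrative boot ordering implied by the assumptions above; this
      * is a sketch, not the actual startup sequence:
      *
      *      cpuid_pass1(cpu, x86_featureset);       (features known)
      *      fpu_probe();                            (fp_save_mech chosen)
      *      if (fp_save_mech == FP_XSAVE)
      *              xsave_setup_msr(cpu);
      */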
6358 
6359 /*
6360  * Starting with the Westmere processor, the local
6361  * APIC timer will continue running in all C-states,
6362  * including the deepest C-states.
6363  */
6364 int
6365 cpuid_arat_supported(void)
6366 {
6367         struct cpuid_info *cpi;
6368         struct cpuid_regs regs;
6369 
6370         ASSERT(cpuid_checkpass(CPU, 1));
6371         ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
6372 
6373         cpi = CPU->cpu_m.mcpu_cpi;
6374 
6375         switch (cpi->cpi_vendor) {
6376         case X86_VENDOR_Intel:
6377                 /*
6378                  * Always-running Local APIC Timer is
6379                  * indicated by CPUID.6.EAX[2].
6380                  */
6381                 if (cpi->cpi_maxeax >= 6) {
6382                         regs.cp_eax = 6;
6383                         (void) cpuid_insn(NULL, &regs);
6384                         return (regs.cp_eax & CPUID_CSTATE_ARAT);
6385                 } else {
6386                         return (0);
6387                 }
6388         default:
6389                 return (0);
6390         }
6391 }
6392 
6393 /*
6394  * Check support for the Intel ENERGY_PERF_BIAS feature.
6395  */
6396 int
6397 cpuid_iepb_supported(struct cpu *cp)
6398 {
6399         struct cpuid_info *cpi = cp->cpu_m.mcpu_cpi;
6400         struct cpuid_regs regs;
6401 
6402         ASSERT(cpuid_checkpass(cp, 1));
6403 
6404         if (!(is_x86_feature(x86_featureset, X86FSET_CPUID)) ||
6405             !(is_x86_feature(x86_featureset, X86FSET_MSR))) {
6406                 return (0);
6407         }
6408 
6409         /*
6410          * The Intel ENERGY_PERF_BIAS MSR is indicated by
6411          * capability bit CPUID.6.ECX[3].
6412          */
6413         if ((cpi->cpi_vendor != X86_VENDOR_Intel) || (cpi->cpi_maxeax < 6))
6414                 return (0);
6415 
6416         regs.cp_eax = 0x6;
6417         (void) cpuid_insn(NULL, &regs);
6418         return (regs.cp_ecx & CPUID_EPB_SUPPORT);
6419 }
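
     /*
      * Illustrative use (hypothetical, not this file's code): when the
      * feature is present, power management could bias the hardware
      * toward energy saving through the architectural
      * IA32_ENERGY_PERF_BIAS MSR (0x1b0), whose low four bits range from
      * 0 (maximum performance) to 15 (maximum energy saving):
      *
      *      if (cpuid_iepb_supported(CPU))
      *              wrmsr(0x1b0, 6);        (a balanced policy)
      */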
6420 
6421 /*
6422  * Check support for the TSC deadline timer.
6423  *
6424  * The TSC deadline timer provides a superior programming model to the
6425  * local APIC timer, one that eliminates "time drift": instead of
6426  * specifying a relative time, software specifies an absolute time as
6427  * the target at which the processor should generate a timer event. A
6428  * hypothetical sketch of its use follows the function below.
6429  */
6430 int
6431 cpuid_deadline_tsc_supported(void)
6432 {
6433         struct cpuid_info *cpi = CPU->cpu_m.mcpu_cpi;
6434         struct cpuid_regs regs;
6435 
6436         ASSERT(cpuid_checkpass(CPU, 1));
6437         ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
6438 
6439         switch (cpi->cpi_vendor) {
6440         case X86_VENDOR_Intel:
6441                 if (cpi->cpi_maxeax >= 1) {
6442                         regs.cp_eax = 1;
6443                         (void) cpuid_insn(NULL, &regs);
6444                         return (regs.cp_ecx & CPUID_DEADLINE_TSC);
6445                 } else {
6446                         return (0);
6447                 }
6448         default:
6449                 return (0);
6450         }
6451 }
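
     /*
      * Illustrative sketch (hypothetical, not part of this file): to arm
      * a one-shot event at an absolute time, software programs the local
      * APIC LVT timer for TSC-deadline mode (elided here) and writes the
      * target TSC value to the architectural IA32_TSC_DEADLINE MSR
      * (0x6e0); writing zero disarms a pending deadline. With delta made
      * up for the example:
      *
      *      if (cpuid_deadline_tsc_supported()) {
      *              uint64_t target = tsc_read() + delta;
      *              wrmsr(0x6e0, target);   (fires when the TSC hits it)
      *      }
      */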
6452 
6453 #if defined(__amd64) && !defined(__xpv)
6454 /*
6455  * Patch in versions of bcopy for high-performance Intel Nehalem processors
6456  * and later.
6457  */
6458 void
6459 patch_memops(uint_t vendor)
6460 {
6461         size_t cnt, i;
6462         caddr_t to, from;
6463 
6464         if ((vendor == X86_VENDOR_Intel) &&
6465             is_x86_feature(x86_featureset, X86FSET_SSE4_2)) {
6466                 cnt = &bcopy_patch_end - &bcopy_patch_start;
6467                 to = &bcopy_ck_size;
6468                 from = &bcopy_patch_start;
6469                 for (i = 0; i < cnt; i++) {
6470                         *to++ = *from++;
6471                 }
6472         }
6473 }
6474 #endif  /* __amd64 && !__xpv */
6475 
6476 /*
6477  * We're being asked to tell the system how many bits are required to
6478  * represent the various core and strand IDs. While it's tempting to derive
6479  * this from cpi_ncore_per_chip and cpi_ncpu_per_chip, that isn't quite
6480  * correct: the APIC may reserve a wider ID field than the installed counts
6481  * require (a 6-core part may, say, reserve four core-ID bits). Instead, we
6482  * base this on the APIC's bit widths, only ever updating to a larger value.
6483  */
6484 void
6485 cpuid_get_ext_topo(cpu_t *cpu, uint_t *core_nbits, uint_t *strand_nbits)
6486 {
6487         struct cpuid_info *cpi;
6488 
6489         VERIFY(cpuid_checkpass(cpu, 1));
6490         cpi = cpu->cpu_m.mcpu_cpi;
6491 
6492         if (cpi->cpi_ncore_bits > *core_nbits) {
6493                 *core_nbits = cpi->cpi_ncore_bits;
6494         }
6495 
6496         if (cpi->cpi_nthread_bits > *strand_nbits) {
6497                 *strand_nbits = cpi->cpi_nthread_bits;
6498         }
6499 }
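
     /*
      * For illustration (hypothetical, not this file's code): a consumer
      * can use the widths reported above to take an APIC ID apart, with
      * strand bits in the low-order positions and core bits directly
      * above them:
      *
      *      uint_t strand = apicid & ((1U << strand_nbits) - 1);
      *      uint_t core = (apicid >> strand_nbits) &
      *          ((1U << core_nbits) - 1);
      *      uint_t chip = apicid >> (strand_nbits + core_nbits);
      */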
6500 
6501 void
6502 cpuid_pass_ucode(cpu_t *cpu, uchar_t *fset)
6503 {
6504         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
6505         struct cpuid_regs cp;
6506 
6507         /*
6508          * Reread the CPUID portions that we need for various security
6509          * information.
6510          */
6511         if (cpi->cpi_vendor == X86_VENDOR_Intel) {
6512                 /*
6513                  * Check if we now have leaf 7 available to us.
6514                  */
6515                 if (cpi->cpi_maxeax < 7) {
6516                         bzero(&cp, sizeof (cp));
6517                         cp.cp_eax = 0;
6518                         cpi->cpi_maxeax = __cpuid_insn(&cp);
6519                         if (cpi->cpi_maxeax < 7)
6520                                 return;
6521                 }
6522 
6523                 bzero(&cp, sizeof (cp));
6524                 cp.cp_eax = 7;
6525                 cp.cp_ecx = 0;
6526                 (void) __cpuid_insn(&cp);
6527                 cpi->cpi_std[7] = cp;
6528         } else if (cpi->cpi_vendor == X86_VENDOR_AMD) {
6529                 /* No xcpuid support */
6530                 if (cpi->cpi_family < 5 ||
6531                     (cpi->cpi_family == 5 && cpi->cpi_model < 1))
6532                         return;
6533 
6534                 if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8) {
6535                         bzero(&cp, sizeof (cp));
6536                         cp.cp_eax = CPUID_LEAF_EXT_0;
6537                         cpi->cpi_xmaxeax = __cpuid_insn(&cp);
6538                         if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8) {
6539                                 return;
6540                         }
6541                 }
6542 
6543                 bzero(&cp, sizeof (cp));
6544                 cp.cp_eax = CPUID_LEAF_EXT_8;
6545                 (void) __cpuid_insn(&cp);
6546                 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8, &cp);
6547                 cpi->cpi_extd[8] = cp;
6548         } else {
6549                 /*
6550                  * Nothing to do here. Return an empty set which has already
6551                  * been zeroed for us.
6552                  */
6553                 return;
6554         }
6555         cpuid_scan_security(cpu, fset);
6556 }
6557 
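     /*
      * Cross-call handler for cpuid_post_ucodeadm() below: arg0 is the
      * base of an NCPU-sized array of feature sets, and each CPU rescans
      * its security-relevant CPUID state into its own slot.
      */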
6558 /* ARGSUSED */
6559 static int
6560 cpuid_post_ucodeadm_xc(xc_arg_t arg0, xc_arg_t arg1, xc_arg_t arg2)
6561 {
6562         uchar_t *fset;
6563 
6564         fset = (uchar_t *)(arg0 + sizeof (x86_featureset) * CPU->cpu_id);
6565         cpuid_pass_ucode(CPU, fset);
6566 
6567         return (0);
6568 }
6569 
6570 /*
6571  * After a microcode update that changed the microcode version, we need to
6572  * rescan CPUID. To do this we first check every CPU to make sure it is
6573  * running the same microcode revision, and then cross call all of those
6574  * CPUs. It's the caller's job to make sure that no one else can end up
6575  * doing an update while this is going on.
6576  *
6577  * We assume that the system is microcode-capable if we're called.
6578  */
6579 void
6580 cpuid_post_ucodeadm(void)
6581 {
6582         uint32_t rev;
6583         int i;
6584         struct cpu *cpu;
6585         cpuset_t cpuset;
6586         void *argdata;
6587         uchar_t *f0;
6588 
6589         argdata = kmem_zalloc(sizeof (x86_featureset) * NCPU, KM_SLEEP);
6590 
6591         mutex_enter(&cpu_lock);
6592         cpu = cpu_get(0);
6593         rev = cpu->cpu_m.mcpu_ucode_info->cui_rev;
6594         CPUSET_ONLY(cpuset, 0);
6595         for (i = 1; i < max_ncpus; i++) {
6596                 if ((cpu = cpu_get(i)) == NULL)
6597                         continue;
6598 
6599                 if (cpu->cpu_m.mcpu_ucode_info->cui_rev != rev) {
6600                         panic("post microcode update CPU %d has differing "
6601                             "microcode revision (%u) from CPU 0 (%u)",
6602                             i, cpu->cpu_m.mcpu_ucode_info->cui_rev, rev);
6603                 }
6604                 CPUSET_ADD(cpuset, i);
6605         }
6606 
6607         kpreempt_disable();
6608         xc_sync((xc_arg_t)argdata, 0, 0, CPUSET2BV(cpuset),
6609             cpuid_post_ucodeadm_xc);
6610         kpreempt_enable();
6611 
6612         /*
6613          * OK, now look at each CPU and verify its feature set matches CPU 0's.
6614          */
6615         f0 = argdata;
6616         for (i = 1; i < max_ncpus; i++) {
6617                 uchar_t *fset;
6618                 if (!CPU_IN_SET(cpuset, i))
6619                         continue;
6620 
6621                 fset = (uchar_t *)((uintptr_t)argdata +
6622                     sizeof (x86_featureset) * i);
6623 
6624                 if (!compare_x86_featureset(f0, fset)) {
6625                         panic("Post microcode update CPU %d has "
6626                             "differing security feature set (%p) from "
6627                             "CPU 0 (%p), not appending to feature set", i,
6628                             (void *)fset, (void *)f0);
6629                 }
6630         }
6631 
6632         mutex_exit(&cpu_lock);
6633 
6634         for (i = 0; i < NUM_X86_FEATURES; i++) {
6635                 if (!is_x86_feature(x86_featureset, i) &&
6636                     is_x86_feature(f0, i)) {
6637                         cmn_err(CE_CONT, "?post-ucode x86_feature: %s\n",
6638                             x86_feature_names[i]);
6639                         add_x86_feature(x86_featureset, i);
6640                 }
             }
6641         kmem_free(argdata, sizeof (x86_featureset) * NCPU);
6642 }