1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
  24  * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
  25  * Copyright 2014 Josef "Jeff" Sipek <jeffpc@josefsipek.net>
  26  */
  27 /*
  28  * Copyright (c) 2010, Intel Corporation.
  29  * All rights reserved.
  30  */
  31 /*
  32  * Portions Copyright 2009 Advanced Micro Devices, Inc.
  33  */
  34 /*
  35  * Copyright 2019, Joyent, Inc.
  36  */
  37 
  38 /*
  39  * CPU Identification logic
  40  *
  41  * The purpose of this file and its companion, cpuid_subr.c, is to help deal
  42  * with the identification of CPUs, their features, and their topologies. More
  43  * specifically, this file helps drive the following:
  44  *
  45  * 1. Enumeration of features of the processor which are used by the kernel to
  46  *    determine what features to enable or disable. These may be instruction set
  47  *    enhancements or features that we use.
  48  *
  49  * 2. Enumeration of instruction set architecture (ISA) additions that userland
  50  *    will be told about through the auxiliary vector.
  51  *
  52  * 3. Understanding the physical topology of the CPU such as the number of
  53  *    caches, how many cores it has, whether or not it supports symmetric
  54  *    multi-processing (SMT), etc.
  55  *
  56  * ------------------------
  57  * CPUID History and Basics
  58  * ------------------------
  59  *
  60  * The cpuid instruction was added by Intel roughly around the time that the
  61  * original Pentium was introduced. The purpose of cpuid was to tell in a
  62  * programmatic fashion information about the CPU that previously was guessed
  63  * at. For example, an important part of cpuid is that we can know what
  64  * extensions to the ISA exist. If you use an invalid opcode you would get a
  65  * #UD, so this method allows a program (whether a user program or the kernel)
  66  * to determine what exists without crashing or getting a SIGILL. Of course,
  67  * this was also during the era of the clones and the AMD Am5x86. The vendor
  68  * name shows up first in cpuid for a reason.
  69  *
  70  * cpuid information is broken down into ranges called a 'leaf'. Each leaf puts
  71  * unique values into the registers %eax, %ebx, %ecx, and %edx and each leaf has
  72  * its own meaning. The different leaves are broken down into different regions:
  73  *
  74  *      [ 0, 7fffffff ]                 This region is called the 'basic'
  75  *                                      region. This region is generally defined
  76  *                                      by Intel, though some of the original
  77  *                                      portions have different meanings based
  78  *                                      on the manufacturer. These days, Intel
  79  *                                      adds most new features to this region.
  80  *                                      AMD adds non-Intel compatible
  81  *                                      information in the third, extended
  82  *                                      region. Intel uses this for everything
  83  *                                      including ISA extensions, CPU
  84  *                                      features, cache information, topology,
  85  *                                      and more.
  86  *
  87  *                                      There is a hole carved out of this
  88  *                                      region which is reserved for
  89  *                                      hypervisors.
  90  *
  91  *      [ 40000000, 4fffffff ]          This region, which is found in the
  92  *                                      middle of the previous region, is
  93  *                                      explicitly promised to never be used by
  94  *                                      CPUs. Instead, it is used by hypervisors
  95  *                                      to communicate information about
  96  *                                      themselves to the operating system. The
  97  *                                      values and details are unique for each
  98  *                                      hypervisor.
  99  *
 100  *      [ 80000000, ffffffff ]          This region is called the 'extended'
 101  *                                      region. Some of the low leaves mirror
 102  *                                      parts of the basic leaves. This region
 103  *                                      has generally been used by AMD for
 104  *                                      various extensions. For example, AMD-
 105  *                                      specific information about caches,
 106  *                                      features, and topology are found in this
 107  *                                      region.
 108  *
 109  * To specify a range, you place the desired leaf into %eax, zero %ebx, %ecx,
 110  * and %edx, and then issue the cpuid instruction. At the first leaf in each of
 111  * the ranges, one of the primary things returned is the maximum valid leaf in
 112  * that range. This allows for discovery of what range of CPUID is valid.
 113  *
 114  * The CPUs have potentially surprising behavior when using an invalid leaf or
 115  * unimplemented leaf. If the requested leaf is within the valid basic or
 116  * extended range, but is unimplemented, then %eax, %ebx, %ecx, and %edx will be
 117  * set to zero. However, if you specify a leaf that is outside of a valid range,
 118  * then instead it will be filled with the last valid _basic_ leaf. For example,
 119  * if the maximum basic value is on leaf 0x3, then issuing a cpuid for leaf 4 or
 120  * an invalid extended leaf will return the information for leaf 3.
 121  *
 122  * Some leaves are broken down into sub-leaves. This means that the value
 123  * depends on both the leaf asked for in %eax and a secondary register. For
 124  * example, Intel uses the value in %ecx on leaf 7 to indicate a sub-leaf to get
 125  * additional information. Or when getting topology information in leaf 0xb, the
 126  * initial value in %ecx changes which level of the topology that you are
 127  * getting information about.
 128  *
 129  * cpuid values are always kept to 32 bits regardless of whether or not the
 130  * program is in 64-bit mode. When executing in 64-bit mode, the upper
 131  * 32 bits of the register are always set to zero so that way the values are the
 132  * same regardless of execution mode.
 133  *
 134  * ----------------------
 135  * Identifying Processors
 136  * ----------------------
 137  *
 138  * We can identify a processor in two steps. The first step looks at cpuid leaf
 139  * 0. Leaf 0 contains the processor's vendor information. This is done by
 140  * putting a 12 character string in %ebx, %ecx, and %edx. On AMD, it is
 141  * 'AuthenticAMD' and on Intel it is 'GenuineIntel'.
 142  *
 143  * From there, a processor is identified by a combination of three different
 144  * values:
 145  *
 146  *  1. Family
 147  *  2. Model
 148  *  3. Stepping
 149  *
 150  * Each vendor uses the family and model to uniquely identify a processor. The
 151  * way that family and model are changed depends on the vendor. For example,
 152  * Intel has been using family 0x6 for almost all of their processor since the
 153  * Pentium Pro/Pentium II era, often called the P6. The model is used to
 154  * identify the exact processor. Different models are often used for the client
 155  * (consumer) and server parts. Even though each processor often has major
 156  * architectural differences, they still are considered the same family by
 157  * Intel.
 158  *
 159  * On the other hand, each major AMD architecture generally has its own family.
 160  * For example, the K8 is family 0x10, Bulldozer 0x15, and Zen 0x17. Within it
 161  * the model number is used to help identify specific processors.
 162  *
 163  * The stepping is used to refer to a revision of a specific microprocessor. The
 164  * term comes from equipment used to produce masks that are used to create
 165  * integrated circuits.
 166  *
 167  * The information is present in leaf 1, %eax. In technical documentation you
 168  * will see the terms extended model and extended family. The original family,
 169  * model, and stepping fields were each 4 bits wide. If the values in either
 170  * are 0xf, then one is to consult the extended model and extended family, which
 171  * take previously reserved bits and allow for a larger number of models and add
 172  * 0xf to them.
 173  *
 174  * When we process this information, we store the full family, model, and
 175  * stepping in the struct cpuid_info members cpi_family, cpi_model, and
 176  * cpi_step, respectively. Whenever you are performing comparisons with the
 177  * family, model, and stepping, you should use these members and not the raw
 178  * values from cpuid. If you must use the raw values from cpuid directly, you
 179  * must make sure that you add the extended model and family to the base model
 180  * and family.
 181  *
 182  * In general, we do not use information about the family, model, and stepping
 183  * to determine whether or not a feature is present; that is generally driven by
 184  * specific leaves. However, when something we care about on the processor is
 185  * not considered 'architectural' meaning that it is specific to a set of
 186  * processors and not promised in the architecture model to be consistent from
 187  * generation to generation, then we will fall back on this information. The
 188  * most common cases where this comes up is when we have to workaround errata in
 189  * the processor, are dealing with processor-specific features such as CPU
 190  * performance counters, or we want to provide additional information for things
 191  * such as fault management.
 192  *
 193  * While processors also do have a brand string, which is the name that people
 194  * are familiar with when buying the processor, they are not meant for
 195  * programmatic consumption. That is what the family, model, and stepping are
 196  * for.
 197  *
 198  * ------------
 199  * CPUID Passes
 200  * ------------
 201  *
 202  * As part of performing feature detection, we break this into several different
 203  * passes. The passes are as follows:
 204  *
 205  *      Pass 0          This is a primordial pass done in locore.s to deal with
 206  *                      Cyrix CPUs that don't support cpuid. The reality is that
 207  *                      we likely don't run on them any more, but there is still
 208  *                      logic for handling them.
 209  *
 210  *      Pass 1          This is the primary pass and is responsible for doing a
 211  *                      large number of different things:
 212  *
 213  *                      1. Determine which vendor manufactured the CPU and
 214  *                      determining the family, model, and stepping information.
 215  *
 216  *                      2. Gathering a large number of feature flags to
 217  *                      determine which features the CPU support and which
 218  *                      indicate things that we need to do other work in the OS
 219  *                      to enable. Features detected this way are added to the
 220  *                      x86_featureset which can be queried to
 221  *                      determine what we should do. This includes processing
 222  *                      all of the basic and extended CPU features that we care
 223  *                      about.
 224  *
 225  *                      3. Determining the CPU's topology. This includes
 226  *                      information about how many cores and threads are present
 227  *                      in the package. It also is responsible for figuring out
 228  *                      which logical CPUs are potentially part of the same core
 229  *                      and what other resources they might share. For more
 230  *                      information see the 'Topology' section.
 231  *
 232  *                      4. Determining the set of CPU security-specific features
 233  *                      that we need to worry about and determine the
 234  *                      appropriate set of workarounds.
 235  *
 236  *                      Pass 1 on the boot CPU occurs before KMDB is started.
 237  *
 238  *      Pass 2          The second pass is done after startup(). Here, we check
 239  *                      other miscellaneous features. Most of this is gathering
 240  *                      additional basic and extended features that we'll use in
 241  *                      later passes or for debugging support.
 242  *
 243  *      Pass 3          The third pass occurs after the kernel memory allocator
 244  *                      has been fully initialized. This gathers information
 245  *                      where we might need dynamic memory available for our
 246  *                      uses. This includes several varying width leaves that
 247  *                      have cache information and the processor's brand string.
 248  *
 249  *      Pass 4          The fourth and final normal pass is performed after the
 250  *                      kernel has brought most everything online. This is
 251  *                      invoked from post_startup(). In this pass, we go through
 252  *                      the set of features that we have enabled and turn that
 253  *                      into the hardware auxiliary vector features that
 254  *                      userland receives. This is used by userland, primarily
 255  *                      by the run-time link-editor (RTLD), though userland
 256  *                      software could also refer to it directly.
 257  *
 258  *      Microcode       After a microcode update, we do a selective rescan of
 259  *                      the cpuid leaves to determine what features have
 260  *                      changed. Microcode updates can provide more details
 261  *                      about security related features to deal with issues like
 262  *                      Spectre and L1TF. On occasion, vendors have violated
 263  *                      their contract and removed bits. However, we don't try
 264  *                      to detect that because that puts us in a situation that
 265  *                      we really can't deal with. As such, the only thing we
 266  *                      rescan are security related features today. See
 267  *                      cpuid_pass_ucode().
 268  *
 269  * All of the passes (except pass 0) are run on all CPUs. However, for the most
 270  * part we only care about what the boot CPU says about this information and use
 271  * the other CPUs as a rough guide to sanity check that we have the same feature
 272  * set.
 273  *
 274  * We do not support running multiple logical CPUs with disjoint, let alone
 275  * different, feature sets.
 276  *
 277  * ------------------
 278  * Processor Topology
 279  * ------------------
 280  *
 281  * One of the important things that we need to do is to understand the topology
 282  * of the underlying processor. When we say topology in this case, we're trying
 283  * to understand the relationship between the logical CPUs that the operating
 284  * system sees and the underlying physical layout. Different logical CPUs may
 285  * share different resources which can have important consequences for the
 286  * performance of the system. For example, they may share caches, execution
 287  * units, and more.
 288  *
 289  * The topology of the processor changes from generation to generation and
 290  * vendor to vendor.  Along with that, different vendors use different
 291  * terminology, and the operating system itself uses occasionally overlapping
 292  * terminology. It's important to understand what this topology looks like so
 293  * one can understand the different things that we try to calculate and
 294  * determine.
 295  *
 296  * To get started, let's talk about a little bit of terminology that we've used
 297  * so far, is used throughout this file, and is fairly generic across multiple
 298  * vendors:
 299  *
 300  * CPU
 301  *      A central processing unit (CPU) refers to a logical and/or virtual
 302  *      entity that the operating system can execute instructions on. The
 303  *      underlying resources for this CPU may be shared between multiple
 304  *      entities; however, to the operating system it is a discrete unit.
 305  *
 306  * PROCESSOR and PACKAGE
 307  *
 308  *      Generally, when we use the term 'processor' on its own, we are referring
 309  *      to the physical entity that one buys and plugs into a board. However,
 310  *      because processor has been overloaded and one might see it used to mean
 311  *      multiple different levels, we will instead use the term 'package' for
 312  *      the rest of this file. The term package comes from the electrical
 313  *      engineering side and refers to the physical entity that encloses the
 314  *      electronics inside. Strictly speaking the package can contain more than
 315  *      just the CPU, for example, on many processors it may also have what's
 316  *      called an 'integrated graphical processing unit (GPU)'. Because the
 317  *      package can encapsulate multiple units, it is the largest physical unit
 318  *      that we refer to.
 319  *
 320  * SOCKET
 321  *
 322  *      A socket refers to unit on a system board (generally the motherboard)
 323  *      that can receive a package. A single package, or processor, is plugged
 324  *      into a single socket. A system may have multiple sockets. Often times,
 325  *      the term socket is used interchangeably with package and refers to the
 326  *      electrical component that has plugged in, and not the receptacle itself.
 327  *
 328  * CORE
 329  *
 330  *      A core refers to the physical instantiation of a CPU, generally, with a
 331  *      full set of hardware resources available to it. A package may contain
 332  *      multiple cores inside of it or it may just have a single one. A
 333  *      processor with more than one core is often referred to as 'multi-core'.
 334  *      In illumos, we will use the feature X86FSET_CMP to refer to a system
 335  *      that has 'multi-core' processors.
 336  *
 337  *      A core may expose a single logical CPU to the operating system, or it
 338  *      may expose multiple CPUs, which we call threads, defined below.
 339  *
 340  *      Some resources may still be shared by cores in the same package. For
 341  *      example, many processors will share the level 3 cache between cores.
 342  *      Some AMD generations share hardware resources between cores. For more
 343  *      information on that see the section 'AMD Topology'.
 344  *
 345  * THREAD and STRAND
 346  *
 347  *      In this file, generally a thread refers to a hardware resources and not
 348  *      the operating system's logical abstraction. A thread is always exposed
 349  *      as an independent logical CPU to the operating system. A thread belongs
 350  *      to a specific core. A core may have more than one thread. When that is
 351  *      the case, the threads that are part of the same core are often referred
 352  *      to as 'siblings'.
 353  *
 354  *      When multiple threads exist, this is generally referred to as
 355  *      simultaneous multi-threading (SMT). When Intel introduced this in their
 356  *      processors they called it hyper-threading (HT). When multiple threads
 357  *      are active in a core, they split the resources of the core. For example,
 358  *      two threads may share the same set of hardware execution units.
 359  *
 360  *      The operating system often uses the term 'strand' to refer to a thread.
 361  *      This helps disambiguate it from the software concept.
 362  *
 363  * CHIP
 364  *
 365  *      Unfortunately, the term 'chip' is dramatically overloaded. At its most
 366  *      base meaning, it is used to refer to a single integrated circuit, which
 367  *      may or may not be the only thing in the package. In illumos, when you
 368  *      see the term 'chip' it is almost always referring to the same thing as
 369  *      the 'package'. However, many vendors may use chip to refer to one of
 370  *      many integrated circuits that have been placed in the package. As an
 371  *      example, see the subsequent definition.
 372  *
 373  *      To try and keep things consistent, we will only use chip when referring
 374  *      to the entire integrated circuit package, with the exception of the
 375  *      definition of multi-chip module (because it is in the name) and use the
 376  *      term 'die' when we want the more general, potential sub-component
 377  *      definition.
 378  *
 379  * DIE
 380  *
 381  *      A die refers to an integrated circuit. Inside of the package there may
 382  *      be a single die or multiple dies. This is sometimes called a 'chip' in
 383  *      vendor's parlance, but in this file, we use the term die to refer to a
 384  *      subcomponent.
 385  *
 386  * MULTI-CHIP MODULE
 387  *
 388  *      A multi-chip module (MCM) refers to putting multiple distinct chips that
 389  *      are connected together in the same package. When a multi-chip design is
 390  *      used, generally each chip is manufactured independently and then joined
 391  *      together in the package. For example, on AMD's Zen microarchitecture
 392  *      (family 0x17), the package contains several dies (the second meaning of
 393  *      chip from above) that are connected together.
 394  *
 395  * CACHE
 396  *
 397  *      A cache is a part of the processor that maintains copies of recently
 398  *      accessed memory. Caches are split into levels and then into types.
 399  *      Commonly there are one to three levels, called level one, two, and
 400  *      three. The lower the level, the smaller it is, the closer it is to the
 401  *      execution units of the CPU, and the faster it is to access. The layout
 402  *      and design of the cache come in many different flavors, consult other
 403  *      resources for a discussion of those.
 404  *
 405  *      Caches are generally split into two types, the instruction and data
 406  *      cache. The caches contain what their names suggest, the instruction
 407  *      cache has executable program text, while the data cache has all other
 408  *      memory that the processor accesses. As of this writing, data is kept
 409  *      coherent between all of the caches on x86, so if one modifies program
 410  *      text before it is executed, that will be in the data cache, and the
 411  *      instruction cache will be synchronized with that change when the
 412  *      processor actually executes those instructions. This coherency also
 413  *      covers the fact that data could show up in multiple caches.
 414  *
 415  *      Generally, the lowest level caches are specific to a core. However, the
 416  *      last layer cache is shared between some number of cores. The number of
 417  *      CPUs sharing this last level cache is important. This has implications
 418  *      for the choices that the scheduler makes, as accessing memory that might
 419  *      be in a remote cache after thread migration can be quite expensive.
 420  *
 421  *      Sometimes, the word cache is abbreviated with a '$', because in US
 422  *      English the word cache is pronounced the same as cash. So L1D$ refers to
 423  *      the L1 data cache, and L2$ would be the L2 cache. This will not be used
 424  *      in the rest of this theory statement for clarity.
 425  *
 426  * MEMORY CONTROLLER
 427  *
 428  *      The memory controller is a component that provides access to DRAM. Each
 429  *      memory controller can access a set number of DRAM channels. Each channel
 430  *      can have a number of DIMMs (sticks of memory) associated with it. A
 431  *      given package may have more than one memory controller. The association
 432  *      of the memory controller to a group of cores is important as it is
 433  *      cheaper to access memory on the controller that you are associated with.
 434  *
 435  * NUMA
 436  *
 437  *      NUMA or non-uniform memory access, describes a way that systems are
 438  *      built. On x86, any processor core can address all of the memory in the
 439  *      system. However, When using multiple sockets or possibly within a
 440  *      multi-chip module, some of that memory is physically closer and some of
 441  *      it is further. Memory that is further away is more expensive to access.
 442  *      Consider the following image of multiple sockets with memory:
 443  *
 444  *      +--------+                                                +--------+
 445  *      | DIMM A |         +----------+      +----------+         | DIMM D |
 446  *      +--------+-+       |          |      |          |       +-+------+-+
 447  *        | DIMM B |=======| Socket 0 |======| Socket 1 |=======| DIMM E |
 448  *        +--------+-+     |          |      |          |     +-+------+-+
 449  *          | DIMM C |     +----------+      +----------+     | DIMM F |
 450  *          +--------+                                        +--------+
 451  *
 452  *      In this example, Socket 0 is closer to DIMMs A-C while Socket 1 is
 453  *      closer to DIMMs D-F. This means that it is cheaper for socket 0 to
 454  *      access DIMMs A-C and more expensive to access D-F as it has to go
 455  *      through Socket 1 to get there. The inverse is true for Socket 1. DIMMs
 456  *      D-F are cheaper than A-C. While the socket form is the most common, when
 457  *      using multi-chip modules, this can also sometimes occur. For another
 458  *      example of this that's more involved, see the AMD topology section.
 459  *
 460  *
 461  * Intel Topology
 462  * --------------
 463  *
 464  * Most Intel processors since Nehalem, (as of this writing the current gen
 465  * is Skylake / Cannon Lake) follow a fairly similar pattern. The CPU portion of
 466  * the package is a single monolithic die. MCMs currently aren't used. Most
 467  * parts have three levels of caches, with the L3 cache being shared between
 468  * all of the cores on the package. The L1/L2 cache is generally specific to
 469  * an individual core. The following image shows at a simplified level what
 470  * this looks like. The memory controller is commonly part of something called
 471  * the 'Uncore', that used to be separate physical chips that were not a part of
 472  * the package, but are now part of the same chip.
 473  *
 474  *  +-----------------------------------------------------------------------+
 475  *  | Package                                                               |
 476  *  |  +-------------------+  +-------------------+  +-------------------+  |
 477  *  |  | Core              |  | Core              |  | Core              |  |
 478  *  |  |  +--------+ +---+ |  |  +--------+ +---+ |  |  +--------+ +---+ |  |
 479  *  |  |  | Thread | | L | |  |  | Thread | | L | |  |  | Thread | | L | |  |
 480  *  |  |  +--------+ | 1 | |  |  +--------+ | 1 | |  |  +--------+ | 1 | |  |
 481  *  |  |  +--------+ |   | |  |  +--------+ |   | |  |  +--------+ |   | |  |
 482  *  |  |  | Thread | |   | |  |  | Thread | |   | |  |  | Thread | |   | |  |
 483  *  |  |  +--------+ +---+ |  |  +--------+ +---+ |  |  +--------+ +---+ |  |
 484  *  |  |  +--------------+ |  |  +--------------+ |  |  +--------------+ |  |
 485  *  |  |  | L2 Cache     | |  |  | L2 Cache     | |  |  | L2 Cache     | |  |
 486  *  |  |  +--------------+ |  |  +--------------+ |  |  +--------------+ |  |
 487  *  |  +-------------------+  +-------------------+  +-------------------+  |
 488  *  | +-------------------------------------------------------------------+ |
 489  *  | |                         Shared L3 Cache                           | |
 490  *  | +-------------------------------------------------------------------+ |
 491  *  | +-------------------------------------------------------------------+ |
 492  *  | |                        Memory Controller                          | |
 493  *  | +-------------------------------------------------------------------+ |
 494  *  +-----------------------------------------------------------------------+
 495  *
 496  * A side effect of this current architecture is that what we care about from a
 497  * scheduling and topology perspective, is simplified. In general we care about
 498  * understanding which logical CPUs are part of the same core and socket.
 499  *
 500  * To determine the relationship between threads and cores, Intel initially used
 501  * the identifier in the advanced programmable interrupt controller (APIC). They
 502  * also added cpuid leaf 4 to give additional information about the number of
 503  * threads and CPUs in the processor. With the addition of x2apic (which
 504  * increased the number of addressable logical CPUs from 8-bits to 32-bits), an
 505  * additional cpuid topology leaf 0xB was added.
 506  *
 507  * AMD Topology
 508  * ------------
 509  *
 510  * When discussing AMD topology, we want to break this into three distinct
 511  * generations of topology. There's the basic topology that has been used in
 512  * family 0xf+ (Opteron, Athlon64), there's the topology that was introduced
 513  * with family 0x15 (Bulldozer), and there's the topology that was introduced
 514  * with family 0x17 (Zen). AMD also has some additional terminology that's worth
 515  * talking about.
 516  *
 517  * Until the introduction of family 0x17 (Zen), AMD did not implement something
 518  * that they considered SMT. Whether or not the AMD processors have SMT
 519  * influences many things including scheduling and reliability, availability,
 520  * and serviceability (RAS) features.
 521  *
 522  * NODE
 523  *
 524  *      AMD uses the term node to refer to a die that contains a number of cores
 525  *      and I/O resources. Depending on the processor family and model, more
 526  *      than one node can be present in the package. When there is more than one
 527  *      node this indicates a multi-chip module. Usually each node has its own
 528  *      access to memory and I/O devices. This is important and generally
 529  *      different from the corresponding Intel Nehalem-Skylake+ processors. As a
 530  *      result, we track this relationship in the operating system.
 531  *
 532  *      In processors with an L3 cache, the L3 cache is generally shared across
 533  *      the entire node, though the way this is carved up varies from generation
 534  *      to generation.
 535  *
 536  * BULLDOZER
 537  *
 538  *      Starting with the Bulldozer family (0x15) and continuing until the
 539  *      introduction of the Zen microarchitecture, AMD introduced the idea of a
 540  *      compute unit. In a compute unit, two traditional cores share a number of
 541  *      hardware resources. Critically, they share the FPU, L1 instruction
 542  *      cache, and the L2 cache. Several compute units were then combined inside
 543  *      of a single node.  Because the integer execution units, L1 data cache,
 544  *      and some other resources were not shared between the cores, AMD never
 545  *      considered this to be SMT.
 546  *
 547  * ZEN
 548  *
 549  *      The Zen family (0x17) uses a multi-chip module (MCM) design, the module
 550  *      is called Zeppelin. These modules are similar to the idea of nodes used
 551  *      previously. Each of these nodes has two DRAM channels which all of the
 552  *      cores in the node can access uniformly. These nodes are linked together
 553  *      in the package, creating a NUMA environment.
 554  *
 555  *      The Zeppelin die itself contains two different 'core complexes'. Each
 556  *      core complex consists of four cores which each have two threads, for a
 557  *      total of 8 logical CPUs per complex. Unlike other generations,
 558  *      where all the logical CPUs in a given node share the L3 cache, here each
 559  *      core complex has its own shared L3 cache.
 560  *
 561  *      A further thing that we need to consider is that in some configurations,
 562  *      particularly with the Threadripper line of processors, not every die
 563  *      actually has its memory controllers wired up to actual memory channels.
 564  *      This means that some cores have memory attached to them and others
 565  *      don't.
 566  *
 567  *      To put Zen in perspective, consider the following images:
 568  *
 569  *      +--------------------------------------------------------+
 570  *      | Core Complex                                           |
 571  *      | +-------------------+    +-------------------+  +---+  |
 572  *      | | Core       +----+ |    | Core       +----+ |  |   |  |
 573  *      | | +--------+ | L2 | |    | +--------+ | L2 | |  |   |  |
 574  *      | | | Thread | +----+ |    | | Thread | +----+ |  |   |  |
 575  *      | | +--------+-+ +--+ |    | +--------+-+ +--+ |  | L |  |
 576  *      | |   | Thread | |L1| |    |   | Thread | |L1| |  | 3 |  |
 577  *      | |   +--------+ +--+ |    |   +--------+ +--+ |  |   |  |
 578  *      | +-------------------+    +-------------------+  | C |  |
 579  *      | +-------------------+    +-------------------+  | a |  |
 580  *      | | Core       +----+ |    | Core       +----+ |  | c |  |
 581  *      | | +--------+ | L2 | |    | +--------+ | L2 | |  | h |  |
 582  *      | | | Thread | +----+ |    | | Thread | +----+ |  | e |  |
 583  *      | | +--------+-+ +--+ |    | +--------+-+ +--+ |  |   |  |
 584  *      | |   | Thread | |L1| |    |   | Thread | |L1| |  |   |  |
 585  *      | |   +--------+ +--+ |    |   +--------+ +--+ |  |   |  |
 586  *      | +-------------------+    +-------------------+  +---+  |
 587  *      |                                                        |
 588  *      +--------------------------------------------------------+
 589  *
 590  *  This first image represents a single Zen core complex that consists of four
 591  *  cores.
 592  *
 593  *
 594  *      +--------------------------------------------------------+
 595  *      | Zeppelin Die                                           |
 596  *      |  +--------------------------------------------------+  |
 597  *      |  |         I/O Units (PCIe, SATA, USB, etc.)        |  |
 598  *      |  +--------------------------------------------------+  |
 599  *      |                           HH                           |
 600  *      |          +-----------+    HH    +-----------+          |
 601  *      |          |           |    HH    |           |          |
 602  *      |          |    Core   |==========|    Core   |          |
 603  *      |          |  Complex  |==========|  Complex  |          |
 604  *      |          |           |    HH    |           |          |
 605  *      |          +-----------+    HH    +-----------+          |
 606  *      |                           HH                           |
 607  *      |  +--------------------------------------------------+  |
 608  *      |  |                Memory Controller                 |  |
 609  *      |  +--------------------------------------------------+  |
 610  *      |                                                        |
 611  *      +--------------------------------------------------------+
 612  *
 613  *  This image represents a single Zeppelin Die. Note how both cores are
 614  *  connected to the same memory controller and I/O units. While each core
 615  *  complex has its own L3 cache as seen in the first image, they both have
 616  *  uniform access to memory.
 617  *
 618  *
 619  *                      PP                     PP
 620  *                      PP                     PP
 621  *           +----------PP---------------------PP---------+
 622  *           |          PP                     PP         |
 623  *           |    +-----------+          +-----------+    |
 624  *           |    |           |          |           |    |
 625  *       MMMMMMMMM|  Zeppelin |==========|  Zeppelin |MMMMMMMMM
 626  *       MMMMMMMMM|    Die    |==========|    Die    |MMMMMMMMM
 627  *           |    |           |          |           |    |
 628  *           |    +-----------+ooo    ...+-----------+    |
 629  *           |          HH      ooo  ...       HH         |
 630  *           |          HH        oo..         HH         |
 631  *           |          HH        ..oo         HH         |
 632  *           |          HH      ...  ooo       HH         |
 633  *           |    +-----------+...    ooo+-----------+    |
 634  *           |    |           |          |           |    |
 635  *       MMMMMMMMM|  Zeppelin |==========|  Zeppelin |MMMMMMMMM
 636  *       MMMMMMMMM|    Die    |==========|    Die    |MMMMMMMMM
 637  *           |    |           |          |           |    |
 638  *           |    +-----------+          +-----------+    |
 639  *           |          PP                     PP         |
 640  *           +----------PP---------------------PP---------+
 641  *                      PP                     PP
 642  *                      PP                     PP
 643  *
 644  *  This image represents a single Zen package. In this example, it has four
 645  *  Zeppelin dies, though some configurations only have a single one. In this
 646  *  example, each die is directly connected to the next. Also, each die is
 647  *  represented as being connected to memory by the 'M' character and connected
 648  *  to PCIe devices and other I/O, by the 'P' character. Because each Zeppelin
 649  *  die is made up of two core complexes, we have multiple different NUMA
 650  *  domains that we care about for these systems.
 651  *
 652  * CPUID LEAVES
 653  *
 654  * There are a few different CPUID leaves that we can use to try and understand
 655  * the actual state of the world. As part of the introduction of family 0xf, AMD
 656  * added CPUID leaf 0x80000008. This leaf tells us the number of logical
 657  * processors that are in the system. Because families before Zen didn't have
 658  * SMT, this was always the number of cores that were in the system. However, it
 659  * should always be thought of as the number of logical threads to be consistent
 660  * between generations. In addition we also get the size of the APIC ID that is
 661  * used to represent the number of logical processors. This is important for
 662  * deriving topology information.
 663  *
 664  * In the Bulldozer family, AMD added leaf 0x8000001E. The information varies a
 665  * bit between Bulldozer and later families, but it is quite useful in
 666  * determining the topology information. Because this information has changed
 667  * across family generations, it's worth calling out what these mean
 668  * explicitly. The registers have the following meanings:
 669  *
 670  *      %eax    The APIC ID. The entire register is defined to have a 32-bit
 671  *              APIC ID, even though on systems without x2apic support, it will
 672  *              be limited to 8 bits.
 673  *
 674  *      %ebx    On Bulldozer-era systems this contains information about the
 675  *              number of cores that are in a compute unit (cores that share
 676  *              resources). It also contains a per-package compute unit ID that
 677  *              identifies which compute unit the logical CPU is a part of.
 678  *
 679  *              On Zen-era systems this instead contains the number of threads
 680  *              per core and the ID of the core that the logical CPU is a part
 681  *              of. Note, this ID is unique only to the package, it is not
 682  *              globally unique across the entire system.
 683  *
 684  *      %ecx    This contains the number of nodes that exist in the package. It
 685  *              also contains an ID that identifies which node the logical CPU
 686  *              is a part of.
 687  *
 688  * Finally, we also use cpuid leaf 0x8000001D to determine information about the
 689  * cache layout to determine which logical CPUs are sharing which caches.
 690  *
 691  * illumos Topology
 692  * ----------------
 693  *
 694  * Based on the above we synthesize the information into several different
 695  * variables that we store in the 'struct cpuid_info'. We'll go into the details
 696  * of what each member is supposed to represent and their uniqueness. In
 697  * general, there are two levels of uniqueness that we care about. We care about
 698  * an ID that is globally unique. That means that it will be unique across all
 699  * entities in the system. For example, the default logical CPU ID is globally
 700  * unique. On the other hand, there is some information that we only care about
 701  * being unique within the context of a single package / socket. Here are the
 702  * variables that we keep track of and their meaning.
 703  *
 704  * Several of the values that are asking for an identifier, with the exception
 705  * of cpi_apicid, are allowed to be synthetic.
 706  *
 707  *
 708  * cpi_apicid
 709  *
 710  *      This is the value of the CPU's APIC id. This should be the full 32-bit
 711  *      ID if the CPU is using the x2apic. Otherwise, it should be the 8-bit
 712  *      APIC ID. This value is globally unique between all logical CPUs across
 713  *      all packages. This is usually required by the APIC.
 714  *
 715  * cpi_chipid
 716  *
 717  *      This value indicates the ID of the package that the logical CPU is a
 718  *      part of. This value is allowed to be synthetic. It is usually derived by
 719  *      taking the CPU's APIC ID and determining how many bits are used to
 720  *      represent CPU cores in the package. All logical CPUs that are part of
 721  *      the same package must have the same value.
 722  *
 723  * cpi_coreid
 724  *
 725  *      This represents the ID of a CPU core. Two logical CPUs should only have
 726  *      the same cpi_coreid value if they are part of the same core. These
 727  *      values may be synthetic. On systems that support SMT, this value is
 728  *      usually derived from the APIC ID, otherwise it is often synthetic and
 729  *      just set to the value of the cpu_id in the cpu_t.
 730  *
 731  * cpi_pkgcoreid
 732  *
 733  *      This is similar to the cpi_coreid in that logical CPUs that are part of
 734  *      the same core should have the same ID. The main difference is that these
 735  *      values are only required to be unique to a given socket.
 736  *
 737  * cpi_clogid
 738  *
 739  *      This represents the logical ID of a logical CPU. This value should be
 740  *      unique within a given socket for each logical CPU. This is allowed to be
 741  *      synthetic, though it is usually based off of the CPU's apic ID. The
 742  *      broader system expects that logical CPUs that have are part of the same
 743  *      core have contiguous numbers. For example, if there were two threads per
 744  *      core, then the core IDs divided by two should be the same and the first
 745  *      modulus two should be zero and the second one. For example, IDs 4 and 5
 746  *      indicate two logical CPUs that are part of the same core. But IDs 5 and
 747  *      6 represent two logical CPUs that are part of different cores.
 748  *
 749  *      While it is common for the cpi_coreid and the cpi_clogid to be derived
 750  *      from the same source, strictly speaking, they don't have to be and the
 751  *      two values should be considered logically independent. One should not
 752  *      try to compare a logical CPU's cpi_coreid and cpi_clogid to determine
 753  *      some kind of relationship. While this is tempting, we've seen cases on
 754  *      AMD family 0xf where the system's cpu id is not related to its APIC ID.
 755  *
 756  * cpi_ncpu_per_chip
 757  *
 758  *      This value indicates the total number of logical CPUs that exist in the
 759  *      physical package. Critically, this is not the number of logical CPUs
 760  *      that exist for just the single core.
 761  *
 762  *      This value should be the same for all logical CPUs in the same package.
 763  *
 764  * cpi_ncore_per_chip
 765  *
 766  *      This value indicates the total number of physical CPU cores that exist
 767  *      in the package. The system compares this value with cpi_ncpu_per_chip to
 768  *      determine if simultaneous multi-threading (SMT) is enabled. When
 769  *      cpi_ncpu_per_chip equals cpi_ncore_per_chip, then there is no SMT and
 770  *      the X86FSET_HTT feature is not set. If this value is greater than one,
 771  *      than we consider the processor to have the feature X86FSET_CMP, to
 772  *      indicate that there is support for more than one core.
 773  *
 774  *      This value should be the same for all logical CPUs in the same package.
 775  *
 776  * cpi_procnodes_per_pkg
 777  *
 778  *      This value indicates the number of 'nodes' that exist in the package.
 779  *      When processors are actually a multi-chip module, this represents the
 780  *      number of such modules that exist in the package. Currently, on Intel
 781  *      based systems this member is always set to 1.
 782  *
 783  *      This value should be the same for all logical CPUs in the same package.
 784  *
 785  * cpi_procnodeid
 786  *
 787  *      This value indicates the ID of the node that the logical CPU is a part
 788  *      of. All logical CPUs that are in the same node must have the same value
 789  *      here. This value must be unique across all of the packages in the
 790  *      system.  On Intel based systems, this is currently set to the value in
 791  *      cpi_chipid because there is only one node.
 792  *
 793  * cpi_cores_per_compunit
 794  *
 795  *      This value indicates the number of cores that are part of a compute
 796  *      unit. See the AMD topology section for this. This member only has real
 797  *      meaning currently for AMD Bulldozer family processors. For all other
 798  *      processors, this should currently be set to 1.
 799  *
 800  * cpi_compunitid
 801  *
 802  *      This indicates the compute unit that the logical CPU belongs to. For
 803  *      processors without AMD Bulldozer-style compute units this should be set
 804  *      to the value of cpi_coreid.
 805  *
 806  * cpi_ncpu_shr_last_cache
 807  *
 808  *      This indicates the number of logical CPUs that are sharing the same last
 809  *      level cache. This value should be the same for all CPUs that are sharing
 810  *      that cache. The last cache refers to the cache that is closest to memory
 811  *      and furthest away from the CPU.
 812  *
 813  * cpi_last_lvl_cacheid
 814  *
 815  *      This indicates the ID of the last cache that the logical CPU uses. This
 816  *      cache is often shared between multiple logical CPUs and is the cache
 817  *      that is closest to memory and furthest away from the CPU. This value
 818  *      should be the same for a group of logical CPUs only if they actually
 819  *      share the same last level cache. IDs should not overlap between
 820  *      packages.
 821  *
 822  * cpi_ncore_bits
 823  *
 824  *      This indicates the number of bits that are required to represent all of
 825  *      the cores in the system. As cores are derived based on their APIC IDs,
 826  *      we aren't guaranteed a run of APIC IDs starting from zero. It's OK for
 827  *      this value to be larger than the actual number of IDs that are present
 828  *      in the system. This is used to size tables by the CMI framework. It is
 829  *      only filled in for Intel and AMD CPUs.
 830  *
 831  * cpi_nthread_bits
 832  *
 833  *      This indicates the number of bits required to represent all of the IDs
 834  *      that cover the logical CPUs that exist on a given core. It's OK for this
 835  *      value to be larger than the actual number of IDs that are present in the
 836  *      system.  This is used to size tables by the CMI framework. It is
 837  *      only filled in for Intel and AMD CPUs.
 838  *
 839  * -----------
 840  * Hypervisors
 841  * -----------
 842  *
 843  * If trying to manage the differences between vendors wasn't bad enough, it can
 844  * get worse thanks to our friend hardware virtualization. Hypervisors are given
 845  * the ability to interpose on all cpuid instructions and change them to suit
 846  * their purposes. In general, this is necessary as the hypervisor wants to be
 847  * able to present a more uniform set of features or not necessarily give the
 848  * guest operating system kernel knowledge of all features so it can be
 849  * more easily migrated between systems.
 850  *
 851  * When it comes to trying to determine topology information, this can be a
 852  * double edged sword. When a hypervisor doesn't actually implement a cpuid
 853  * leaf, it'll often return all zeros. Because of that, you'll often see various
 854  * checks scattered about fields being non-zero before we assume we can use
 855  * them.
 856  *
 857  * When it comes to topology information, the hypervisor is often incentivized
 858  * to lie to you about topology. This is because it doesn't always actually
 859  * guarantee that topology at all. The topology path we take in the system
 860  * depends on how the CPU advertises itself. If it advertises itself as an Intel
 861  * or AMD CPU, then we basically do our normal path. However, when they don't
 862  * use an actual vendor, then that usually turns into multiple one-core CPUs
 863  * that we enumerate that are often on different sockets. The actual behavior
 864  * depends greatly on what the hypervisor actually exposes to us.
 865  *
 866  * --------------------
 867  * Exposing Information
 868  * --------------------
 869  *
 870  * We expose CPUID information in three different forms in the system.
 871  *
 872  * The first is through the x86_featureset variable. This is used in conjunction
 873  * with the is_x86_feature() function. This is queried by x86-specific functions
 874  * to determine which features are or aren't present in the system and to make
 875  * decisions based upon them. For example, users of this include everything from
 876  * parts of the system dedicated to reliability, availability, and
 877  * serviceability (RAS), to making decisions about how to handle security
 878  * mitigations, to various x86-specific drivers. General purpose or
 879  * architecture independent drivers should never be calling this function.
 880  *
 881  * The second means is through the auxiliary vector. The auxiliary vector is a
 882  * series of tagged data that the kernel passes down to a user program when it
 883  * begins executing. This information is used to indicate to programs what
 884  * instruction set extensions are present. For example, information about the
 885  * CPU supporting the machine check architecture (MCA) wouldn't be passed down
 886  * since user programs cannot make use of it. However, things like the AVX
 887  * instruction sets are. Programs use this information to make run-time
 888  * decisions about what features they should use. As an example, the run-time
 889  * link-editor (rtld) can relocate different functions depending on the hardware
 890  * support available.
 891  *
 892  * The final form is through a series of accessor functions that all have the
 893  * form cpuid_get*. This is used by a number of different subsystems in the
 894  * kernel to determine more detailed information about what we're running on,
 895  * topology information, etc. Some of these subsystems include processor groups
 896  * (uts/common/os/pg.c.), CPU Module Interface (uts/i86pc/os/cmi.c), ACPI,
 897  * microcode, and performance monitoring. These functions all ASSERT that the
 898  * CPU they're being called on has reached a certain cpuid pass. If the passes
 899  * are rearranged, then this needs to be adjusted.
 900  */
 901 
 902 #include <sys/types.h>
 903 #include <sys/archsystm.h>
 904 #include <sys/x86_archext.h>
 905 #include <sys/kmem.h>
 906 #include <sys/systm.h>
 907 #include <sys/cmn_err.h>
 908 #include <sys/sunddi.h>
 909 #include <sys/sunndi.h>
 910 #include <sys/cpuvar.h>
 911 #include <sys/processor.h>
 912 #include <sys/sysmacros.h>
 913 #include <sys/pg.h>
 914 #include <sys/fp.h>
 915 #include <sys/controlregs.h>
 916 #include <sys/bitmap.h>
 917 #include <sys/auxv_386.h>
 918 #include <sys/memnode.h>
 919 #include <sys/pci_cfgspace.h>
 920 #include <sys/comm_page.h>
 921 #include <sys/mach_mmu.h>
 922 #include <sys/ucode.h>
 923 #include <sys/tsc.h>
 924 
 925 #ifdef __xpv
 926 #include <sys/hypervisor.h>
 927 #else
 928 #include <sys/ontrap.h>
 929 #endif
 930 
 931 uint_t x86_vendor = X86_VENDOR_IntelClone;
 932 uint_t x86_type = X86_TYPE_OTHER;
 933 uint_t x86_clflush_size = 0;
 934 
 935 #if defined(__xpv)
 936 int x86_use_pcid = 0;
 937 int x86_use_invpcid = 0;
 938 #else
 939 int x86_use_pcid = -1;
 940 int x86_use_invpcid = -1;
 941 #endif
 942 
 943 uint_t pentiumpro_bug4046376;
 944 
 945 uchar_t x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)];
 946 
 947 static char *x86_feature_names[NUM_X86_FEATURES] = {
 948         "lgpg",
 949         "tsc",
 950         "msr",
 951         "mtrr",
 952         "pge",
 953         "de",
 954         "cmov",
 955         "mmx",
 956         "mca",
 957         "pae",
 958         "cv8",
 959         "pat",
 960         "sep",
 961         "sse",
 962         "sse2",
 963         "htt",
 964         "asysc",
 965         "nx",
 966         "sse3",
 967         "cx16",
 968         "cmp",
 969         "tscp",
 970         "mwait",
 971         "sse4a",
 972         "cpuid",
 973         "ssse3",
 974         "sse4_1",
 975         "sse4_2",
 976         "1gpg",
 977         "clfsh",
 978         "64",
 979         "aes",
 980         "pclmulqdq",
 981         "xsave",
 982         "avx",
 983         "vmx",
 984         "svm",
 985         "topoext",
 986         "f16c",
 987         "rdrand",
 988         "x2apic",
 989         "avx2",
 990         "bmi1",
 991         "bmi2",
 992         "fma",
 993         "smep",
 994         "smap",
 995         "adx",
 996         "rdseed",
 997         "mpx",
 998         "avx512f",
 999         "avx512dq",
1000         "avx512pf",
1001         "avx512er",
1002         "avx512cd",
1003         "avx512bw",
1004         "avx512vl",
1005         "avx512fma",
1006         "avx512vbmi",
1007         "avx512_vpopcntdq",
1008         "avx512_4vnniw",
1009         "avx512_4fmaps",
1010         "xsaveopt",
1011         "xsavec",
1012         "xsaves",
1013         "sha",
1014         "umip",
1015         "pku",
1016         "ospke",
1017         "pcid",
1018         "invpcid",
1019         "ibrs",
1020         "ibpb",
1021         "stibp",
1022         "ssbd",
1023         "ssbd_virt",
1024         "rdcl_no",
1025         "ibrs_all",
1026         "rsba",
1027         "ssb_no",
1028         "stibp_all",
1029         "flush_cmd",
1030         "l1d_vmentry_no",
1031         "fsgsbase",
1032         "clflushopt",
1033         "clwb",
1034         "monitorx",
1035         "clzero",
1036         "xop",
1037         "fma4",
1038         "tbm",
1039         "avx512_vnni",
1040         "amd_pcec"
1041 };
1042 
1043 boolean_t
1044 is_x86_feature(void *featureset, uint_t feature)
1045 {
1046         ASSERT(feature < NUM_X86_FEATURES);
1047         return (BT_TEST((ulong_t *)featureset, feature));
1048 }
1049 
1050 void
1051 add_x86_feature(void *featureset, uint_t feature)
1052 {
1053         ASSERT(feature < NUM_X86_FEATURES);
1054         BT_SET((ulong_t *)featureset, feature);
1055 }
1056 
1057 void
1058 remove_x86_feature(void *featureset, uint_t feature)
1059 {
1060         ASSERT(feature < NUM_X86_FEATURES);
1061         BT_CLEAR((ulong_t *)featureset, feature);
1062 }
1063 
1064 boolean_t
1065 compare_x86_featureset(void *setA, void *setB)
1066 {
1067         /*
1068          * We assume that the unused bits of the bitmap are always zero.
1069          */
1070         if (memcmp(setA, setB, BT_SIZEOFMAP(NUM_X86_FEATURES)) == 0) {
1071                 return (B_TRUE);
1072         } else {
1073                 return (B_FALSE);
1074         }
1075 }
1076 
1077 void
1078 print_x86_featureset(void *featureset)
1079 {
1080         uint_t i;
1081 
1082         for (i = 0; i < NUM_X86_FEATURES; i++) {
1083                 if (is_x86_feature(featureset, i)) {
1084                         cmn_err(CE_CONT, "?x86_feature: %s\n",
1085                             x86_feature_names[i]);
1086                 }
1087         }
1088 }
1089 
1090 /* Note: This is the maximum size for the CPU, not the size of the structure. */
1091 static size_t xsave_state_size = 0;
1092 uint64_t xsave_bv_all = (XFEATURE_LEGACY_FP | XFEATURE_SSE);
1093 boolean_t xsave_force_disable = B_FALSE;
1094 extern int disable_smap;
1095 
1096 /*
1097  * This is set to platform type we are running on.
1098  */
1099 static int platform_type = -1;
1100 
1101 #if !defined(__xpv)
1102 /*
1103  * Variable to patch if hypervisor platform detection needs to be
1104  * disabled (e.g. platform_type will always be HW_NATIVE if this is 0).
1105  */
1106 int enable_platform_detection = 1;
1107 #endif
1108 
1109 /*
1110  * monitor/mwait info.
1111  *
1112  * size_actual and buf_actual are the real address and size allocated to get
1113  * proper mwait_buf alignement.  buf_actual and size_actual should be passed
1114  * to kmem_free().  Currently kmem_alloc() and mwait happen to both use
1115  * processor cache-line alignment, but this is not guarantied in the furture.
1116  */
1117 struct mwait_info {
1118         size_t          mon_min;        /* min size to avoid missed wakeups */
1119         size_t          mon_max;        /* size to avoid false wakeups */
1120         size_t          size_actual;    /* size actually allocated */
1121         void            *buf_actual;    /* memory actually allocated */
1122         uint32_t        support;        /* processor support of monitor/mwait */
1123 };
1124 
1125 /*
1126  * xsave/xrestor info.
1127  *
1128  * This structure contains HW feature bits and the size of the xsave save area.
1129  * Note: the kernel declares a fixed size (AVX_XSAVE_SIZE) structure
1130  * (xsave_state) to describe the xsave layout. However, at runtime the
1131  * per-lwp xsave area is dynamically allocated based on xsav_max_size. The
1132  * xsave_state structure simply represents the legacy layout of the beginning
1133  * of the xsave area.
1134  */
1135 struct xsave_info {
1136         uint32_t        xsav_hw_features_low;   /* Supported HW features */
1137         uint32_t        xsav_hw_features_high;  /* Supported HW features */
1138         size_t          xsav_max_size;  /* max size save area for HW features */
1139         size_t          ymm_size;       /* AVX: size of ymm save area */
1140         size_t          ymm_offset;     /* AVX: offset for ymm save area */
1141         size_t          bndregs_size;   /* MPX: size of bndregs save area */
1142         size_t          bndregs_offset; /* MPX: offset for bndregs save area */
1143         size_t          bndcsr_size;    /* MPX: size of bndcsr save area */
1144         size_t          bndcsr_offset;  /* MPX: offset for bndcsr save area */
1145         size_t          opmask_size;    /* AVX512: size of opmask save */
1146         size_t          opmask_offset;  /* AVX512: offset for opmask save */
1147         size_t          zmmlo_size;     /* AVX512: size of zmm 256 save */
1148         size_t          zmmlo_offset;   /* AVX512: offset for zmm 256 save */
1149         size_t          zmmhi_size;     /* AVX512: size of zmm hi reg save */
1150         size_t          zmmhi_offset;   /* AVX512: offset for zmm hi reg save */
1151 };
1152 
1153 
1154 /*
1155  * These constants determine how many of the elements of the
1156  * cpuid we cache in the cpuid_info data structure; the
1157  * remaining elements are accessible via the cpuid instruction.
1158  */
1159 
1160 #define NMAX_CPI_STD    8               /* eax = 0 .. 7 */
1161 #define NMAX_CPI_EXTD   0x1f            /* eax = 0x80000000 .. 0x8000001e */
1162 
1163 /*
1164  * See the big theory statement for a more detailed explanation of what some of
1165  * these members mean.
1166  */
1167 struct cpuid_info {
1168         uint_t cpi_pass;                /* last pass completed */
1169         /*
1170          * standard function information
1171          */
1172         uint_t cpi_maxeax;              /* fn 0: %eax */
1173         char cpi_vendorstr[13];         /* fn 0: %ebx:%ecx:%edx */
1174         uint_t cpi_vendor;              /* enum of cpi_vendorstr */
1175 
1176         uint_t cpi_family;              /* fn 1: extended family */
1177         uint_t cpi_model;               /* fn 1: extended model */
1178         uint_t cpi_step;                /* fn 1: stepping */
1179         chipid_t cpi_chipid;            /* fn 1: %ebx:  Intel: chip # */
1180                                         /*              AMD: package/socket # */
1181         uint_t cpi_brandid;             /* fn 1: %ebx: brand ID */
1182         int cpi_clogid;                 /* fn 1: %ebx: thread # */
1183         uint_t cpi_ncpu_per_chip;       /* fn 1: %ebx: logical cpu count */
1184         uint8_t cpi_cacheinfo[16];      /* fn 2: intel-style cache desc */
1185         uint_t cpi_ncache;              /* fn 2: number of elements */
1186         uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */
1187         id_t cpi_last_lvl_cacheid;      /* fn 4: %eax: derived cache id */
1188         uint_t cpi_cache_leaf_size;     /* Number of cache elements */
1189                                         /* Intel fn: 4, AMD fn: 8000001d */
1190         struct cpuid_regs **cpi_cache_leaves;   /* Acual leaves from above */
1191         struct cpuid_regs cpi_std[NMAX_CPI_STD];        /* 0 .. 7 */
1192         /*
1193          * extended function information
1194          */
1195         uint_t cpi_xmaxeax;             /* fn 0x80000000: %eax */
1196         char cpi_brandstr[49];          /* fn 0x8000000[234] */
1197         uint8_t cpi_pabits;             /* fn 0x80000006: %eax */
1198         uint8_t cpi_vabits;             /* fn 0x80000006: %eax */
1199         uint8_t cpi_fp_amd_save;        /* AMD: FP error pointer save rqd. */
1200         struct  cpuid_regs cpi_extd[NMAX_CPI_EXTD];     /* 0x800000XX */
1201 
1202         id_t cpi_coreid;                /* same coreid => strands share core */
1203         int cpi_pkgcoreid;              /* core number within single package */
1204         uint_t cpi_ncore_per_chip;      /* AMD: fn 0x80000008: %ecx[7-0] */
1205                                         /* Intel: fn 4: %eax[31-26] */
1206 
1207         /*
1208          * These values represent the number of bits that are required to store
1209          * information about the number of cores and threads.
1210          */
1211         uint_t cpi_ncore_bits;
1212         uint_t cpi_nthread_bits;
1213         /*
1214          * supported feature information
1215          */
1216         uint32_t cpi_support[6];
1217 #define STD_EDX_FEATURES        0
1218 #define AMD_EDX_FEATURES        1
1219 #define TM_EDX_FEATURES         2
1220 #define STD_ECX_FEATURES        3
1221 #define AMD_ECX_FEATURES        4
1222 #define STD_EBX_FEATURES        5
1223         /*
1224          * Synthesized information, where known.
1225          */
1226         uint32_t cpi_chiprev;           /* See X86_CHIPREV_* in x86_archext.h */
1227         const char *cpi_chiprevstr;     /* May be NULL if chiprev unknown */
1228         uint32_t cpi_socket;            /* Chip package/socket type */
1229 
1230         struct mwait_info cpi_mwait;    /* fn 5: monitor/mwait info */
1231         uint32_t cpi_apicid;
1232         uint_t cpi_procnodeid;          /* AMD: nodeID on HT, Intel: chipid */
1233         uint_t cpi_procnodes_per_pkg;   /* AMD: # of nodes in the package */
1234                                         /* Intel: 1 */
1235         uint_t cpi_compunitid;          /* AMD: ComputeUnit ID, Intel: coreid */
1236         uint_t cpi_cores_per_compunit;  /* AMD: # of cores in the ComputeUnit */
1237 
1238         struct xsave_info cpi_xsave;    /* fn D: xsave/xrestor info */
1239 };
1240 
1241 
1242 static struct cpuid_info cpuid_info0;
1243 
1244 /*
1245  * These bit fields are defined by the Intel Application Note AP-485
1246  * "Intel Processor Identification and the CPUID Instruction"
1247  */
1248 #define CPI_FAMILY_XTD(cpi)     BITX((cpi)->cpi_std[1].cp_eax, 27, 20)
1249 #define CPI_MODEL_XTD(cpi)      BITX((cpi)->cpi_std[1].cp_eax, 19, 16)
1250 #define CPI_TYPE(cpi)           BITX((cpi)->cpi_std[1].cp_eax, 13, 12)
1251 #define CPI_FAMILY(cpi)         BITX((cpi)->cpi_std[1].cp_eax, 11, 8)
1252 #define CPI_STEP(cpi)           BITX((cpi)->cpi_std[1].cp_eax, 3, 0)
1253 #define CPI_MODEL(cpi)          BITX((cpi)->cpi_std[1].cp_eax, 7, 4)
1254 
1255 #define CPI_FEATURES_EDX(cpi)           ((cpi)->cpi_std[1].cp_edx)
1256 #define CPI_FEATURES_ECX(cpi)           ((cpi)->cpi_std[1].cp_ecx)
1257 #define CPI_FEATURES_XTD_EDX(cpi)       ((cpi)->cpi_extd[1].cp_edx)
1258 #define CPI_FEATURES_XTD_ECX(cpi)       ((cpi)->cpi_extd[1].cp_ecx)
1259 #define CPI_FEATURES_7_0_EBX(cpi)       ((cpi)->cpi_std[7].cp_ebx)
1260 #define CPI_FEATURES_7_0_ECX(cpi)       ((cpi)->cpi_std[7].cp_ecx)
1261 #define CPI_FEATURES_7_0_EDX(cpi)       ((cpi)->cpi_std[7].cp_edx)
1262 
1263 #define CPI_BRANDID(cpi)        BITX((cpi)->cpi_std[1].cp_ebx, 7, 0)
1264 #define CPI_CHUNKS(cpi)         BITX((cpi)->cpi_std[1].cp_ebx, 15, 7)
1265 #define CPI_CPU_COUNT(cpi)      BITX((cpi)->cpi_std[1].cp_ebx, 23, 16)
1266 #define CPI_APIC_ID(cpi)        BITX((cpi)->cpi_std[1].cp_ebx, 31, 24)
1267 
1268 #define CPI_MAXEAX_MAX          0x100           /* sanity control */
1269 #define CPI_XMAXEAX_MAX         0x80000100
1270 #define CPI_FN4_ECX_MAX         0x20            /* sanity: max fn 4 levels */
1271 #define CPI_FNB_ECX_MAX         0x20            /* sanity: max fn B levels */
1272 
1273 /*
1274  * Function 4 (Deterministic Cache Parameters) macros
1275  * Defined by Intel Application Note AP-485
1276  */
1277 #define CPI_NUM_CORES(regs)             BITX((regs)->cp_eax, 31, 26)
1278 #define CPI_NTHR_SHR_CACHE(regs)        BITX((regs)->cp_eax, 25, 14)
1279 #define CPI_FULL_ASSOC_CACHE(regs)      BITX((regs)->cp_eax, 9, 9)
1280 #define CPI_SELF_INIT_CACHE(regs)       BITX((regs)->cp_eax, 8, 8)
1281 #define CPI_CACHE_LVL(regs)             BITX((regs)->cp_eax, 7, 5)
1282 #define CPI_CACHE_TYPE(regs)            BITX((regs)->cp_eax, 4, 0)
1283 #define CPI_CPU_LEVEL_TYPE(regs)        BITX((regs)->cp_ecx, 15, 8)
1284 
1285 #define CPI_CACHE_WAYS(regs)            BITX((regs)->cp_ebx, 31, 22)
1286 #define CPI_CACHE_PARTS(regs)           BITX((regs)->cp_ebx, 21, 12)
1287 #define CPI_CACHE_COH_LN_SZ(regs)       BITX((regs)->cp_ebx, 11, 0)
1288 
1289 #define CPI_CACHE_SETS(regs)            BITX((regs)->cp_ecx, 31, 0)
1290 
1291 #define CPI_PREFCH_STRIDE(regs)         BITX((regs)->cp_edx, 9, 0)
1292 
1293 
1294 /*
1295  * A couple of shorthand macros to identify "later" P6-family chips
1296  * like the Pentium M and Core.  First, the "older" P6-based stuff
1297  * (loosely defined as "pre-Pentium-4"):
1298  * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon
1299  */
1300 #define IS_LEGACY_P6(cpi) (                     \
1301         cpi->cpi_family == 6 &&                      \
1302                 (cpi->cpi_model == 1 ||              \
1303                 cpi->cpi_model == 3 ||               \
1304                 cpi->cpi_model == 5 ||               \
1305                 cpi->cpi_model == 6 ||               \
1306                 cpi->cpi_model == 7 ||               \
1307                 cpi->cpi_model == 8 ||               \
1308                 cpi->cpi_model == 0xA ||     \
1309                 cpi->cpi_model == 0xB)               \
1310 )
1311 
1312 /* A "new F6" is everything with family 6 that's not the above */
1313 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi))
1314 
1315 /* Extended family/model support */
1316 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \
1317         cpi->cpi_family >= 0xf)
1318 
1319 /*
1320  * Info for monitor/mwait idle loop.
1321  *
1322  * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's
1323  * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November
1324  * 2006.
1325  * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual
1326  * Documentation Updates" #33633, Rev 2.05, December 2006.
1327  */
1328 #define MWAIT_SUPPORT           (0x00000001)    /* mwait supported */
1329 #define MWAIT_EXTENSIONS        (0x00000002)    /* extenstion supported */
1330 #define MWAIT_ECX_INT_ENABLE    (0x00000004)    /* ecx 1 extension supported */
1331 #define MWAIT_SUPPORTED(cpi)    ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON)
1332 #define MWAIT_INT_ENABLE(cpi)   ((cpi)->cpi_std[5].cp_ecx & 0x2)
1333 #define MWAIT_EXTENSION(cpi)    ((cpi)->cpi_std[5].cp_ecx & 0x1)
1334 #define MWAIT_SIZE_MIN(cpi)     BITX((cpi)->cpi_std[5].cp_eax, 15, 0)
1335 #define MWAIT_SIZE_MAX(cpi)     BITX((cpi)->cpi_std[5].cp_ebx, 15, 0)
1336 /*
1337  * Number of sub-cstates for a given c-state.
1338  */
1339 #define MWAIT_NUM_SUBC_STATES(cpi, c_state)                     \
1340         BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state)
1341 
1342 /*
1343  * XSAVE leaf 0xD enumeration
1344  */
1345 #define CPUID_LEAFD_2_YMM_OFFSET        576
1346 #define CPUID_LEAFD_2_YMM_SIZE          256
1347 
1348 /*
1349  * Common extended leaf names to cut down on typos.
1350  */
1351 #define CPUID_LEAF_EXT_0                0x80000000
1352 #define CPUID_LEAF_EXT_8                0x80000008
1353 #define CPUID_LEAF_EXT_1d               0x8000001d
1354 #define CPUID_LEAF_EXT_1e               0x8000001e
1355 
1356 /*
1357  * Functions we consune from cpuid_subr.c;  don't publish these in a header
1358  * file to try and keep people using the expected cpuid_* interfaces.
1359  */
1360 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t);
1361 extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t);
1362 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t);
1363 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t);
1364 extern uint_t _cpuid_vendorstr_to_vendorcode(char *);
1365 
1366 /*
1367  * Apply up various platform-dependent restrictions where the
1368  * underlying platform restrictions mean the CPU can be marked
1369  * as less capable than its cpuid instruction would imply.
1370  */
1371 #if defined(__xpv)
1372 static void
1373 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp)
1374 {
1375         switch (eax) {
1376         case 1: {
1377                 uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ?
1378                     0 : CPUID_INTC_EDX_MCA;
1379                 cp->cp_edx &=
1380                     ~(mcamask |
1381                     CPUID_INTC_EDX_PSE |
1382                     CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
1383                     CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR |
1384                     CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT |
1385                     CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
1386                     CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT);
1387                 break;
1388         }
1389 
1390         case 0x80000001:
1391                 cp->cp_edx &=
1392                     ~(CPUID_AMD_EDX_PSE |
1393                     CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
1394                     CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE |
1395                     CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 |
1396                     CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
1397                     CPUID_AMD_EDX_TSCP);
1398                 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY;
1399                 break;
1400         default:
1401                 break;
1402         }
1403 
1404         switch (vendor) {
1405         case X86_VENDOR_Intel:
1406                 switch (eax) {
1407                 case 4:
1408                         /*
1409                          * Zero out the (ncores-per-chip - 1) field
1410                          */
1411                         cp->cp_eax &= 0x03fffffff;
1412                         break;
1413                 default:
1414                         break;
1415                 }
1416                 break;
1417         case X86_VENDOR_AMD:
1418                 switch (eax) {
1419 
1420                 case 0x80000001:
1421                         cp->cp_ecx &= ~CPUID_AMD_ECX_CR8D;
1422                         break;
1423 
1424                 case CPUID_LEAF_EXT_8:
1425                         /*
1426                          * Zero out the (ncores-per-chip - 1) field
1427                          */
1428                         cp->cp_ecx &= 0xffffff00;
1429                         break;
1430                 default:
1431                         break;
1432                 }
1433                 break;
1434         default:
1435                 break;
1436         }
1437 }
1438 #else
1439 #define platform_cpuid_mangle(vendor, eax, cp)  /* nothing */
1440 #endif
1441 
1442 /*
1443  *  Some undocumented ways of patching the results of the cpuid
1444  *  instruction to permit running Solaris 10 on future cpus that
1445  *  we don't currently support.  Could be set to non-zero values
1446  *  via settings in eeprom.
1447  */
1448 
1449 uint32_t cpuid_feature_ecx_include;
1450 uint32_t cpuid_feature_ecx_exclude;
1451 uint32_t cpuid_feature_edx_include;
1452 uint32_t cpuid_feature_edx_exclude;
1453 
1454 /*
1455  * Allocate space for mcpu_cpi in the machcpu structure for all non-boot CPUs.
1456  */
1457 void
1458 cpuid_alloc_space(cpu_t *cpu)
1459 {
1460         /*
1461          * By convention, cpu0 is the boot cpu, which is set up
1462          * before memory allocation is available.  All other cpus get
1463          * their cpuid_info struct allocated here.
1464          */
1465         ASSERT(cpu->cpu_id != 0);
1466         ASSERT(cpu->cpu_m.mcpu_cpi == NULL);
1467         cpu->cpu_m.mcpu_cpi =
1468             kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP);
1469 }
1470 
1471 void
1472 cpuid_free_space(cpu_t *cpu)
1473 {
1474         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1475         int i;
1476 
1477         ASSERT(cpi != NULL);
1478         ASSERT(cpi != &cpuid_info0);
1479 
1480         /*
1481          * Free up any cache leaf related dynamic storage. The first entry was
1482          * cached from the standard cpuid storage, so we should not free it.
1483          */
1484         for (i = 1; i < cpi->cpi_cache_leaf_size; i++)
1485                 kmem_free(cpi->cpi_cache_leaves[i], sizeof (struct cpuid_regs));
1486         if (cpi->cpi_cache_leaf_size > 0)
1487                 kmem_free(cpi->cpi_cache_leaves,
1488                     cpi->cpi_cache_leaf_size * sizeof (struct cpuid_regs *));
1489 
1490         kmem_free(cpi, sizeof (*cpi));
1491         cpu->cpu_m.mcpu_cpi = NULL;
1492 }
1493 
1494 #if !defined(__xpv)
1495 /*
1496  * Determine the type of the underlying platform. This is used to customize
1497  * initialization of various subsystems (e.g. TSC). determine_platform() must
1498  * only ever be called once to prevent two processors from seeing different
1499  * values of platform_type. Must be called before cpuid_pass1(), the earliest
1500  * consumer to execute (uses _cpuid_chiprev --> synth_amd_info --> get_hwenv).
1501  */
1502 void
1503 determine_platform(void)
1504 {
1505         struct cpuid_regs cp;
1506         uint32_t base;
1507         uint32_t regs[4];
1508         char *hvstr = (char *)regs;
1509 
1510         ASSERT(platform_type == -1);
1511 
1512         platform_type = HW_NATIVE;
1513 
1514         if (!enable_platform_detection)
1515                 return;
1516 
1517         /*
1518          * If Hypervisor CPUID bit is set, try to determine hypervisor
1519          * vendor signature, and set platform type accordingly.
1520          *
1521          * References:
1522          * http://lkml.org/lkml/2008/10/1/246
1523          * http://kb.vmware.com/kb/1009458
1524          */
1525         cp.cp_eax = 0x1;
1526         (void) __cpuid_insn(&cp);
1527         if ((cp.cp_ecx & CPUID_INTC_ECX_HV) != 0) {
1528                 cp.cp_eax = 0x40000000;
1529                 (void) __cpuid_insn(&cp);
1530                 regs[0] = cp.cp_ebx;
1531                 regs[1] = cp.cp_ecx;
1532                 regs[2] = cp.cp_edx;
1533                 regs[3] = 0;
1534                 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0) {
1535                         platform_type = HW_XEN_HVM;
1536                         return;
1537                 }
1538                 if (strcmp(hvstr, HVSIG_VMWARE) == 0) {
1539                         platform_type = HW_VMWARE;
1540                         return;
1541                 }
1542                 if (strcmp(hvstr, HVSIG_KVM) == 0) {
1543                         platform_type = HW_KVM;
1544                         return;
1545                 }
1546                 if (strcmp(hvstr, HVSIG_BHYVE) == 0) {
1547                         platform_type = HW_BHYVE;
1548                         return;
1549                 }
1550                 if (strcmp(hvstr, HVSIG_MICROSOFT) == 0)
1551                         platform_type = HW_MICROSOFT;
1552         } else {
1553                 /*
1554                  * Check older VMware hardware versions. VMware hypervisor is
1555                  * detected by performing an IN operation to VMware hypervisor
1556                  * port and checking that value returned in %ebx is VMware
1557                  * hypervisor magic value.
1558                  *
1559                  * References: http://kb.vmware.com/kb/1009458
1560                  */
1561                 vmware_port(VMWARE_HVCMD_GETVERSION, regs);
1562                 if (regs[1] == VMWARE_HVMAGIC) {
1563                         platform_type = HW_VMWARE;
1564                         return;
1565                 }
1566         }
1567 
1568         /*
1569          * Check Xen hypervisor. In a fully virtualized domain,
1570          * Xen's pseudo-cpuid function returns a string representing the
1571          * Xen signature in %ebx, %ecx, and %edx. %eax contains the maximum
1572          * supported cpuid function. We need at least a (base + 2) leaf value
1573          * to do what we want to do. Try different base values, since the
1574          * hypervisor might use a different one depending on whether Hyper-V
1575          * emulation is switched on by default or not.
1576          */
1577         for (base = 0x40000000; base < 0x40010000; base += 0x100) {
1578                 cp.cp_eax = base;
1579                 (void) __cpuid_insn(&cp);
1580                 regs[0] = cp.cp_ebx;
1581                 regs[1] = cp.cp_ecx;
1582                 regs[2] = cp.cp_edx;
1583                 regs[3] = 0;
1584                 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0 &&
1585                     cp.cp_eax >= (base + 2)) {
1586                         platform_type &= ~HW_NATIVE;
1587                         platform_type |= HW_XEN_HVM;
1588                         return;
1589                 }
1590         }
1591 }
1592 
1593 int
1594 get_hwenv(void)
1595 {
1596         ASSERT(platform_type != -1);
1597         return (platform_type);
1598 }
1599 
1600 int
1601 is_controldom(void)
1602 {
1603         return (0);
1604 }
1605 
1606 #else
1607 
1608 int
1609 get_hwenv(void)
1610 {
1611         return (HW_XEN_PV);
1612 }
1613 
1614 int
1615 is_controldom(void)
1616 {
1617         return (DOMAIN_IS_INITDOMAIN(xen_info));
1618 }
1619 
1620 #endif  /* __xpv */
1621 
1622 /*
1623  * Make sure that we have gathered all of the CPUID leaves that we might need to
1624  * determine topology. We assume that the standard leaf 1 has already been done
1625  * and that xmaxeax has already been calculated.
1626  */
1627 static void
1628 cpuid_gather_amd_topology_leaves(cpu_t *cpu)
1629 {
1630         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1631 
1632         if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
1633                 struct cpuid_regs *cp;
1634 
1635                 cp = &cpi->cpi_extd[8];
1636                 cp->cp_eax = CPUID_LEAF_EXT_8;
1637                 (void) __cpuid_insn(cp);
1638                 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8, cp);
1639         }
1640 
1641         if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1642             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1643                 struct cpuid_regs *cp;
1644 
1645                 cp = &cpi->cpi_extd[0x1e];
1646                 cp->cp_eax = CPUID_LEAF_EXT_1e;
1647                 (void) __cpuid_insn(cp);
1648         }
1649 }
1650 
1651 /*
1652  * Get the APIC ID for this processor. If Leaf B is present and valid, we prefer
1653  * it to everything else. If not, and we're on an AMD system where 8000001e is
1654  * valid, then we use that. Othewrise, we fall back to the default value for the
1655  * APIC ID in leaf 1.
1656  */
1657 static uint32_t
1658 cpuid_gather_apicid(struct cpuid_info *cpi)
1659 {
1660         /*
1661          * Leaf B changes based on the arguments to it. Beacuse we don't cache
1662          * it, we need to gather it again.
1663          */
1664         if (cpi->cpi_maxeax >= 0xB) {
1665                 struct cpuid_regs regs;
1666                 struct cpuid_regs *cp;
1667 
1668                 cp = &regs;
1669                 cp->cp_eax = 0xB;
1670                 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1671                 (void) __cpuid_insn(cp);
1672 
1673                 if (cp->cp_ebx != 0) {
1674                         return (cp->cp_edx);
1675                 }
1676         }
1677 
1678         if (cpi->cpi_vendor == X86_VENDOR_AMD &&
1679             is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1680             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1681                 return (cpi->cpi_extd[0x1e].cp_eax);
1682         }
1683 
1684         return (CPI_APIC_ID(cpi));
1685 }
1686 
1687 /*
1688  * For AMD processors, attempt to calculate the number of chips and cores that
1689  * exist. The way that we do this varies based on the generation, because the
1690  * generations themselves have changed dramatically.
1691  *
1692  * If cpuid leaf 0x80000008 exists, that generally tells us the number of cores.
1693  * However, with the advent of family 17h (Zen) it actually tells us the number
1694  * of threads, so we need to look at leaf 0x8000001e if available to determine
1695  * its value. Otherwise, for all prior families, the number of enabled cores is
1696  * the same as threads.
1697  *
1698  * If we do not have leaf 0x80000008, then we assume that this processor does
1699  * not have anything. AMD's older CPUID specification says there's no reason to
1700  * fall back to leaf 1.
1701  *
1702  * In some virtualization cases we will not have leaf 8000001e or it will be
1703  * zero. When that happens we assume the number of threads is one.
1704  */
1705 static void
1706 cpuid_amd_ncores(struct cpuid_info *cpi, uint_t *ncpus, uint_t *ncores)
1707 {
1708         uint_t nthreads, nthread_per_core;
1709 
1710         nthreads = nthread_per_core = 1;
1711 
1712         if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
1713                 nthreads = BITX(cpi->cpi_extd[8].cp_ecx, 7, 0) + 1;
1714         } else if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) {
1715                 nthreads = CPI_CPU_COUNT(cpi);
1716         }
1717 
1718         /*
1719          * For us to have threads, and know about it, we have to be at least at
1720          * family 17h and have the cpuid bit that says we have extended
1721          * topology.
1722          */
1723         if (cpi->cpi_family >= 0x17 &&
1724             is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1725             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1726                 nthread_per_core = BITX(cpi->cpi_extd[0x1e].cp_ebx, 15, 8) + 1;
1727         }
1728 
1729         *ncpus = nthreads;
1730         *ncores = nthreads / nthread_per_core;
1731 }
1732 
1733 /*
1734  * Seed the initial values for the cores and threads for an Intel based
1735  * processor. These values will be overwritten if we detect that the processor
1736  * supports CPUID leaf 0xb.
1737  */
1738 static void
1739 cpuid_intel_ncores(struct cpuid_info *cpi, uint_t *ncpus, uint_t *ncores)
1740 {
1741         /*
1742          * Only seed the number of physical cores from the first level leaf 4
1743          * information. The number of threads there indicate how many share the
1744          * L1 cache, which may or may not have anything to do with the number of
1745          * logical CPUs per core.
1746          */
1747         if (cpi->cpi_maxeax >= 4) {
1748                 *ncores = BITX(cpi->cpi_std[4].cp_eax, 31, 26) + 1;
1749         } else {
1750                 *ncores = 1;
1751         }
1752 
1753         if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) {
1754                 *ncpus = CPI_CPU_COUNT(cpi);
1755         } else {
1756                 *ncpus = *ncores;
1757         }
1758 }
1759 
1760 static boolean_t
1761 cpuid_leafB_getids(cpu_t *cpu)
1762 {
1763         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1764         struct cpuid_regs regs;
1765         struct cpuid_regs *cp;
1766 
1767         if (cpi->cpi_maxeax < 0xB)
1768                 return (B_FALSE);
1769 
1770         cp = &regs;
1771         cp->cp_eax = 0xB;
1772         cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1773 
1774         (void) __cpuid_insn(cp);
1775 
1776         /*
1777          * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which
1778          * indicates that the extended topology enumeration leaf is
1779          * available.
1780          */
1781         if (cp->cp_ebx != 0) {
1782                 uint32_t x2apic_id = 0;
1783                 uint_t coreid_shift = 0;
1784                 uint_t ncpu_per_core = 1;
1785                 uint_t chipid_shift = 0;
1786                 uint_t ncpu_per_chip = 1;
1787                 uint_t i;
1788                 uint_t level;
1789 
1790                 for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
1791                         cp->cp_eax = 0xB;
1792                         cp->cp_ecx = i;
1793 
1794                         (void) __cpuid_insn(cp);
1795                         level = CPI_CPU_LEVEL_TYPE(cp);
1796 
1797                         if (level == 1) {
1798                                 x2apic_id = cp->cp_edx;
1799                                 coreid_shift = BITX(cp->cp_eax, 4, 0);
1800                                 ncpu_per_core = BITX(cp->cp_ebx, 15, 0);
1801                         } else if (level == 2) {
1802                                 x2apic_id = cp->cp_edx;
1803                                 chipid_shift = BITX(cp->cp_eax, 4, 0);
1804                                 ncpu_per_chip = BITX(cp->cp_ebx, 15, 0);
1805                         }
1806                 }
1807 
1808                 /*
1809                  * cpi_apicid is taken care of in cpuid_gather_apicid.
1810                  */
1811                 cpi->cpi_ncpu_per_chip = ncpu_per_chip;
1812                 cpi->cpi_ncore_per_chip = ncpu_per_chip /
1813                     ncpu_per_core;
1814                 cpi->cpi_chipid = x2apic_id >> chipid_shift;
1815                 cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1);
1816                 cpi->cpi_coreid = x2apic_id >> coreid_shift;
1817                 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
1818                 cpi->cpi_procnodeid = cpi->cpi_chipid;
1819                 cpi->cpi_compunitid = cpi->cpi_coreid;
1820 
1821                 if (coreid_shift > 0 && chipid_shift > coreid_shift) {
1822                         cpi->cpi_nthread_bits = coreid_shift;
1823                         cpi->cpi_ncore_bits = chipid_shift - coreid_shift;
1824                 }
1825 
1826                 return (B_TRUE);
1827         } else {
1828                 return (B_FALSE);
1829         }
1830 }
1831 
1832 static void
1833 cpuid_intel_getids(cpu_t *cpu, void *feature)
1834 {
1835         uint_t i;
1836         uint_t chipid_shift = 0;
1837         uint_t coreid_shift = 0;
1838         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1839 
1840         /*
1841          * There are no compute units or processor nodes currently on Intel.
1842          * Always set these to one.
1843          */
1844         cpi->cpi_procnodes_per_pkg = 1;
1845         cpi->cpi_cores_per_compunit = 1;
1846 
1847         /*
1848          * If cpuid Leaf B is present, use that to try and get this information.
1849          * It will be the most accurate for Intel CPUs.
1850          */
1851         if (cpuid_leafB_getids(cpu))
1852                 return;
1853 
1854         /*
1855          * In this case, we have the leaf 1 and leaf 4 values for ncpu_per_chip
1856          * and ncore_per_chip. These represent the largest power of two values
1857          * that we need to cover all of the IDs in the system. Therefore, we use
1858          * those values to seed the number of bits needed to cover information
1859          * in the case when leaf B is not available. These values will probably
1860          * be larger than required, but that's OK.
1861          */
1862         cpi->cpi_nthread_bits = ddi_fls(cpi->cpi_ncpu_per_chip);
1863         cpi->cpi_ncore_bits = ddi_fls(cpi->cpi_ncore_per_chip);
1864 
1865         for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1)
1866                 chipid_shift++;
1867 
1868         cpi->cpi_chipid = cpi->cpi_apicid >> chipid_shift;
1869         cpi->cpi_clogid = cpi->cpi_apicid & ((1 << chipid_shift) - 1);
1870 
1871         if (is_x86_feature(feature, X86FSET_CMP)) {
1872                 /*
1873                  * Multi-core (and possibly multi-threaded)
1874                  * processors.
1875                  */
1876                 uint_t ncpu_per_core;
1877                 if (cpi->cpi_ncore_per_chip == 1)
1878                         ncpu_per_core = cpi->cpi_ncpu_per_chip;
1879                 else if (cpi->cpi_ncore_per_chip > 1)
1880                         ncpu_per_core = cpi->cpi_ncpu_per_chip /
1881                             cpi->cpi_ncore_per_chip;
1882                 /*
1883                  * 8bit APIC IDs on dual core Pentiums
1884                  * look like this:
1885                  *
1886                  * +-----------------------+------+------+
1887                  * | Physical Package ID   |  MC  |  HT  |
1888                  * +-----------------------+------+------+
1889                  * <------- chipid -------->
1890                  * <------- coreid --------------->
1891                  *                         <--- clogid -->
1892                  *                         <------>
1893                  *                         pkgcoreid
1894                  *
1895                  * Where the number of bits necessary to
1896                  * represent MC and HT fields together equals
1897                  * to the minimum number of bits necessary to
1898                  * store the value of cpi->cpi_ncpu_per_chip.
1899                  * Of those bits, the MC part uses the number
1900                  * of bits necessary to store the value of
1901                  * cpi->cpi_ncore_per_chip.
1902                  */
1903                 for (i = 1; i < ncpu_per_core; i <<= 1)
1904                         coreid_shift++;
1905                 cpi->cpi_coreid = cpi->cpi_apicid >> coreid_shift;
1906                 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
1907         } else if (is_x86_feature(feature, X86FSET_HTT)) {
1908                 /*
1909                  * Single-core multi-threaded processors.
1910                  */
1911                 cpi->cpi_coreid = cpi->cpi_chipid;
1912                 cpi->cpi_pkgcoreid = 0;
1913         } else {
1914                 /*
1915                  * Single-core single-thread processors.
1916                  */
1917                 cpi->cpi_coreid = cpu->cpu_id;
1918                 cpi->cpi_pkgcoreid = 0;
1919         }
1920         cpi->cpi_procnodeid = cpi->cpi_chipid;
1921         cpi->cpi_compunitid = cpi->cpi_coreid;
1922 }
1923 
1924 /*
1925  * Historically, AMD has had CMP chips with only a single thread per core.
1926  * However, starting in family 17h (Zen), this has changed and they now have
1927  * multiple threads. Our internal core id needs to be a unique value.
1928  *
1929  * To determine the core id of an AMD system, if we're from a family before 17h,
1930  * then we just use the cpu id, as that gives us a good value that will be
1931  * unique for each core. If instead, we're on family 17h or later, then we need
1932  * to do something more complicated. CPUID leaf 0x8000001e can tell us
1933  * how many threads are in the system. Based on that, we'll shift the APIC ID.
1934  * We can't use the normal core id in that leaf as it's only unique within the
1935  * socket, which is perfect for cpi_pkgcoreid, but not us.
1936  */
1937 static id_t
1938 cpuid_amd_get_coreid(cpu_t *cpu)
1939 {
1940         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1941 
1942         if (cpi->cpi_family >= 0x17 &&
1943             is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1944             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1945                 uint_t nthreads = BITX(cpi->cpi_extd[0x1e].cp_ebx, 15, 8) + 1;
1946                 if (nthreads > 1) {
1947                         VERIFY3U(nthreads, ==, 2);
1948                         return (cpi->cpi_apicid >> 1);
1949                 }
1950         }
1951 
1952         return (cpu->cpu_id);
1953 }
1954 
1955 /*
1956  * IDs on AMD is a more challenging task. This is notable because of the
1957  * following two facts:
1958  *
1959  *  1. Before family 0x17 (Zen), there was no support for SMT and there was
1960  *     also no way to get an actual unique core id from the system. As such, we
1961  *     synthesize this case by using cpu->cpu_id.  This scheme does not,
1962  *     however, guarantee that sibling cores of a chip will have sequential
1963  *     coreids starting at a multiple of the number of cores per chip - that is
1964  *     usually the case, but if the ACPI MADT table is presented in a different
1965  *     order then we need to perform a few more gymnastics for the pkgcoreid.
1966  *
1967  *  2. In families 0x15 and 16x (Bulldozer and co.) the cores came in groups
1968  *     called compute units. These compute units share the L1I cache, L2 cache,
1969  *     and the FPU. To deal with this, a new topology leaf was added in
1970  *     0x8000001e. However, parts of this leaf have different meanings
1971  *     once we get to family 0x17.
1972  */
1973 
1974 static void
1975 cpuid_amd_getids(cpu_t *cpu, uchar_t *features)
1976 {
1977         int i, first_half, coreidsz;
1978         uint32_t nb_caps_reg;
1979         uint_t node2_1;
1980         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1981         struct cpuid_regs *cp;
1982 
1983         /*
1984          * Calculate the core id (this comes from hardware in family 0x17 if it
1985          * hasn't been stripped by virtualization). We always set the compute
1986          * unit id to the same value. Also, initialize the default number of
1987          * cores per compute unit and nodes per package. This will be
1988          * overwritten when we know information about a particular family.
1989          */
1990         cpi->cpi_coreid = cpuid_amd_get_coreid(cpu);
1991         cpi->cpi_compunitid = cpi->cpi_coreid;
1992         cpi->cpi_cores_per_compunit = 1;
1993         cpi->cpi_procnodes_per_pkg = 1;
1994 
1995         /*
1996          * To construct the logical ID, we need to determine how many APIC IDs
1997          * are dedicated to the cores and threads. This is provided for us in
1998          * 0x80000008. However, if it's not present (say due to virtualization),
1999          * then we assume it's one. This should be present on all 64-bit AMD
2000          * processors.  It was added in family 0xf (Hammer).
2001          */
2002         if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
2003                 coreidsz = BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12);
2004 
2005                 /*
2006                  * In AMD parlance chip is really a node while illumos
2007                  * uses chip as equivalent to socket/package.
2008                  */
2009                 if (coreidsz == 0) {
2010                         /* Use legacy method */
2011                         for (i = 1; i < cpi->cpi_ncore_per_chip; i <<= 1)
2012                                 coreidsz++;
2013                         if (coreidsz == 0)
2014                                 coreidsz = 1;
2015                 }
2016         } else {
2017                 /* Assume single-core part */
2018                 coreidsz = 1;
2019         }
2020         cpi->cpi_clogid = cpi->cpi_apicid & ((1 << coreidsz) - 1);
2021 
2022         /*
2023          * The package core ID varies depending on the family. For family 17h,
2024          * we can get this directly from leaf CPUID_LEAF_EXT_1e. Otherwise, we
2025          * can use the clogid as is. When family 17h is virtualized, the clogid
2026          * should be sufficient as if we don't have valid data in the leaf, then
2027          * we won't think we have SMT, in which case the cpi_clogid should be
2028          * sufficient.
2029          */
2030         if (cpi->cpi_family >= 0x17 &&
2031             is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
2032             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e &&
2033             cpi->cpi_extd[0x1e].cp_ebx != 0) {
2034                 cpi->cpi_pkgcoreid = BITX(cpi->cpi_extd[0x1e].cp_ebx, 7, 0);
2035         } else {
2036                 cpi->cpi_pkgcoreid = cpi->cpi_clogid;
2037         }
2038 
2039         /*
2040          * Obtain the node ID and compute unit IDs. If we're on family 0x15
2041          * (bulldozer) or newer, then we can derive all of this from leaf
2042          * CPUID_LEAF_EXT_1e. Otherwise, the method varies by family.
2043          */
2044         if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
2045             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
2046                 cp = &cpi->cpi_extd[0x1e];
2047 
2048                 cpi->cpi_procnodes_per_pkg = BITX(cp->cp_ecx, 10, 8) + 1;
2049                 cpi->cpi_procnodeid = BITX(cp->cp_ecx, 7, 0);
2050 
2051                 /*
2052                  * For Bulldozer-era CPUs, recalculate the compute unit
2053                  * information.
2054                  */
2055                 if (cpi->cpi_family >= 0x15 && cpi->cpi_family < 0x17) {
2056                         cpi->cpi_cores_per_compunit =
2057                             BITX(cp->cp_ebx, 15, 8) + 1;
2058                         cpi->cpi_compunitid = BITX(cp->cp_ebx, 7, 0) +
2059                             (cpi->cpi_ncore_per_chip /
2060                             cpi->cpi_cores_per_compunit) *
2061                             (cpi->cpi_procnodeid /
2062                             cpi->cpi_procnodes_per_pkg);
2063                 }
2064         } else if (cpi->cpi_family == 0xf || cpi->cpi_family >= 0x11) {
2065                 cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7;
2066         } else if (cpi->cpi_family == 0x10) {
2067                 /*
2068                  * See if we are a multi-node processor.
2069                  * All processors in the system have the same number of nodes
2070                  */
2071                 nb_caps_reg =  pci_getl_func(0, 24, 3, 0xe8);
2072                 if ((cpi->cpi_model < 8) || BITX(nb_caps_reg, 29, 29) == 0) {
2073                         /* Single-node */
2074                         cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5,
2075                             coreidsz);
2076                 } else {
2077 
2078                         /*
2079                          * Multi-node revision D (2 nodes per package
2080                          * are supported)
2081                          */
2082                         cpi->cpi_procnodes_per_pkg = 2;
2083 
2084                         first_half = (cpi->cpi_pkgcoreid <=
2085                             (cpi->cpi_ncore_per_chip/2 - 1));
2086 
2087                         if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) {
2088                                 /* We are BSP */
2089                                 cpi->cpi_procnodeid = (first_half ? 0 : 1);
2090                         } else {
2091 
2092                                 /* We are AP */
2093                                 /* NodeId[2:1] bits to use for reading F3xe8 */
2094                                 node2_1 = BITX(cpi->cpi_apicid, 5, 4) << 1;
2095 
2096                                 nb_caps_reg =
2097                                     pci_getl_func(0, 24 + node2_1, 3, 0xe8);
2098 
2099                                 /*
2100                                  * Check IntNodeNum bit (31:30, but bit 31 is
2101                                  * always 0 on dual-node processors)
2102                                  */
2103                                 if (BITX(nb_caps_reg, 30, 30) == 0)
2104                                         cpi->cpi_procnodeid = node2_1 +
2105                                             !first_half;
2106                                 else
2107                                         cpi->cpi_procnodeid = node2_1 +
2108                                             first_half;
2109                         }
2110                 }
2111         } else {
2112                 cpi->cpi_procnodeid = 0;
2113         }
2114 
2115         cpi->cpi_chipid =
2116             cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg;
2117 
2118         cpi->cpi_ncore_bits = coreidsz;
2119         cpi->cpi_nthread_bits = ddi_fls(cpi->cpi_ncpu_per_chip /
2120             cpi->cpi_ncore_per_chip);
2121 }
2122 
2123 static void
2124 cpuid_scan_security(cpu_t *cpu, uchar_t *featureset)
2125 {
2126         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2127 
2128         if (cpi->cpi_vendor == X86_VENDOR_AMD &&
2129             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
2130                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBPB)
2131                         add_x86_feature(featureset, X86FSET_IBPB);
2132                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBRS)
2133                         add_x86_feature(featureset, X86FSET_IBRS);
2134                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_STIBP)
2135                         add_x86_feature(featureset, X86FSET_STIBP);
2136                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBRS_ALL)
2137                         add_x86_feature(featureset, X86FSET_IBRS_ALL);
2138                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_STIBP_ALL)
2139                         add_x86_feature(featureset, X86FSET_STIBP_ALL);
2140                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_PREFER_IBRS)
2141                         add_x86_feature(featureset, X86FSET_RSBA);
2142                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_SSBD)
2143                         add_x86_feature(featureset, X86FSET_SSBD);
2144                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_VIRT_SSBD)
2145                         add_x86_feature(featureset, X86FSET_SSBD_VIRT);
2146                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_SSB_NO)
2147                         add_x86_feature(featureset, X86FSET_SSB_NO);
2148         } else if (cpi->cpi_vendor == X86_VENDOR_Intel &&
2149             cpi->cpi_maxeax >= 7) {
2150                 struct cpuid_regs *ecp;
2151                 ecp = &cpi->cpi_std[7];
2152 
2153                 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_SPEC_CTRL) {
2154                         add_x86_feature(featureset, X86FSET_IBRS);
2155                         add_x86_feature(featureset, X86FSET_IBPB);
2156                 }
2157 
2158                 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_STIBP) {
2159                         add_x86_feature(featureset, X86FSET_STIBP);
2160                 }
2161 
2162                 /*
2163                  * Don't read the arch caps MSR on xpv where we lack the
2164                  * on_trap().
2165                  */
2166 #ifndef __xpv
2167                 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_ARCH_CAPS) {
2168                         on_trap_data_t otd;
2169 
2170                         /*
2171                          * Be paranoid and assume we'll get a #GP.
2172                          */
2173                         if (!on_trap(&otd, OT_DATA_ACCESS)) {
2174                                 uint64_t reg;
2175 
2176                                 reg = rdmsr(MSR_IA32_ARCH_CAPABILITIES);
2177                                 if (reg & IA32_ARCH_CAP_RDCL_NO) {
2178                                         add_x86_feature(featureset,
2179                                             X86FSET_RDCL_NO);
2180                                 }
2181                                 if (reg & IA32_ARCH_CAP_IBRS_ALL) {
2182                                         add_x86_feature(featureset,
2183                                             X86FSET_IBRS_ALL);
2184                                 }
2185                                 if (reg & IA32_ARCH_CAP_RSBA) {
2186                                         add_x86_feature(featureset,
2187                                             X86FSET_RSBA);
2188                                 }
2189                                 if (reg & IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY) {
2190                                         add_x86_feature(featureset,
2191                                             X86FSET_L1D_VM_NO);
2192                                 }
2193                                 if (reg & IA32_ARCH_CAP_SSB_NO) {
2194                                         add_x86_feature(featureset,
2195                                             X86FSET_SSB_NO);
2196                                 }
2197                         }
2198                         no_trap();
2199                 }
2200 #endif  /* !__xpv */
2201 
2202                 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_SSBD)
2203                         add_x86_feature(featureset, X86FSET_SSBD);
2204 
2205                 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_FLUSH_CMD)
2206                         add_x86_feature(featureset, X86FSET_FLUSH_CMD);
2207         }
2208 }
2209 
2210 /*
2211  * Setup XFeature_Enabled_Mask register. Required by xsave feature.
2212  */
2213 void
2214 setup_xfem(void)
2215 {
2216         uint64_t flags = XFEATURE_LEGACY_FP;
2217 
2218         ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
2219 
2220         if (is_x86_feature(x86_featureset, X86FSET_SSE))
2221                 flags |= XFEATURE_SSE;
2222 
2223         if (is_x86_feature(x86_featureset, X86FSET_AVX))
2224                 flags |= XFEATURE_AVX;
2225 
2226         if (is_x86_feature(x86_featureset, X86FSET_AVX512F))
2227                 flags |= XFEATURE_AVX512;
2228 
2229         set_xcr(XFEATURE_ENABLED_MASK, flags);
2230 
2231         xsave_bv_all = flags;
2232 }
2233 
2234 static void
2235 cpuid_pass1_topology(cpu_t *cpu, uchar_t *featureset)
2236 {
2237         struct cpuid_info *cpi;
2238 
2239         cpi = cpu->cpu_m.mcpu_cpi;
2240 
2241         if (cpi->cpi_vendor == X86_VENDOR_AMD) {
2242                 cpuid_gather_amd_topology_leaves(cpu);
2243         }
2244 
2245         cpi->cpi_apicid = cpuid_gather_apicid(cpi);
2246 
2247         /*
2248          * Before we can calculate the IDs that we should assign to this
2249          * processor, we need to understand how many cores and threads it has.
2250          */
2251         switch (cpi->cpi_vendor) {
2252         case X86_VENDOR_Intel:
2253                 cpuid_intel_ncores(cpi, &cpi->cpi_ncpu_per_chip,
2254                     &cpi->cpi_ncore_per_chip);
2255                 break;
2256         case X86_VENDOR_AMD:
2257                 cpuid_amd_ncores(cpi, &cpi->cpi_ncpu_per_chip,
2258                     &cpi->cpi_ncore_per_chip);
2259                 break;
2260         default:
2261                 /*
2262                  * If we have some other x86 compatible chip, it's not clear how
2263                  * they would behave. The most common case is virtualization
2264                  * today, though there are also 64-bit VIA chips. Assume that
2265                  * all we can get is the basic Leaf 1 HTT information.
2266                  */
2267                 if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) {
2268                         cpi->cpi_ncore_per_chip = 1;
2269                         cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi);
2270                 }
2271                 break;
2272         }
2273 
2274         /*
2275          * Based on the calculated number of threads and cores, potentially
2276          * assign the HTT and CMT features.
2277          */
2278         if (cpi->cpi_ncore_per_chip > 1) {
2279                 add_x86_feature(featureset, X86FSET_CMP);
2280         }
2281 
2282         if (cpi->cpi_ncpu_per_chip > 1 &&
2283             cpi->cpi_ncpu_per_chip != cpi->cpi_ncore_per_chip) {
2284                 add_x86_feature(featureset, X86FSET_HTT);
2285         }
2286 
2287         /*
2288          * Now that has been set up, we need to go through and calculate all of
2289          * the rest of the parameters that exist. If we think the CPU doesn't
2290          * have either SMT (HTT) or CMP, then we basically go through and fake
2291          * up information in some way. The most likely case for this is
2292          * virtualization where we have a lot of partial topology information.
2293          */
2294         if (!is_x86_feature(featureset, X86FSET_HTT) &&
2295             !is_x86_feature(featureset, X86FSET_CMP)) {
2296                 /*
2297                  * This is a single core, single-threaded processor.
2298                  */
2299                 cpi->cpi_procnodes_per_pkg = 1;
2300                 cpi->cpi_cores_per_compunit = 1;
2301                 cpi->cpi_compunitid = 0;
2302                 cpi->cpi_chipid = -1;
2303                 cpi->cpi_clogid = 0;
2304                 cpi->cpi_coreid = cpu->cpu_id;
2305                 cpi->cpi_pkgcoreid = 0;
2306                 if (cpi->cpi_vendor == X86_VENDOR_AMD) {
2307                         cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 3, 0);
2308                 } else {
2309                         cpi->cpi_procnodeid = cpi->cpi_chipid;
2310                 }
2311         } else {
2312                 switch (cpi->cpi_vendor) {
2313                 case X86_VENDOR_Intel:
2314                         cpuid_intel_getids(cpu, featureset);
2315                         break;
2316                 case X86_VENDOR_AMD:
2317                         cpuid_amd_getids(cpu, featureset);
2318                         break;
2319                 default:
2320                         /*
2321                          * In this case, it's hard to say what we should do.
2322                          * We're going to model them to the OS as single core
2323                          * threads. We don't have a good identifier for them, so
2324                          * we're just going to use the cpu id all on a single
2325                          * chip.
2326                          *
2327                          * This case has historically been different from the
2328                          * case above where we don't have HTT or CMP. While they
2329                          * could be combined, we've opted to keep it separate to
2330                          * minimize the risk of topology changes in weird cases.
2331                          */
2332                         cpi->cpi_procnodes_per_pkg = 1;
2333                         cpi->cpi_cores_per_compunit = 1;
2334                         cpi->cpi_chipid = 0;
2335                         cpi->cpi_coreid = cpu->cpu_id;
2336                         cpi->cpi_clogid = cpu->cpu_id;
2337                         cpi->cpi_pkgcoreid = cpu->cpu_id;
2338                         cpi->cpi_procnodeid = cpi->cpi_chipid;
2339                         cpi->cpi_compunitid = cpi->cpi_coreid;
2340                         break;
2341                 }
2342         }
2343 }
2344 
2345 void
2346 cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
2347 {
2348         uint32_t mask_ecx, mask_edx;
2349         struct cpuid_info *cpi;
2350         struct cpuid_regs *cp;
2351         int xcpuid;
2352 #if !defined(__xpv)
2353         extern int idle_cpu_prefer_mwait;
2354 #endif
2355 
2356         /*
2357          * Space statically allocated for BSP, ensure pointer is set
2358          */
2359         if (cpu->cpu_id == 0) {
2360                 if (cpu->cpu_m.mcpu_cpi == NULL)
2361                         cpu->cpu_m.mcpu_cpi = &cpuid_info0;
2362         }
2363 
2364         add_x86_feature(featureset, X86FSET_CPUID);
2365 
2366         cpi = cpu->cpu_m.mcpu_cpi;
2367         ASSERT(cpi != NULL);
2368         cp = &cpi->cpi_std[0];
2369         cp->cp_eax = 0;
2370         cpi->cpi_maxeax = __cpuid_insn(cp);
2371         {
2372                 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr;
2373                 *iptr++ = cp->cp_ebx;
2374                 *iptr++ = cp->cp_edx;
2375                 *iptr++ = cp->cp_ecx;
2376                 *(char *)&cpi->cpi_vendorstr[12] = '\0';
2377         }
2378 
2379         cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr);
2380         x86_vendor = cpi->cpi_vendor; /* for compatibility */
2381 
2382         /*
2383          * Limit the range in case of weird hardware
2384          */
2385         if (cpi->cpi_maxeax > CPI_MAXEAX_MAX)
2386                 cpi->cpi_maxeax = CPI_MAXEAX_MAX;
2387         if (cpi->cpi_maxeax < 1)
2388                 goto pass1_done;
2389 
2390         cp = &cpi->cpi_std[1];
2391         cp->cp_eax = 1;
2392         (void) __cpuid_insn(cp);
2393 
2394         /*
2395          * Extract identifying constants for easy access.
2396          */
2397         cpi->cpi_model = CPI_MODEL(cpi);
2398         cpi->cpi_family = CPI_FAMILY(cpi);
2399 
2400         if (cpi->cpi_family == 0xf)
2401                 cpi->cpi_family += CPI_FAMILY_XTD(cpi);
2402 
2403         /*
2404          * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf.
2405          * Intel, and presumably everyone else, uses model == 0xf, as
2406          * one would expect (max value means possible overflow).  Sigh.
2407          */
2408 
2409         switch (cpi->cpi_vendor) {
2410         case X86_VENDOR_Intel:
2411                 if (IS_EXTENDED_MODEL_INTEL(cpi))
2412                         cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
2413                 break;
2414         case X86_VENDOR_AMD:
2415                 if (CPI_FAMILY(cpi) == 0xf)
2416                         cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
2417                 break;
2418         default:
2419                 if (cpi->cpi_model == 0xf)
2420                         cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
2421                 break;
2422         }
2423 
2424         cpi->cpi_step = CPI_STEP(cpi);
2425         cpi->cpi_brandid = CPI_BRANDID(cpi);
2426 
2427         /*
2428          * *default* assumptions:
2429          * - believe %edx feature word
2430          * - ignore %ecx feature word
2431          * - 32-bit virtual and physical addressing
2432          */
2433         mask_edx = 0xffffffff;
2434         mask_ecx = 0;
2435 
2436         cpi->cpi_pabits = cpi->cpi_vabits = 32;
2437 
2438         switch (cpi->cpi_vendor) {
2439         case X86_VENDOR_Intel:
2440                 if (cpi->cpi_family == 5)
2441                         x86_type = X86_TYPE_P5;
2442                 else if (IS_LEGACY_P6(cpi)) {
2443                         x86_type = X86_TYPE_P6;
2444                         pentiumpro_bug4046376 = 1;
2445                         /*
2446                          * Clear the SEP bit when it was set erroneously
2447                          */
2448                         if (cpi->cpi_model < 3 && cpi->cpi_step < 3)
2449                                 cp->cp_edx &= ~CPUID_INTC_EDX_SEP;
2450                 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) {
2451                         x86_type = X86_TYPE_P4;
2452                         /*
2453                          * We don't currently depend on any of the %ecx
2454                          * features until Prescott, so we'll only check
2455                          * this from P4 onwards.  We might want to revisit
2456                          * that idea later.
2457                          */
2458                         mask_ecx = 0xffffffff;
2459                 } else if (cpi->cpi_family > 0xf)
2460                         mask_ecx = 0xffffffff;
2461                 /*
2462                  * We don't support MONITOR/MWAIT if leaf 5 is not available
2463                  * to obtain the monitor linesize.
2464                  */
2465                 if (cpi->cpi_maxeax < 5)
2466                         mask_ecx &= ~CPUID_INTC_ECX_MON;
2467                 break;
2468         case X86_VENDOR_IntelClone:
2469         default:
2470                 break;
2471         case X86_VENDOR_AMD:
2472 #if defined(OPTERON_ERRATUM_108)
2473                 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) {
2474                         cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0;
2475                         cpi->cpi_model = 0xc;
2476                 } else
2477 #endif
2478                 if (cpi->cpi_family == 5) {
2479                         /*
2480                          * AMD K5 and K6
2481                          *
2482                          * These CPUs have an incomplete implementation
2483                          * of MCA/MCE which we mask away.
2484                          */
2485                         mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA);
2486 
2487                         /*
2488                          * Model 0 uses the wrong (APIC) bit
2489                          * to indicate PGE.  Fix it here.
2490                          */
2491                         if (cpi->cpi_model == 0) {
2492                                 if (cp->cp_edx & 0x200) {
2493                                         cp->cp_edx &= ~0x200;
2494                                         cp->cp_edx |= CPUID_INTC_EDX_PGE;
2495                                 }
2496                         }
2497 
2498                         /*
2499                          * Early models had problems w/ MMX; disable.
2500                          */
2501                         if (cpi->cpi_model < 6)
2502                                 mask_edx &= ~CPUID_INTC_EDX_MMX;
2503                 }
2504 
2505                 /*
2506                  * For newer families, SSE3 and CX16, at least, are valid;
2507                  * enable all
2508                  */
2509                 if (cpi->cpi_family >= 0xf)
2510                         mask_ecx = 0xffffffff;
2511                 /*
2512                  * We don't support MONITOR/MWAIT if leaf 5 is not available
2513                  * to obtain the monitor linesize.
2514                  */
2515                 if (cpi->cpi_maxeax < 5)
2516                         mask_ecx &= ~CPUID_INTC_ECX_MON;
2517 
2518 #if !defined(__xpv)
2519                 /*
2520                  * AMD has not historically used MWAIT in the CPU's idle loop.
2521                  * Pre-family-10h Opterons do not have the MWAIT instruction. We
2522                  * know for certain that in at least family 17h, per AMD, mwait
2523                  * is preferred. Families in-between are less certain.
2524                  */
2525                 if (cpi->cpi_family < 0x17) {
2526                         idle_cpu_prefer_mwait = 0;
2527                 }
2528 #endif
2529 
2530                 break;
2531         case X86_VENDOR_TM:
2532                 /*
2533                  * workaround the NT workaround in CMS 4.1
2534                  */
2535                 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 &&
2536                     (cpi->cpi_step == 2 || cpi->cpi_step == 3))
2537                         cp->cp_edx |= CPUID_INTC_EDX_CX8;
2538                 break;
2539         case X86_VENDOR_Centaur:
2540                 /*
2541                  * workaround the NT workarounds again
2542                  */
2543                 if (cpi->cpi_family == 6)
2544                         cp->cp_edx |= CPUID_INTC_EDX_CX8;
2545                 break;
2546         case X86_VENDOR_Cyrix:
2547                 /*
2548                  * We rely heavily on the probing in locore
2549                  * to actually figure out what parts, if any,
2550                  * of the Cyrix cpuid instruction to believe.
2551                  */
2552                 switch (x86_type) {
2553                 case X86_TYPE_CYRIX_486:
2554                         mask_edx = 0;
2555                         break;
2556                 case X86_TYPE_CYRIX_6x86:
2557                         mask_edx = 0;
2558                         break;
2559                 case X86_TYPE_CYRIX_6x86L:
2560                         mask_edx =
2561                             CPUID_INTC_EDX_DE |
2562                             CPUID_INTC_EDX_CX8;
2563                         break;
2564                 case X86_TYPE_CYRIX_6x86MX:
2565                         mask_edx =
2566                             CPUID_INTC_EDX_DE |
2567                             CPUID_INTC_EDX_MSR |
2568                             CPUID_INTC_EDX_CX8 |
2569                             CPUID_INTC_EDX_PGE |
2570                             CPUID_INTC_EDX_CMOV |
2571                             CPUID_INTC_EDX_MMX;
2572                         break;
2573                 case X86_TYPE_CYRIX_GXm:
2574                         mask_edx =
2575                             CPUID_INTC_EDX_MSR |
2576                             CPUID_INTC_EDX_CX8 |
2577                             CPUID_INTC_EDX_CMOV |
2578                             CPUID_INTC_EDX_MMX;
2579                         break;
2580                 case X86_TYPE_CYRIX_MediaGX:
2581                         break;
2582                 case X86_TYPE_CYRIX_MII:
2583                 case X86_TYPE_VIA_CYRIX_III:
2584                         mask_edx =
2585                             CPUID_INTC_EDX_DE |
2586                             CPUID_INTC_EDX_TSC |
2587                             CPUID_INTC_EDX_MSR |
2588                             CPUID_INTC_EDX_CX8 |
2589                             CPUID_INTC_EDX_PGE |
2590                             CPUID_INTC_EDX_CMOV |
2591                             CPUID_INTC_EDX_MMX;
2592                         break;
2593                 default:
2594                         break;
2595                 }
2596                 break;
2597         }
2598 
2599 #if defined(__xpv)
2600         /*
2601          * Do not support MONITOR/MWAIT under a hypervisor
2602          */
2603         mask_ecx &= ~CPUID_INTC_ECX_MON;
2604         /*
2605          * Do not support XSAVE under a hypervisor for now
2606          */
2607         xsave_force_disable = B_TRUE;
2608 
2609 #endif  /* __xpv */
2610 
2611         if (xsave_force_disable) {
2612                 mask_ecx &= ~CPUID_INTC_ECX_XSAVE;
2613                 mask_ecx &= ~CPUID_INTC_ECX_AVX;
2614                 mask_ecx &= ~CPUID_INTC_ECX_F16C;
2615                 mask_ecx &= ~CPUID_INTC_ECX_FMA;
2616         }
2617 
2618         /*
2619          * Now we've figured out the masks that determine
2620          * which bits we choose to believe, apply the masks
2621          * to the feature words, then map the kernel's view
2622          * of these feature words into its feature word.
2623          */
2624         cp->cp_edx &= mask_edx;
2625         cp->cp_ecx &= mask_ecx;
2626 
2627         /*
2628          * apply any platform restrictions (we don't call this
2629          * immediately after __cpuid_insn here, because we need the
2630          * workarounds applied above first)
2631          */
2632         platform_cpuid_mangle(cpi->cpi_vendor, 1, cp);
2633 
2634         /*
2635          * In addition to ecx and edx, Intel and AMD are storing a bunch of
2636          * instruction set extensions in leaf 7's ebx, ecx, and edx.
2637          */
2638         if (cpi->cpi_maxeax >= 7) {
2639                 struct cpuid_regs *ecp;
2640                 ecp = &cpi->cpi_std[7];
2641                 ecp->cp_eax = 7;
2642                 ecp->cp_ecx = 0;
2643                 (void) __cpuid_insn(ecp);
2644 
2645                 /*
2646                  * If XSAVE has been disabled, just ignore all of the
2647                  * extended-save-area dependent flags here.
2648                  */
2649                 if (xsave_force_disable) {
2650                         ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_BMI1;
2651                         ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_BMI2;
2652                         ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_AVX2;
2653                         ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_MPX;
2654                         ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_ALL_AVX512;
2655                         ecp->cp_ecx &= ~CPUID_INTC_ECX_7_0_ALL_AVX512;
2656                         ecp->cp_edx &= ~CPUID_INTC_EDX_7_0_ALL_AVX512;
2657                 }
2658 
2659                 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_SMEP)
2660                         add_x86_feature(featureset, X86FSET_SMEP);
2661 
2662                 /*
2663                  * We check disable_smap here in addition to in startup_smap()
2664                  * to ensure CPUs that aren't the boot CPU don't accidentally
2665                  * include it in the feature set and thus generate a mismatched
2666                  * x86 feature set across CPUs.
2667                  */
2668                 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_SMAP &&
2669                     disable_smap == 0)
2670                         add_x86_feature(featureset, X86FSET_SMAP);
2671 
2672                 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_RDSEED)
2673                         add_x86_feature(featureset, X86FSET_RDSEED);
2674 
2675                 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_ADX)
2676                         add_x86_feature(featureset, X86FSET_ADX);
2677 
2678                 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_FSGSBASE)
2679                         add_x86_feature(featureset, X86FSET_FSGSBASE);
2680 
2681                 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_CLFLUSHOPT)
2682                         add_x86_feature(featureset, X86FSET_CLFLUSHOPT);
2683 
2684                 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
2685                         if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_INVPCID)
2686                                 add_x86_feature(featureset, X86FSET_INVPCID);
2687 
2688                         if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_MPX)
2689                                 add_x86_feature(featureset, X86FSET_MPX);
2690 
2691                         if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_CLWB)
2692                                 add_x86_feature(featureset, X86FSET_CLWB);
2693                 }
2694         }
2695 
2696         /*
2697          * fold in overrides from the "eeprom" mechanism
2698          */
2699         cp->cp_edx |= cpuid_feature_edx_include;
2700         cp->cp_edx &= ~cpuid_feature_edx_exclude;
2701 
2702         cp->cp_ecx |= cpuid_feature_ecx_include;
2703         cp->cp_ecx &= ~cpuid_feature_ecx_exclude;
2704 
2705         if (cp->cp_edx & CPUID_INTC_EDX_PSE) {
2706                 add_x86_feature(featureset, X86FSET_LARGEPAGE);
2707         }
2708         if (cp->cp_edx & CPUID_INTC_EDX_TSC) {
2709                 add_x86_feature(featureset, X86FSET_TSC);
2710         }
2711         if (cp->cp_edx & CPUID_INTC_EDX_MSR) {
2712                 add_x86_feature(featureset, X86FSET_MSR);
2713         }
2714         if (cp->cp_edx & CPUID_INTC_EDX_MTRR) {
2715                 add_x86_feature(featureset, X86FSET_MTRR);
2716         }
2717         if (cp->cp_edx & CPUID_INTC_EDX_PGE) {
2718                 add_x86_feature(featureset, X86FSET_PGE);
2719         }
2720         if (cp->cp_edx & CPUID_INTC_EDX_CMOV) {
2721                 add_x86_feature(featureset, X86FSET_CMOV);
2722         }
2723         if (cp->cp_edx & CPUID_INTC_EDX_MMX) {
2724                 add_x86_feature(featureset, X86FSET_MMX);
2725         }
2726         if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 &&
2727             (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) {
2728                 add_x86_feature(featureset, X86FSET_MCA);
2729         }
2730         if (cp->cp_edx & CPUID_INTC_EDX_PAE) {
2731                 add_x86_feature(featureset, X86FSET_PAE);
2732         }
2733         if (cp->cp_edx & CPUID_INTC_EDX_CX8) {
2734                 add_x86_feature(featureset, X86FSET_CX8);
2735         }
2736         if (cp->cp_ecx & CPUID_INTC_ECX_CX16) {
2737                 add_x86_feature(featureset, X86FSET_CX16);
2738         }
2739         if (cp->cp_edx & CPUID_INTC_EDX_PAT) {
2740                 add_x86_feature(featureset, X86FSET_PAT);
2741         }
2742         if (cp->cp_edx & CPUID_INTC_EDX_SEP) {
2743                 add_x86_feature(featureset, X86FSET_SEP);
2744         }
2745         if (cp->cp_edx & CPUID_INTC_EDX_FXSR) {
2746                 /*
2747                  * In our implementation, fxsave/fxrstor
2748                  * are prerequisites before we'll even
2749                  * try and do SSE things.
2750                  */
2751                 if (cp->cp_edx & CPUID_INTC_EDX_SSE) {
2752                         add_x86_feature(featureset, X86FSET_SSE);
2753                 }
2754                 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) {
2755                         add_x86_feature(featureset, X86FSET_SSE2);
2756                 }
2757                 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) {
2758                         add_x86_feature(featureset, X86FSET_SSE3);
2759                 }
2760                 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) {
2761                         add_x86_feature(featureset, X86FSET_SSSE3);
2762                 }
2763                 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) {
2764                         add_x86_feature(featureset, X86FSET_SSE4_1);
2765                 }
2766                 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) {
2767                         add_x86_feature(featureset, X86FSET_SSE4_2);
2768                 }
2769                 if (cp->cp_ecx & CPUID_INTC_ECX_AES) {
2770                         add_x86_feature(featureset, X86FSET_AES);
2771                 }
2772                 if (cp->cp_ecx & CPUID_INTC_ECX_PCLMULQDQ) {
2773                         add_x86_feature(featureset, X86FSET_PCLMULQDQ);
2774                 }
2775 
2776                 if (cpi->cpi_std[7].cp_ebx & CPUID_INTC_EBX_7_0_SHA)
2777                         add_x86_feature(featureset, X86FSET_SHA);
2778 
2779                 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_UMIP)
2780                         add_x86_feature(featureset, X86FSET_UMIP);
2781                 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_PKU)
2782                         add_x86_feature(featureset, X86FSET_PKU);
2783                 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_OSPKE)
2784                         add_x86_feature(featureset, X86FSET_OSPKE);
2785 
2786                 if (cp->cp_ecx & CPUID_INTC_ECX_XSAVE) {
2787                         add_x86_feature(featureset, X86FSET_XSAVE);
2788 
2789                         /* We only test AVX & AVX512 when there is XSAVE */
2790 
2791                         if (cp->cp_ecx & CPUID_INTC_ECX_AVX) {
2792                                 add_x86_feature(featureset,
2793                                     X86FSET_AVX);
2794 
2795                                 /*
2796                                  * Intel says we can't check these without also
2797                                  * checking AVX.
2798                                  */
2799                                 if (cp->cp_ecx & CPUID_INTC_ECX_F16C)
2800                                         add_x86_feature(featureset,
2801                                             X86FSET_F16C);
2802 
2803                                 if (cp->cp_ecx & CPUID_INTC_ECX_FMA)
2804                                         add_x86_feature(featureset,
2805                                             X86FSET_FMA);
2806 
2807                                 if (cpi->cpi_std[7].cp_ebx &
2808                                     CPUID_INTC_EBX_7_0_BMI1)
2809                                         add_x86_feature(featureset,
2810                                             X86FSET_BMI1);
2811 
2812                                 if (cpi->cpi_std[7].cp_ebx &
2813                                     CPUID_INTC_EBX_7_0_BMI2)
2814                                         add_x86_feature(featureset,
2815                                             X86FSET_BMI2);
2816 
2817                                 if (cpi->cpi_std[7].cp_ebx &
2818                                     CPUID_INTC_EBX_7_0_AVX2)
2819                                         add_x86_feature(featureset,
2820                                             X86FSET_AVX2);
2821                         }
2822 
2823                         if (cpi->cpi_vendor == X86_VENDOR_Intel &&
2824                             (cpi->cpi_std[7].cp_ebx &
2825                             CPUID_INTC_EBX_7_0_AVX512F) != 0) {
2826                                 add_x86_feature(featureset, X86FSET_AVX512F);
2827 
2828                                 if (cpi->cpi_std[7].cp_ebx &
2829                                     CPUID_INTC_EBX_7_0_AVX512DQ)
2830                                         add_x86_feature(featureset,
2831                                             X86FSET_AVX512DQ);
2832                                 if (cpi->cpi_std[7].cp_ebx &
2833                                     CPUID_INTC_EBX_7_0_AVX512IFMA)
2834                                         add_x86_feature(featureset,
2835                                             X86FSET_AVX512FMA);
2836                                 if (cpi->cpi_std[7].cp_ebx &
2837                                     CPUID_INTC_EBX_7_0_AVX512PF)
2838                                         add_x86_feature(featureset,
2839                                             X86FSET_AVX512PF);
2840                                 if (cpi->cpi_std[7].cp_ebx &
2841                                     CPUID_INTC_EBX_7_0_AVX512ER)
2842                                         add_x86_feature(featureset,
2843                                             X86FSET_AVX512ER);
2844                                 if (cpi->cpi_std[7].cp_ebx &
2845                                     CPUID_INTC_EBX_7_0_AVX512CD)
2846                                         add_x86_feature(featureset,
2847                                             X86FSET_AVX512CD);
2848                                 if (cpi->cpi_std[7].cp_ebx &
2849                                     CPUID_INTC_EBX_7_0_AVX512BW)
2850                                         add_x86_feature(featureset,
2851                                             X86FSET_AVX512BW);
2852                                 if (cpi->cpi_std[7].cp_ebx &
2853                                     CPUID_INTC_EBX_7_0_AVX512VL)
2854                                         add_x86_feature(featureset,
2855                                             X86FSET_AVX512VL);
2856 
2857                                 if (cpi->cpi_std[7].cp_ecx &
2858                                     CPUID_INTC_ECX_7_0_AVX512VBMI)
2859                                         add_x86_feature(featureset,
2860                                             X86FSET_AVX512VBMI);
2861                                 if (cpi->cpi_std[7].cp_ecx &
2862                                     CPUID_INTC_ECX_7_0_AVX512VNNI)
2863                                         add_x86_feature(featureset,
2864                                             X86FSET_AVX512VNNI);
2865                                 if (cpi->cpi_std[7].cp_ecx &
2866                                     CPUID_INTC_ECX_7_0_AVX512VPOPCDQ)
2867                                         add_x86_feature(featureset,
2868                                             X86FSET_AVX512VPOPCDQ);
2869 
2870                                 if (cpi->cpi_std[7].cp_edx &
2871                                     CPUID_INTC_EDX_7_0_AVX5124NNIW)
2872                                         add_x86_feature(featureset,
2873                                             X86FSET_AVX512NNIW);
2874                                 if (cpi->cpi_std[7].cp_edx &
2875                                     CPUID_INTC_EDX_7_0_AVX5124FMAPS)
2876                                         add_x86_feature(featureset,
2877                                             X86FSET_AVX512FMAPS);
2878                         }
2879                 }
2880         }
2881 
2882         if (cpi->cpi_vendor == X86_VENDOR_Intel) {
2883                 if (cp->cp_ecx & CPUID_INTC_ECX_PCID) {
2884                         add_x86_feature(featureset, X86FSET_PCID);
2885                 }
2886         }
2887 
2888         if (cp->cp_ecx & CPUID_INTC_ECX_X2APIC) {
2889                 add_x86_feature(featureset, X86FSET_X2APIC);
2890         }
2891         if (cp->cp_edx & CPUID_INTC_EDX_DE) {
2892                 add_x86_feature(featureset, X86FSET_DE);
2893         }
2894 #if !defined(__xpv)
2895         if (cp->cp_ecx & CPUID_INTC_ECX_MON) {
2896 
2897                 /*
2898                  * We require the CLFLUSH instruction for erratum workaround
2899                  * to use MONITOR/MWAIT.
2900                  */
2901                 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
2902                         cpi->cpi_mwait.support |= MWAIT_SUPPORT;
2903                         add_x86_feature(featureset, X86FSET_MWAIT);
2904                 } else {
2905                         extern int idle_cpu_assert_cflush_monitor;
2906 
2907                         /*
2908                          * All processors we are aware of which have
2909                          * MONITOR/MWAIT also have CLFLUSH.
2910                          */
2911                         if (idle_cpu_assert_cflush_monitor) {
2912                                 ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) &&
2913                                     (cp->cp_edx & CPUID_INTC_EDX_CLFSH));
2914                         }
2915                 }
2916         }
2917 #endif  /* __xpv */
2918 
2919         if (cp->cp_ecx & CPUID_INTC_ECX_VMX) {
2920                 add_x86_feature(featureset, X86FSET_VMX);
2921         }
2922 
2923         if (cp->cp_ecx & CPUID_INTC_ECX_RDRAND)
2924                 add_x86_feature(featureset, X86FSET_RDRAND);
2925 
2926         /*
2927          * Only need it first time, rest of the cpus would follow suit.
2928          * we only capture this for the bootcpu.
2929          */
2930         if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
2931                 add_x86_feature(featureset, X86FSET_CLFSH);
2932                 x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8);
2933         }
2934         if (is_x86_feature(featureset, X86FSET_PAE))
2935                 cpi->cpi_pabits = 36;
2936 
2937         if (cpi->cpi_maxeax >= 0xD && !xsave_force_disable) {
2938                 struct cpuid_regs r, *ecp;
2939 
2940                 ecp = &r;
2941                 ecp->cp_eax = 0xD;
2942                 ecp->cp_ecx = 1;
2943                 ecp->cp_edx = ecp->cp_ebx = 0;
2944                 (void) __cpuid_insn(ecp);
2945 
2946                 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVEOPT)
2947                         add_x86_feature(featureset, X86FSET_XSAVEOPT);
2948                 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVEC)
2949                         add_x86_feature(featureset, X86FSET_XSAVEC);
2950                 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVES)
2951                         add_x86_feature(featureset, X86FSET_XSAVES);
2952         }
2953 
2954         /*
2955          * Work on the "extended" feature information, doing
2956          * some basic initialization for cpuid_pass2()
2957          */
2958         xcpuid = 0;
2959         switch (cpi->cpi_vendor) {
2960         case X86_VENDOR_Intel:
2961                 /*
2962                  * On KVM we know we will have proper support for extended
2963                  * cpuid.
2964                  */
2965                 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf ||
2966                     (get_hwenv() == HW_KVM && cpi->cpi_family == 6 &&
2967                     (cpi->cpi_model == 6 || cpi->cpi_model == 2)))
2968                         xcpuid++;
2969                 break;
2970         case X86_VENDOR_AMD:
2971                 if (cpi->cpi_family > 5 ||
2972                     (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
2973                         xcpuid++;
2974                 break;
2975         case X86_VENDOR_Cyrix:
2976                 /*
2977                  * Only these Cyrix CPUs are -known- to support
2978                  * extended cpuid operations.
2979                  */
2980                 if (x86_type == X86_TYPE_VIA_CYRIX_III ||
2981                     x86_type == X86_TYPE_CYRIX_GXm)
2982                         xcpuid++;
2983                 break;
2984         case X86_VENDOR_Centaur:
2985         case X86_VENDOR_TM:
2986         default:
2987                 xcpuid++;
2988                 break;
2989         }
2990 
2991         if (xcpuid) {
2992                 cp = &cpi->cpi_extd[0];
2993                 cp->cp_eax = CPUID_LEAF_EXT_0;
2994                 cpi->cpi_xmaxeax = __cpuid_insn(cp);
2995         }
2996 
2997         if (cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) {
2998 
2999                 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX)
3000                         cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX;
3001 
3002                 switch (cpi->cpi_vendor) {
3003                 case X86_VENDOR_Intel:
3004                 case X86_VENDOR_AMD:
3005                         if (cpi->cpi_xmaxeax < 0x80000001)
3006                                 break;
3007                         cp = &cpi->cpi_extd[1];
3008                         cp->cp_eax = 0x80000001;
3009                         (void) __cpuid_insn(cp);
3010 
3011                         if (cpi->cpi_vendor == X86_VENDOR_AMD &&
3012                             cpi->cpi_family == 5 &&
3013                             cpi->cpi_model == 6 &&
3014                             cpi->cpi_step == 6) {
3015                                 /*
3016                                  * K6 model 6 uses bit 10 to indicate SYSC
3017                                  * Later models use bit 11. Fix it here.
3018                                  */
3019                                 if (cp->cp_edx & 0x400) {
3020                                         cp->cp_edx &= ~0x400;
3021                                         cp->cp_edx |= CPUID_AMD_EDX_SYSC;
3022                                 }
3023                         }
3024 
3025                         platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp);
3026 
3027                         /*
3028                          * Compute the additions to the kernel's feature word.
3029                          */
3030                         if (cp->cp_edx & CPUID_AMD_EDX_NX) {
3031                                 add_x86_feature(featureset, X86FSET_NX);
3032                         }
3033 
3034                         /*
3035                          * Regardless whether or not we boot 64-bit,
3036                          * we should have a way to identify whether
3037                          * the CPU is capable of running 64-bit.
3038                          */
3039                         if (cp->cp_edx & CPUID_AMD_EDX_LM) {
3040                                 add_x86_feature(featureset, X86FSET_64);
3041                         }
3042 
3043                         /* 1 GB large page - enable only for 64 bit kernel */
3044                         if (cp->cp_edx & CPUID_AMD_EDX_1GPG) {
3045                                 add_x86_feature(featureset, X86FSET_1GPG);
3046                         }
3047 
3048                         if ((cpi->cpi_vendor == X86_VENDOR_AMD) &&
3049                             (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) &&
3050                             (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) {
3051                                 add_x86_feature(featureset, X86FSET_SSE4A);
3052                         }
3053 
3054                         /*
3055                          * It's really tricky to support syscall/sysret in
3056                          * the i386 kernel; we rely on sysenter/sysexit
3057                          * instead.  In the amd64 kernel, things are -way-
3058                          * better.
3059                          */
3060                         if (cp->cp_edx & CPUID_AMD_EDX_SYSC) {
3061                                 add_x86_feature(featureset, X86FSET_ASYSC);
3062                         }
3063 
3064                         /*
3065                          * While we're thinking about system calls, note
3066                          * that AMD processors don't support sysenter
3067                          * in long mode at all, so don't try to program them.
3068                          */
3069                         if (x86_vendor == X86_VENDOR_AMD) {
3070                                 remove_x86_feature(featureset, X86FSET_SEP);
3071                         }
3072 
3073                         if (cp->cp_edx & CPUID_AMD_EDX_TSCP) {
3074                                 add_x86_feature(featureset, X86FSET_TSCP);
3075                         }
3076 
3077                         if (cp->cp_ecx & CPUID_AMD_ECX_SVM) {
3078                                 add_x86_feature(featureset, X86FSET_SVM);
3079                         }
3080 
3081                         if (cp->cp_ecx & CPUID_AMD_ECX_TOPOEXT) {
3082                                 add_x86_feature(featureset, X86FSET_TOPOEXT);
3083                         }
3084 
3085                         if (cp->cp_ecx & CPUID_AMD_ECX_PCEC) {
3086                                 add_x86_feature(featureset, X86FSET_AMD_PCEC);
3087                         }
3088 
3089                         if (cp->cp_ecx & CPUID_AMD_ECX_XOP) {
3090                                 add_x86_feature(featureset, X86FSET_XOP);
3091                         }
3092 
3093                         if (cp->cp_ecx & CPUID_AMD_ECX_FMA4) {
3094                                 add_x86_feature(featureset, X86FSET_FMA4);
3095                         }
3096 
3097                         if (cp->cp_ecx & CPUID_AMD_ECX_TBM) {
3098                                 add_x86_feature(featureset, X86FSET_TBM);
3099                         }
3100 
3101                         if (cp->cp_ecx & CPUID_AMD_ECX_MONITORX) {
3102                                 add_x86_feature(featureset, X86FSET_MONITORX);
3103                         }
3104                         break;
3105                 default:
3106                         break;
3107                 }
3108 
3109                 /*
3110                  * Get CPUID data about processor cores and hyperthreads.
3111                  */
3112                 switch (cpi->cpi_vendor) {
3113                 case X86_VENDOR_Intel:
3114                         if (cpi->cpi_maxeax >= 4) {
3115                                 cp = &cpi->cpi_std[4];
3116                                 cp->cp_eax = 4;
3117                                 cp->cp_ecx = 0;
3118                                 (void) __cpuid_insn(cp);
3119                                 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp);
3120                         }
3121                         /*FALLTHROUGH*/
3122                 case X86_VENDOR_AMD:
3123                         if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8)
3124                                 break;
3125                         cp = &cpi->cpi_extd[8];
3126                         cp->cp_eax = CPUID_LEAF_EXT_8;
3127                         (void) __cpuid_insn(cp);
3128                         platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8,
3129                             cp);
3130 
3131                         /*
3132                          * AMD uses ebx for some extended functions.
3133                          */
3134                         if (cpi->cpi_vendor == X86_VENDOR_AMD) {
3135                                 /*
3136                                  * While we're here, check for the AMD "Error
3137                                  * Pointer Zero/Restore" feature. This can be
3138                                  * used to setup the FP save handlers
3139                                  * appropriately.
3140                                  */
3141                                 if (cp->cp_ebx & CPUID_AMD_EBX_ERR_PTR_ZERO) {
3142                                         cpi->cpi_fp_amd_save = 0;
3143                                 } else {
3144                                         cpi->cpi_fp_amd_save = 1;
3145                                 }
3146 
3147                                 if (cp->cp_ebx & CPUID_AMD_EBX_CLZERO) {
3148                                         add_x86_feature(featureset,
3149                                             X86FSET_CLZERO);
3150                                 }
3151                         }
3152 
3153                         /*
3154                          * Virtual and physical address limits from
3155                          * cpuid override previously guessed values.
3156                          */
3157                         cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0);
3158                         cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8);
3159                         break;
3160                 default:
3161                         break;
3162                 }
3163 
3164                 /*
3165                  * Get CPUID data about TSC Invariance in Deep C-State.
3166                  */
3167                 switch (cpi->cpi_vendor) {
3168                 case X86_VENDOR_Intel:
3169                 case X86_VENDOR_AMD:
3170                         if (cpi->cpi_maxeax >= 7) {
3171                                 cp = &cpi->cpi_extd[7];
3172                                 cp->cp_eax = 0x80000007;
3173                                 cp->cp_ecx = 0;
3174                                 (void) __cpuid_insn(cp);
3175                         }
3176                         break;
3177                 default:
3178                         break;
3179                 }
3180         }
3181 
3182         cpuid_pass1_topology(cpu, featureset);
3183 
3184         /*
3185          * Synthesize chip "revision" and socket type
3186          */
3187         cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family,
3188             cpi->cpi_model, cpi->cpi_step);
3189         cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor,
3190             cpi->cpi_family, cpi->cpi_model, cpi->cpi_step);
3191         cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family,
3192             cpi->cpi_model, cpi->cpi_step);
3193 
3194         if (cpi->cpi_vendor == X86_VENDOR_AMD) {
3195                 if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8 &&
3196                     cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_ERR_PTR_ZERO) {
3197                         /* Special handling for AMD FP not necessary. */
3198                         cpi->cpi_fp_amd_save = 0;
3199                 } else {
3200                         cpi->cpi_fp_amd_save = 1;
3201                 }
3202         }
3203 
3204         /*
3205          * Check the processor leaves that are used for security features.
3206          */
3207         cpuid_scan_security(cpu, featureset);
3208 
3209 pass1_done:
3210         cpi->cpi_pass = 1;
3211 }
3212 
3213 /*
3214  * Make copies of the cpuid table entries we depend on, in
3215  * part for ease of parsing now, in part so that we have only
3216  * one place to correct any of it, in part for ease of
3217  * later export to userland, and in part so we can look at
3218  * this stuff in a crash dump.
3219  */
3220 
3221 /*ARGSUSED*/
3222 void
3223 cpuid_pass2(cpu_t *cpu)
3224 {
3225         uint_t n, nmax;
3226         int i;
3227         struct cpuid_regs *cp;
3228         uint8_t *dp;
3229         uint32_t *iptr;
3230         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
3231 
3232         ASSERT(cpi->cpi_pass == 1);
3233 
3234         if (cpi->cpi_maxeax < 1)
3235                 goto pass2_done;
3236 
3237         if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD)
3238                 nmax = NMAX_CPI_STD;
3239         /*
3240          * (We already handled n == 0 and n == 1 in pass 1)
3241          */
3242         for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) {
3243                 cp->cp_eax = n;
3244 
3245                 /*
3246                  * n == 7 was handled in pass 1
3247                  */
3248                 if (n == 7)
3249                         continue;
3250 
3251                 /*
3252                  * CPUID function 4 expects %ecx to be initialized
3253                  * with an index which indicates which cache to return
3254                  * information about. The OS is expected to call function 4
3255                  * with %ecx set to 0, 1, 2, ... until it returns with
3256                  * EAX[4:0] set to 0, which indicates there are no more
3257                  * caches.
3258                  *
3259                  * Here, populate cpi_std[4] with the information returned by
3260                  * function 4 when %ecx == 0, and do the rest in cpuid_pass3()
3261                  * when dynamic memory allocation becomes available.
3262                  *
3263                  * Note: we need to explicitly initialize %ecx here, since
3264                  * function 4 may have been previously invoked.
3265                  */
3266                 if (n == 4)
3267                         cp->cp_ecx = 0;
3268 
3269                 (void) __cpuid_insn(cp);
3270                 platform_cpuid_mangle(cpi->cpi_vendor, n, cp);
3271                 switch (n) {
3272                 case 2:
3273                         /*
3274                          * "the lower 8 bits of the %eax register
3275                          * contain a value that identifies the number
3276                          * of times the cpuid [instruction] has to be
3277                          * executed to obtain a complete image of the
3278                          * processor's caching systems."
3279                          *
3280                          * How *do* they make this stuff up?
3281                          */
3282                         cpi->cpi_ncache = sizeof (*cp) *
3283                             BITX(cp->cp_eax, 7, 0);
3284                         if (cpi->cpi_ncache == 0)
3285                                 break;
3286                         cpi->cpi_ncache--;   /* skip count byte */
3287 
3288                         /*
3289                          * Well, for now, rather than attempt to implement
3290                          * this slightly dubious algorithm, we just look
3291                          * at the first 15 ..
3292                          */
3293                         if (cpi->cpi_ncache > (sizeof (*cp) - 1))
3294                                 cpi->cpi_ncache = sizeof (*cp) - 1;
3295 
3296                         dp = cpi->cpi_cacheinfo;
3297                         if (BITX(cp->cp_eax, 31, 31) == 0) {
3298                                 uint8_t *p = (void *)&cp->cp_eax;
3299                                 for (i = 1; i < 4; i++)
3300                                         if (p[i] != 0)
3301                                                 *dp++ = p[i];
3302                         }
3303                         if (BITX(cp->cp_ebx, 31, 31) == 0) {
3304                                 uint8_t *p = (void *)&cp->cp_ebx;
3305                                 for (i = 0; i < 4; i++)
3306                                         if (p[i] != 0)
3307                                                 *dp++ = p[i];
3308                         }
3309                         if (BITX(cp->cp_ecx, 31, 31) == 0) {
3310                                 uint8_t *p = (void *)&cp->cp_ecx;
3311                                 for (i = 0; i < 4; i++)
3312                                         if (p[i] != 0)
3313                                                 *dp++ = p[i];
3314                         }
3315                         if (BITX(cp->cp_edx, 31, 31) == 0) {
3316                                 uint8_t *p = (void *)&cp->cp_edx;
3317                                 for (i = 0; i < 4; i++)
3318                                         if (p[i] != 0)
3319                                                 *dp++ = p[i];
3320                         }
3321                         break;
3322 
3323                 case 3: /* Processor serial number, if PSN supported */
3324                         break;
3325 
3326                 case 4: /* Deterministic cache parameters */
3327                         break;
3328 
3329                 case 5: /* Monitor/Mwait parameters */
3330                 {
3331                         size_t mwait_size;
3332 
3333                         /*
3334                          * check cpi_mwait.support which was set in cpuid_pass1
3335                          */
3336                         if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT))
3337                                 break;
3338 
3339                         /*
3340                          * Protect ourself from insane mwait line size.
3341                          * Workaround for incomplete hardware emulator(s).
3342                          */
3343                         mwait_size = (size_t)MWAIT_SIZE_MAX(cpi);
3344                         if (mwait_size < sizeof (uint32_t) ||
3345                             !ISP2(mwait_size)) {
3346 #if DEBUG
3347                                 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait "
3348                                     "size %ld", cpu->cpu_id, (long)mwait_size);
3349 #endif
3350                                 break;
3351                         }
3352 
3353                         cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi);
3354                         cpi->cpi_mwait.mon_max = mwait_size;
3355                         if (MWAIT_EXTENSION(cpi)) {
3356                                 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS;
3357                                 if (MWAIT_INT_ENABLE(cpi))
3358                                         cpi->cpi_mwait.support |=
3359                                             MWAIT_ECX_INT_ENABLE;
3360                         }
3361                         break;
3362                 }
3363                 default:
3364                         break;
3365                 }
3366         }
3367 
3368         /*
3369          * XSAVE enumeration
3370          */
3371         if (cpi->cpi_maxeax >= 0xD) {
3372                 struct cpuid_regs regs;
3373                 boolean_t cpuid_d_valid = B_TRUE;
3374 
3375                 cp = &regs;
3376                 cp->cp_eax = 0xD;
3377                 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
3378 
3379                 (void) __cpuid_insn(cp);
3380 
3381                 /*
3382                  * Sanity checks for debug
3383                  */
3384                 if ((cp->cp_eax & XFEATURE_LEGACY_FP) == 0 ||
3385                     (cp->cp_eax & XFEATURE_SSE) == 0) {
3386                         cpuid_d_valid = B_FALSE;
3387                 }
3388 
3389                 cpi->cpi_xsave.xsav_hw_features_low = cp->cp_eax;
3390                 cpi->cpi_xsave.xsav_hw_features_high = cp->cp_edx;
3391                 cpi->cpi_xsave.xsav_max_size = cp->cp_ecx;
3392 
3393                 /*
3394                  * If the hw supports AVX, get the size and offset in the save
3395                  * area for the ymm state.
3396                  */
3397                 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX) {
3398                         cp->cp_eax = 0xD;
3399                         cp->cp_ecx = 2;
3400                         cp->cp_edx = cp->cp_ebx = 0;
3401 
3402                         (void) __cpuid_insn(cp);
3403 
3404                         if (cp->cp_ebx != CPUID_LEAFD_2_YMM_OFFSET ||
3405                             cp->cp_eax != CPUID_LEAFD_2_YMM_SIZE) {
3406                                 cpuid_d_valid = B_FALSE;
3407                         }
3408 
3409                         cpi->cpi_xsave.ymm_size = cp->cp_eax;
3410                         cpi->cpi_xsave.ymm_offset = cp->cp_ebx;
3411                 }
3412 
3413                 /*
3414                  * If the hw supports MPX, get the size and offset in the
3415                  * save area for BNDREGS and BNDCSR.
3416                  */
3417                 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_MPX) {
3418                         cp->cp_eax = 0xD;
3419                         cp->cp_ecx = 3;
3420                         cp->cp_edx = cp->cp_ebx = 0;
3421 
3422                         (void) __cpuid_insn(cp);
3423 
3424                         cpi->cpi_xsave.bndregs_size = cp->cp_eax;
3425                         cpi->cpi_xsave.bndregs_offset = cp->cp_ebx;
3426 
3427                         cp->cp_eax = 0xD;
3428                         cp->cp_ecx = 4;
3429                         cp->cp_edx = cp->cp_ebx = 0;
3430 
3431                         (void) __cpuid_insn(cp);
3432 
3433                         cpi->cpi_xsave.bndcsr_size = cp->cp_eax;
3434                         cpi->cpi_xsave.bndcsr_offset = cp->cp_ebx;
3435                 }
3436 
3437                 /*
3438                  * If the hw supports AVX512, get the size and offset in the
3439                  * save area for the opmask registers and zmm state.
3440                  */
3441                 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX512) {
3442                         cp->cp_eax = 0xD;
3443                         cp->cp_ecx = 5;
3444                         cp->cp_edx = cp->cp_ebx = 0;
3445 
3446                         (void) __cpuid_insn(cp);
3447 
3448                         cpi->cpi_xsave.opmask_size = cp->cp_eax;
3449                         cpi->cpi_xsave.opmask_offset = cp->cp_ebx;
3450 
3451                         cp->cp_eax = 0xD;
3452                         cp->cp_ecx = 6;
3453                         cp->cp_edx = cp->cp_ebx = 0;
3454 
3455                         (void) __cpuid_insn(cp);
3456 
3457                         cpi->cpi_xsave.zmmlo_size = cp->cp_eax;
3458                         cpi->cpi_xsave.zmmlo_offset = cp->cp_ebx;
3459 
3460                         cp->cp_eax = 0xD;
3461                         cp->cp_ecx = 7;
3462                         cp->cp_edx = cp->cp_ebx = 0;
3463 
3464                         (void) __cpuid_insn(cp);
3465 
3466                         cpi->cpi_xsave.zmmhi_size = cp->cp_eax;
3467                         cpi->cpi_xsave.zmmhi_offset = cp->cp_ebx;
3468                 }
3469 
3470                 if (is_x86_feature(x86_featureset, X86FSET_XSAVE)) {
3471                         xsave_state_size = 0;
3472                 } else if (cpuid_d_valid) {
3473                         xsave_state_size = cpi->cpi_xsave.xsav_max_size;
3474                 } else {
3475                         /* Broken CPUID 0xD, probably in HVM */
3476                         cmn_err(CE_WARN, "cpu%d: CPUID.0xD returns invalid "
3477                             "value: hw_low = %d, hw_high = %d, xsave_size = %d"
3478                             ", ymm_size = %d, ymm_offset = %d\n",
3479                             cpu->cpu_id, cpi->cpi_xsave.xsav_hw_features_low,
3480                             cpi->cpi_xsave.xsav_hw_features_high,
3481                             (int)cpi->cpi_xsave.xsav_max_size,
3482                             (int)cpi->cpi_xsave.ymm_size,
3483                             (int)cpi->cpi_xsave.ymm_offset);
3484 
3485                         if (xsave_state_size != 0) {
3486                                 /*
3487                                  * This must be a non-boot CPU. We cannot
3488                                  * continue, because boot cpu has already
3489                                  * enabled XSAVE.
3490                                  */
3491                                 ASSERT(cpu->cpu_id != 0);
3492                                 cmn_err(CE_PANIC, "cpu%d: we have already "
3493                                     "enabled XSAVE on boot cpu, cannot "
3494                                     "continue.", cpu->cpu_id);
3495                         } else {
3496                                 /*
3497                                  * If we reached here on the boot CPU, it's also
3498                                  * almost certain that we'll reach here on the
3499                                  * non-boot CPUs. When we're here on a boot CPU
3500                                  * we should disable the feature, on a non-boot
3501                                  * CPU we need to confirm that we have.
3502                                  */
3503                                 if (cpu->cpu_id == 0) {
3504                                         remove_x86_feature(x86_featureset,
3505                                             X86FSET_XSAVE);
3506                                         remove_x86_feature(x86_featureset,
3507                                             X86FSET_AVX);
3508                                         remove_x86_feature(x86_featureset,
3509                                             X86FSET_F16C);
3510                                         remove_x86_feature(x86_featureset,
3511                                             X86FSET_BMI1);
3512                                         remove_x86_feature(x86_featureset,
3513                                             X86FSET_BMI2);
3514                                         remove_x86_feature(x86_featureset,
3515                                             X86FSET_FMA);
3516                                         remove_x86_feature(x86_featureset,
3517                                             X86FSET_AVX2);
3518                                         remove_x86_feature(x86_featureset,
3519                                             X86FSET_MPX);
3520                                         remove_x86_feature(x86_featureset,
3521                                             X86FSET_AVX512F);
3522                                         remove_x86_feature(x86_featureset,
3523                                             X86FSET_AVX512DQ);
3524                                         remove_x86_feature(x86_featureset,
3525                                             X86FSET_AVX512PF);
3526                                         remove_x86_feature(x86_featureset,
3527                                             X86FSET_AVX512ER);
3528                                         remove_x86_feature(x86_featureset,
3529                                             X86FSET_AVX512CD);
3530                                         remove_x86_feature(x86_featureset,
3531                                             X86FSET_AVX512BW);
3532                                         remove_x86_feature(x86_featureset,
3533                                             X86FSET_AVX512VL);
3534                                         remove_x86_feature(x86_featureset,
3535                                             X86FSET_AVX512FMA);
3536                                         remove_x86_feature(x86_featureset,
3537                                             X86FSET_AVX512VBMI);
3538                                         remove_x86_feature(x86_featureset,
3539                                             X86FSET_AVX512VNNI);
3540                                         remove_x86_feature(x86_featureset,
3541                                             X86FSET_AVX512VPOPCDQ);
3542                                         remove_x86_feature(x86_featureset,
3543                                             X86FSET_AVX512NNIW);
3544                                         remove_x86_feature(x86_featureset,
3545                                             X86FSET_AVX512FMAPS);
3546 
3547                                         CPI_FEATURES_ECX(cpi) &=
3548                                             ~CPUID_INTC_ECX_XSAVE;
3549                                         CPI_FEATURES_ECX(cpi) &=
3550                                             ~CPUID_INTC_ECX_AVX;
3551                                         CPI_FEATURES_ECX(cpi) &=
3552                                             ~CPUID_INTC_ECX_F16C;
3553                                         CPI_FEATURES_ECX(cpi) &=
3554                                             ~CPUID_INTC_ECX_FMA;
3555                                         CPI_FEATURES_7_0_EBX(cpi) &=
3556                                             ~CPUID_INTC_EBX_7_0_BMI1;
3557                                         CPI_FEATURES_7_0_EBX(cpi) &=
3558                                             ~CPUID_INTC_EBX_7_0_BMI2;
3559                                         CPI_FEATURES_7_0_EBX(cpi) &=
3560                                             ~CPUID_INTC_EBX_7_0_AVX2;
3561                                         CPI_FEATURES_7_0_EBX(cpi) &=
3562                                             ~CPUID_INTC_EBX_7_0_MPX;
3563                                         CPI_FEATURES_7_0_EBX(cpi) &=
3564                                             ~CPUID_INTC_EBX_7_0_ALL_AVX512;
3565 
3566                                         CPI_FEATURES_7_0_ECX(cpi) &=
3567                                             ~CPUID_INTC_ECX_7_0_ALL_AVX512;
3568 
3569                                         CPI_FEATURES_7_0_EDX(cpi) &=
3570                                             ~CPUID_INTC_EDX_7_0_ALL_AVX512;
3571 
3572                                         xsave_force_disable = B_TRUE;
3573                                 } else {
3574                                         VERIFY(is_x86_feature(x86_featureset,
3575                                             X86FSET_XSAVE) == B_FALSE);
3576                                 }
3577                         }
3578                 }
3579         }
3580 
3581 
3582         if ((cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) == 0)
3583                 goto pass2_done;
3584 
3585         if ((nmax = cpi->cpi_xmaxeax - CPUID_LEAF_EXT_0 + 1) > NMAX_CPI_EXTD)
3586                 nmax = NMAX_CPI_EXTD;
3587         /*
3588          * Copy the extended properties, fixing them as we go.
3589          * (We already handled n == 0 and n == 1 in pass 1)
3590          */
3591         iptr = (void *)cpi->cpi_brandstr;
3592         for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) {
3593                 cp->cp_eax = CPUID_LEAF_EXT_0 + n;
3594                 (void) __cpuid_insn(cp);
3595                 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_0 + n,
3596                     cp);
3597                 switch (n) {
3598                 case 2:
3599                 case 3:
3600                 case 4:
3601                         /*
3602                          * Extract the brand string
3603                          */
3604                         *iptr++ = cp->cp_eax;
3605                         *iptr++ = cp->cp_ebx;
3606                         *iptr++ = cp->cp_ecx;
3607                         *iptr++ = cp->cp_edx;
3608                         break;
3609                 case 5:
3610                         switch (cpi->cpi_vendor) {
3611                         case X86_VENDOR_AMD:
3612                                 /*
3613                                  * The Athlon and Duron were the first
3614                                  * parts to report the sizes of the
3615                                  * TLB for large pages. Before then,
3616                                  * we don't trust the data.
3617                                  */
3618                                 if (cpi->cpi_family < 6 ||
3619                                     (cpi->cpi_family == 6 &&
3620                                     cpi->cpi_model < 1))
3621                                         cp->cp_eax = 0;
3622                                 break;
3623                         default:
3624                                 break;
3625                         }
3626                         break;
3627                 case 6:
3628                         switch (cpi->cpi_vendor) {
3629                         case X86_VENDOR_AMD:
3630                                 /*
3631                                  * The Athlon and Duron were the first
3632                                  * AMD parts with L2 TLB's.
3633                                  * Before then, don't trust the data.
3634                                  */
3635                                 if (cpi->cpi_family < 6 ||
3636                                     cpi->cpi_family == 6 &&
3637                                     cpi->cpi_model < 1)
3638                                         cp->cp_eax = cp->cp_ebx = 0;
3639                                 /*
3640                                  * AMD Duron rev A0 reports L2
3641                                  * cache size incorrectly as 1K
3642                                  * when it is really 64K
3643                                  */
3644                                 if (cpi->cpi_family == 6 &&
3645                                     cpi->cpi_model == 3 &&
3646                                     cpi->cpi_step == 0) {
3647                                         cp->cp_ecx &= 0xffff;
3648                                         cp->cp_ecx |= 0x400000;
3649                                 }
3650                                 break;
3651                         case X86_VENDOR_Cyrix:  /* VIA C3 */
3652                                 /*
3653                                  * VIA C3 processors are a bit messed
3654                                  * up w.r.t. encoding cache sizes in %ecx
3655                                  */
3656                                 if (cpi->cpi_family != 6)
3657                                         break;
3658                                 /*
3659                                  * model 7 and 8 were incorrectly encoded
3660                                  *
3661                                  * xxx is model 8 really broken?
3662                                  */
3663                                 if (cpi->cpi_model == 7 ||
3664                                     cpi->cpi_model == 8)
3665                                         cp->cp_ecx =
3666                                             BITX(cp->cp_ecx, 31, 24) << 16 |
3667                                             BITX(cp->cp_ecx, 23, 16) << 12 |
3668                                             BITX(cp->cp_ecx, 15, 8) << 8 |
3669                                             BITX(cp->cp_ecx, 7, 0);
3670                                 /*
3671                                  * model 9 stepping 1 has wrong associativity
3672                                  */
3673                                 if (cpi->cpi_model == 9 && cpi->cpi_step == 1)
3674                                         cp->cp_ecx |= 8 << 12;
3675                                 break;
3676                         case X86_VENDOR_Intel:
3677                                 /*
3678                                  * Extended L2 Cache features function.
3679                                  * First appeared on Prescott.
3680                                  */
3681                         default:
3682                                 break;
3683                         }
3684                         break;
3685                 default:
3686                         break;
3687                 }
3688         }
3689 
3690 pass2_done:
3691         cpi->cpi_pass = 2;
3692 }
3693 
3694 static const char *
3695 intel_cpubrand(const struct cpuid_info *cpi)
3696 {
3697         int i;
3698 
3699         if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
3700             cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
3701                 return ("i486");
3702 
3703         switch (cpi->cpi_family) {
3704         case 5:
3705                 return ("Intel Pentium(r)");
3706         case 6:
3707                 switch (cpi->cpi_model) {
3708                         uint_t celeron, xeon;
3709                         const struct cpuid_regs *cp;
3710                 case 0:
3711                 case 1:
3712                 case 2:
3713                         return ("Intel Pentium(r) Pro");
3714                 case 3:
3715                 case 4:
3716                         return ("Intel Pentium(r) II");
3717                 case 6:
3718                         return ("Intel Celeron(r)");
3719                 case 5:
3720                 case 7:
3721                         celeron = xeon = 0;
3722                         cp = &cpi->cpi_std[2];   /* cache info */
3723 
3724                         for (i = 1; i < 4; i++) {
3725                                 uint_t tmp;
3726 
3727                                 tmp = (cp->cp_eax >> (8 * i)) & 0xff;
3728                                 if (tmp == 0x40)
3729                                         celeron++;
3730                                 if (tmp >= 0x44 && tmp <= 0x45)
3731                                         xeon++;
3732                         }
3733 
3734                         for (i = 0; i < 2; i++) {
3735                                 uint_t tmp;
3736 
3737                                 tmp = (cp->cp_ebx >> (8 * i)) & 0xff;
3738                                 if (tmp == 0x40)
3739                                         celeron++;
3740                                 else if (tmp >= 0x44 && tmp <= 0x45)
3741                                         xeon++;
3742                         }
3743 
3744                         for (i = 0; i < 4; i++) {
3745                                 uint_t tmp;
3746 
3747                                 tmp = (cp->cp_ecx >> (8 * i)) & 0xff;
3748                                 if (tmp == 0x40)
3749                                         celeron++;
3750                                 else if (tmp >= 0x44 && tmp <= 0x45)
3751                                         xeon++;
3752                         }
3753 
3754                         for (i = 0; i < 4; i++) {
3755                                 uint_t tmp;
3756 
3757                                 tmp = (cp->cp_edx >> (8 * i)) & 0xff;
3758                                 if (tmp == 0x40)
3759                                         celeron++;
3760                                 else if (tmp >= 0x44 && tmp <= 0x45)
3761                                         xeon++;
3762                         }
3763 
3764                         if (celeron)
3765                                 return ("Intel Celeron(r)");
3766                         if (xeon)
3767                                 return (cpi->cpi_model == 5 ?
3768                                     "Intel Pentium(r) II Xeon(tm)" :
3769                                     "Intel Pentium(r) III Xeon(tm)");
3770                         return (cpi->cpi_model == 5 ?
3771                             "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" :
3772                             "Intel Pentium(r) III or Pentium(r) III Xeon(tm)");
3773                 default:
3774                         break;
3775                 }
3776         default:
3777                 break;
3778         }
3779 
3780         /* BrandID is present if the field is nonzero */
3781         if (cpi->cpi_brandid != 0) {
3782                 static const struct {
3783                         uint_t bt_bid;
3784                         const char *bt_str;
3785                 } brand_tbl[] = {
3786                         { 0x1,  "Intel(r) Celeron(r)" },
3787                         { 0x2,  "Intel(r) Pentium(r) III" },
3788                         { 0x3,  "Intel(r) Pentium(r) III Xeon(tm)" },
3789                         { 0x4,  "Intel(r) Pentium(r) III" },
3790                         { 0x6,  "Mobile Intel(r) Pentium(r) III" },
3791                         { 0x7,  "Mobile Intel(r) Celeron(r)" },
3792                         { 0x8,  "Intel(r) Pentium(r) 4" },
3793                         { 0x9,  "Intel(r) Pentium(r) 4" },
3794                         { 0xa,  "Intel(r) Celeron(r)" },
3795                         { 0xb,  "Intel(r) Xeon(tm)" },
3796                         { 0xc,  "Intel(r) Xeon(tm) MP" },
3797                         { 0xe,  "Mobile Intel(r) Pentium(r) 4" },
3798                         { 0xf,  "Mobile Intel(r) Celeron(r)" },
3799                         { 0x11, "Mobile Genuine Intel(r)" },
3800                         { 0x12, "Intel(r) Celeron(r) M" },
3801                         { 0x13, "Mobile Intel(r) Celeron(r)" },
3802                         { 0x14, "Intel(r) Celeron(r)" },
3803                         { 0x15, "Mobile Genuine Intel(r)" },
3804                         { 0x16, "Intel(r) Pentium(r) M" },
3805                         { 0x17, "Mobile Intel(r) Celeron(r)" }
3806                 };
3807                 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]);
3808                 uint_t sgn;
3809 
3810                 sgn = (cpi->cpi_family << 8) |
3811                     (cpi->cpi_model << 4) | cpi->cpi_step;
3812 
3813                 for (i = 0; i < btblmax; i++)
3814                         if (brand_tbl[i].bt_bid == cpi->cpi_brandid)
3815                                 break;
3816                 if (i < btblmax) {
3817                         if (sgn == 0x6b1 && cpi->cpi_brandid == 3)
3818                                 return ("Intel(r) Celeron(r)");
3819                         if (sgn < 0xf13 && cpi->cpi_brandid == 0xb)
3820                                 return ("Intel(r) Xeon(tm) MP");
3821                         if (sgn < 0xf13 && cpi->cpi_brandid == 0xe)
3822                                 return ("Intel(r) Xeon(tm)");
3823                         return (brand_tbl[i].bt_str);
3824                 }
3825         }
3826 
3827         return (NULL);
3828 }
3829 
3830 static const char *
3831 amd_cpubrand(const struct cpuid_info *cpi)
3832 {
3833         if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
3834             cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
3835                 return ("i486 compatible");
3836 
3837         switch (cpi->cpi_family) {
3838         case 5:
3839                 switch (cpi->cpi_model) {
3840                 case 0:
3841                 case 1:
3842                 case 2:
3843                 case 3:
3844                 case 4:
3845                 case 5:
3846                         return ("AMD-K5(r)");
3847                 case 6:
3848                 case 7:
3849                         return ("AMD-K6(r)");
3850                 case 8:
3851                         return ("AMD-K6(r)-2");
3852                 case 9:
3853                         return ("AMD-K6(r)-III");
3854                 default:
3855                         return ("AMD (family 5)");
3856                 }
3857         case 6:
3858                 switch (cpi->cpi_model) {
3859                 case 1:
3860                         return ("AMD-K7(tm)");
3861                 case 0:
3862                 case 2:
3863                 case 4:
3864                         return ("AMD Athlon(tm)");
3865                 case 3:
3866                 case 7:
3867                         return ("AMD Duron(tm)");
3868                 case 6:
3869                 case 8:
3870                 case 10:
3871                         /*
3872                          * Use the L2 cache size to distinguish
3873                          */
3874                         return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ?
3875                             "AMD Athlon(tm)" : "AMD Duron(tm)");
3876                 default:
3877                         return ("AMD (family 6)");
3878                 }
3879         default:
3880                 break;
3881         }
3882 
3883         if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 &&
3884             cpi->cpi_brandid != 0) {
3885                 switch (BITX(cpi->cpi_brandid, 7, 5)) {
3886                 case 3:
3887                         return ("AMD Opteron(tm) UP 1xx");
3888                 case 4:
3889                         return ("AMD Opteron(tm) DP 2xx");
3890                 case 5:
3891                         return ("AMD Opteron(tm) MP 8xx");
3892                 default:
3893                         return ("AMD Opteron(tm)");
3894                 }
3895         }
3896 
3897         return (NULL);
3898 }
3899 
3900 static const char *
3901 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type)
3902 {
3903         if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
3904             cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 ||
3905             type == X86_TYPE_CYRIX_486)
3906                 return ("i486 compatible");
3907 
3908         switch (type) {
3909         case X86_TYPE_CYRIX_6x86:
3910                 return ("Cyrix 6x86");
3911         case X86_TYPE_CYRIX_6x86L:
3912                 return ("Cyrix 6x86L");
3913         case X86_TYPE_CYRIX_6x86MX:
3914                 return ("Cyrix 6x86MX");
3915         case X86_TYPE_CYRIX_GXm:
3916                 return ("Cyrix GXm");
3917         case X86_TYPE_CYRIX_MediaGX:
3918                 return ("Cyrix MediaGX");
3919         case X86_TYPE_CYRIX_MII:
3920                 return ("Cyrix M2");
3921         case X86_TYPE_VIA_CYRIX_III:
3922                 return ("VIA Cyrix M3");
3923         default:
3924                 /*
3925                  * Have another wild guess ..
3926                  */
3927                 if (cpi->cpi_family == 4 && cpi->cpi_model == 9)
3928                         return ("Cyrix 5x86");
3929                 else if (cpi->cpi_family == 5) {
3930                         switch (cpi->cpi_model) {
3931                         case 2:
3932                                 return ("Cyrix 6x86");  /* Cyrix M1 */
3933                         case 4:
3934                                 return ("Cyrix MediaGX");
3935                         default:
3936                                 break;
3937                         }
3938                 } else if (cpi->cpi_family == 6) {
3939                         switch (cpi->cpi_model) {
3940                         case 0:
3941                                 return ("Cyrix 6x86MX"); /* Cyrix M2? */
3942                         case 5:
3943                         case 6:
3944                         case 7:
3945                         case 8:
3946                         case 9:
3947                                 return ("VIA C3");
3948                         default:
3949                                 break;
3950                         }
3951                 }
3952                 break;
3953         }
3954         return (NULL);
3955 }
3956 
3957 /*
3958  * This only gets called in the case that the CPU extended
3959  * feature brand string (0x80000002, 0x80000003, 0x80000004)
3960  * aren't available, or contain null bytes for some reason.
3961  */
3962 static void
3963 fabricate_brandstr(struct cpuid_info *cpi)
3964 {
3965         const char *brand = NULL;
3966 
3967         switch (cpi->cpi_vendor) {
3968         case X86_VENDOR_Intel:
3969                 brand = intel_cpubrand(cpi);
3970                 break;
3971         case X86_VENDOR_AMD:
3972                 brand = amd_cpubrand(cpi);
3973                 break;
3974         case X86_VENDOR_Cyrix:
3975                 brand = cyrix_cpubrand(cpi, x86_type);
3976                 break;
3977         case X86_VENDOR_NexGen:
3978                 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
3979                         brand = "NexGen Nx586";
3980                 break;
3981         case X86_VENDOR_Centaur:
3982                 if (cpi->cpi_family == 5)
3983                         switch (cpi->cpi_model) {
3984                         case 4:
3985                                 brand = "Centaur C6";
3986                                 break;
3987                         case 8:
3988                                 brand = "Centaur C2";
3989                                 break;
3990                         case 9:
3991                                 brand = "Centaur C3";
3992                                 break;
3993                         default:
3994                                 break;
3995                         }
3996                 break;
3997         case X86_VENDOR_Rise:
3998                 if (cpi->cpi_family == 5 &&
3999                     (cpi->cpi_model == 0 || cpi->cpi_model == 2))
4000                         brand = "Rise mP6";
4001                 break;
4002         case X86_VENDOR_SiS:
4003                 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
4004                         brand = "SiS 55x";
4005                 break;
4006         case X86_VENDOR_TM:
4007                 if (cpi->cpi_family == 5 && cpi->cpi_model == 4)
4008                         brand = "Transmeta Crusoe TM3x00 or TM5x00";
4009                 break;
4010         case X86_VENDOR_NSC:
4011         case X86_VENDOR_UMC:
4012         default:
4013                 break;
4014         }
4015         if (brand) {
4016                 (void) strcpy((char *)cpi->cpi_brandstr, brand);
4017                 return;
4018         }
4019 
4020         /*
4021          * If all else fails ...
4022          */
4023         (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr),
4024             "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family,
4025             cpi->cpi_model, cpi->cpi_step);
4026 }
4027 
4028 /*
4029  * This routine is called just after kernel memory allocation
4030  * becomes available on cpu0, and as part of mp_startup() on
4031  * the other cpus.
4032  *
4033  * Fixup the brand string, and collect any information from cpuid
4034  * that requires dynamically allocated storage to represent.
4035  */
4036 /*ARGSUSED*/
4037 void
4038 cpuid_pass3(cpu_t *cpu)
4039 {
4040         int     i, max, shft, level, size;
4041         struct cpuid_regs regs;
4042         struct cpuid_regs *cp;
4043         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4044 
4045         ASSERT(cpi->cpi_pass == 2);
4046 
4047         /*
4048          * Deterministic cache parameters
4049          *
4050          * Intel uses leaf 0x4 for this, while AMD uses leaf 0x8000001d. The
4051          * values that are present are currently defined to be the same. This
4052          * means we can use the same logic to parse it as long as we use the
4053          * appropriate leaf to get the data. If you're updating this, make sure
4054          * you're careful about which vendor supports which aspect.
4055          *
4056          * Take this opportunity to detect the number of threads sharing the
4057          * last level cache, and construct a corresponding cache id. The
4058          * respective cpuid_info members are initialized to the default case of
4059          * "no last level cache sharing".
4060          */
4061         cpi->cpi_ncpu_shr_last_cache = 1;
4062         cpi->cpi_last_lvl_cacheid = cpu->cpu_id;
4063 
4064         if ((cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) ||
4065             (cpi->cpi_vendor == X86_VENDOR_AMD &&
4066             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1d &&
4067             is_x86_feature(x86_featureset, X86FSET_TOPOEXT))) {
4068                 uint32_t leaf;
4069 
4070                 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
4071                         leaf = 4;
4072                 } else {
4073                         leaf = CPUID_LEAF_EXT_1d;
4074                 }
4075 
4076                 /*
4077                  * Find the # of elements (size) returned by the leaf and along
4078                  * the way detect last level cache sharing details.
4079                  */
4080                 bzero(&regs, sizeof (regs));
4081                 cp = &regs;
4082                 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) {
4083                         cp->cp_eax = leaf;
4084                         cp->cp_ecx = i;
4085 
4086                         (void) __cpuid_insn(cp);
4087 
4088                         if (CPI_CACHE_TYPE(cp) == 0)
4089                                 break;
4090                         level = CPI_CACHE_LVL(cp);
4091                         if (level > max) {
4092                                 max = level;
4093                                 cpi->cpi_ncpu_shr_last_cache =
4094                                     CPI_NTHR_SHR_CACHE(cp) + 1;
4095                         }
4096                 }
4097                 cpi->cpi_cache_leaf_size = size = i;
4098 
4099                 /*
4100                  * Allocate the cpi_cache_leaves array. The first element
4101                  * references the regs for the corresponding leaf with %ecx set
4102                  * to 0. This was gathered in cpuid_pass2().
4103                  */
4104                 if (size > 0) {
4105                         cpi->cpi_cache_leaves =
4106                             kmem_alloc(size * sizeof (cp), KM_SLEEP);
4107                         if (cpi->cpi_vendor == X86_VENDOR_Intel) {
4108                                 cpi->cpi_cache_leaves[0] = &cpi->cpi_std[4];
4109                         } else {
4110                                 cpi->cpi_cache_leaves[0] = &cpi->cpi_extd[0x1d];
4111                         }
4112 
4113                         /*
4114                          * Allocate storage to hold the additional regs
4115                          * for the leaf, %ecx == 1 .. cpi_cache_leaf_size.
4116                          *
4117                          * The regs for the leaf, %ecx == 0 has already
4118                          * been allocated as indicated above.
4119                          */
4120                         for (i = 1; i < size; i++) {
4121                                 cp = cpi->cpi_cache_leaves[i] =
4122                                     kmem_zalloc(sizeof (regs), KM_SLEEP);
4123                                 cp->cp_eax = leaf;
4124                                 cp->cp_ecx = i;
4125 
4126                                 (void) __cpuid_insn(cp);
4127                         }
4128                 }
4129                 /*
4130                  * Determine the number of bits needed to represent
4131                  * the number of CPUs sharing the last level cache.
4132                  *
4133                  * Shift off that number of bits from the APIC id to
4134                  * derive the cache id.
4135                  */
4136                 shft = 0;
4137                 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1)
4138                         shft++;
4139                 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft;
4140         }
4141 
4142         /*
4143          * Now fixup the brand string
4144          */
4145         if ((cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) == 0) {
4146                 fabricate_brandstr(cpi);
4147         } else {
4148 
4149                 /*
4150                  * If we successfully extracted a brand string from the cpuid
4151                  * instruction, clean it up by removing leading spaces and
4152                  * similar junk.
4153                  */
4154                 if (cpi->cpi_brandstr[0]) {
4155                         size_t maxlen = sizeof (cpi->cpi_brandstr);
4156                         char *src, *dst;
4157 
4158                         dst = src = (char *)cpi->cpi_brandstr;
4159                         src[maxlen - 1] = '\0';
4160                         /*
4161                          * strip leading spaces
4162                          */
4163                         while (*src == ' ')
4164                                 src++;
4165                         /*
4166                          * Remove any 'Genuine' or "Authentic" prefixes
4167                          */
4168                         if (strncmp(src, "Genuine ", 8) == 0)
4169                                 src += 8;
4170                         if (strncmp(src, "Authentic ", 10) == 0)
4171                                 src += 10;
4172 
4173                         /*
4174                          * Now do an in-place copy.
4175                          * Map (R) to (r) and (TM) to (tm).
4176                          * The era of teletypes is long gone, and there's
4177                          * -really- no need to shout.
4178                          */
4179                         while (*src != '\0') {
4180                                 if (src[0] == '(') {
4181                                         if (strncmp(src + 1, "R)", 2) == 0) {
4182                                                 (void) strncpy(dst, "(r)", 3);
4183                                                 src += 3;
4184                                                 dst += 3;
4185                                                 continue;
4186                                         }
4187                                         if (strncmp(src + 1, "TM)", 3) == 0) {
4188                                                 (void) strncpy(dst, "(tm)", 4);
4189                                                 src += 4;
4190                                                 dst += 4;
4191                                                 continue;
4192                                         }
4193                                 }
4194                                 *dst++ = *src++;
4195                         }
4196                         *dst = '\0';
4197 
4198                         /*
4199                          * Finally, remove any trailing spaces
4200                          */
4201                         while (--dst > cpi->cpi_brandstr)
4202                                 if (*dst == ' ')
4203                                         *dst = '\0';
4204                                 else
4205                                         break;
4206                 } else
4207                         fabricate_brandstr(cpi);
4208         }
4209         cpi->cpi_pass = 3;
4210 }
4211 
4212 /*
4213  * This routine is called out of bind_hwcap() much later in the life
4214  * of the kernel (post_startup()).  The job of this routine is to resolve
4215  * the hardware feature support and kernel support for those features into
4216  * what we're actually going to tell applications via the aux vector.
4217  */
4218 void
4219 cpuid_pass4(cpu_t *cpu, uint_t *hwcap_out)
4220 {
4221         struct cpuid_info *cpi;
4222         uint_t hwcap_flags = 0, hwcap_flags_2 = 0;
4223 
4224         if (cpu == NULL)
4225                 cpu = CPU;
4226         cpi = cpu->cpu_m.mcpu_cpi;
4227 
4228         ASSERT(cpi->cpi_pass == 3);
4229 
4230         if (cpi->cpi_maxeax >= 1) {
4231                 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES];
4232                 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES];
4233                 uint32_t *ebx = &cpi->cpi_support[STD_EBX_FEATURES];
4234 
4235                 *edx = CPI_FEATURES_EDX(cpi);
4236                 *ecx = CPI_FEATURES_ECX(cpi);
4237                 *ebx = CPI_FEATURES_7_0_EBX(cpi);
4238 
4239                 /*
4240                  * [these require explicit kernel support]
4241                  */
4242                 if (!is_x86_feature(x86_featureset, X86FSET_SEP))
4243                         *edx &= ~CPUID_INTC_EDX_SEP;
4244 
4245                 if (!is_x86_feature(x86_featureset, X86FSET_SSE))
4246                         *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE);
4247                 if (!is_x86_feature(x86_featureset, X86FSET_SSE2))
4248                         *edx &= ~CPUID_INTC_EDX_SSE2;
4249 
4250                 if (!is_x86_feature(x86_featureset, X86FSET_HTT))
4251                         *edx &= ~CPUID_INTC_EDX_HTT;
4252 
4253                 if (!is_x86_feature(x86_featureset, X86FSET_SSE3))
4254                         *ecx &= ~CPUID_INTC_ECX_SSE3;
4255 
4256                 if (!is_x86_feature(x86_featureset, X86FSET_SSSE3))
4257                         *ecx &= ~CPUID_INTC_ECX_SSSE3;
4258                 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_1))
4259                         *ecx &= ~CPUID_INTC_ECX_SSE4_1;
4260                 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_2))
4261                         *ecx &= ~CPUID_INTC_ECX_SSE4_2;
4262                 if (!is_x86_feature(x86_featureset, X86FSET_AES))
4263                         *ecx &= ~CPUID_INTC_ECX_AES;
4264                 if (!is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ))
4265                         *ecx &= ~CPUID_INTC_ECX_PCLMULQDQ;
4266                 if (!is_x86_feature(x86_featureset, X86FSET_XSAVE))
4267                         *ecx &= ~(CPUID_INTC_ECX_XSAVE |
4268                             CPUID_INTC_ECX_OSXSAVE);
4269                 if (!is_x86_feature(x86_featureset, X86FSET_AVX))
4270                         *ecx &= ~CPUID_INTC_ECX_AVX;
4271                 if (!is_x86_feature(x86_featureset, X86FSET_F16C))
4272                         *ecx &= ~CPUID_INTC_ECX_F16C;
4273                 if (!is_x86_feature(x86_featureset, X86FSET_FMA))
4274                         *ecx &= ~CPUID_INTC_ECX_FMA;
4275                 if (!is_x86_feature(x86_featureset, X86FSET_BMI1))
4276                         *ebx &= ~CPUID_INTC_EBX_7_0_BMI1;
4277                 if (!is_x86_feature(x86_featureset, X86FSET_BMI2))
4278                         *ebx &= ~CPUID_INTC_EBX_7_0_BMI2;
4279                 if (!is_x86_feature(x86_featureset, X86FSET_AVX2))
4280                         *ebx &= ~CPUID_INTC_EBX_7_0_AVX2;
4281                 if (!is_x86_feature(x86_featureset, X86FSET_RDSEED))
4282                         *ebx &= ~CPUID_INTC_EBX_7_0_RDSEED;
4283                 if (!is_x86_feature(x86_featureset, X86FSET_ADX))
4284                         *ebx &= ~CPUID_INTC_EBX_7_0_ADX;
4285 
4286                 /*
4287                  * [no explicit support required beyond x87 fp context]
4288                  */
4289                 if (!fpu_exists)
4290                         *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX);
4291 
4292                 /*
4293                  * Now map the supported feature vector to things that we
4294                  * think userland will care about.
4295                  */
4296                 if (*edx & CPUID_INTC_EDX_SEP)
4297                         hwcap_flags |= AV_386_SEP;
4298                 if (*edx & CPUID_INTC_EDX_SSE)
4299                         hwcap_flags |= AV_386_FXSR | AV_386_SSE;
4300                 if (*edx & CPUID_INTC_EDX_SSE2)
4301                         hwcap_flags |= AV_386_SSE2;
4302                 if (*ecx & CPUID_INTC_ECX_SSE3)
4303                         hwcap_flags |= AV_386_SSE3;
4304                 if (*ecx & CPUID_INTC_ECX_SSSE3)
4305                         hwcap_flags |= AV_386_SSSE3;
4306                 if (*ecx & CPUID_INTC_ECX_SSE4_1)
4307                         hwcap_flags |= AV_386_SSE4_1;
4308                 if (*ecx & CPUID_INTC_ECX_SSE4_2)
4309                         hwcap_flags |= AV_386_SSE4_2;
4310                 if (*ecx & CPUID_INTC_ECX_MOVBE)
4311                         hwcap_flags |= AV_386_MOVBE;
4312                 if (*ecx & CPUID_INTC_ECX_AES)
4313                         hwcap_flags |= AV_386_AES;
4314                 if (*ecx & CPUID_INTC_ECX_PCLMULQDQ)
4315                         hwcap_flags |= AV_386_PCLMULQDQ;
4316                 if ((*ecx & CPUID_INTC_ECX_XSAVE) &&
4317                     (*ecx & CPUID_INTC_ECX_OSXSAVE)) {
4318                         hwcap_flags |= AV_386_XSAVE;
4319 
4320                         if (*ecx & CPUID_INTC_ECX_AVX) {
4321                                 uint32_t *ecx_7 = &CPI_FEATURES_7_0_ECX(cpi);
4322                                 uint32_t *edx_7 = &CPI_FEATURES_7_0_EDX(cpi);
4323 
4324                                 hwcap_flags |= AV_386_AVX;
4325                                 if (*ecx & CPUID_INTC_ECX_F16C)
4326                                         hwcap_flags_2 |= AV_386_2_F16C;
4327                                 if (*ecx & CPUID_INTC_ECX_FMA)
4328                                         hwcap_flags_2 |= AV_386_2_FMA;
4329 
4330                                 if (*ebx & CPUID_INTC_EBX_7_0_BMI1)
4331                                         hwcap_flags_2 |= AV_386_2_BMI1;
4332                                 if (*ebx & CPUID_INTC_EBX_7_0_BMI2)
4333                                         hwcap_flags_2 |= AV_386_2_BMI2;
4334                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX2)
4335                                         hwcap_flags_2 |= AV_386_2_AVX2;
4336                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512F)
4337                                         hwcap_flags_2 |= AV_386_2_AVX512F;
4338                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512DQ)
4339                                         hwcap_flags_2 |= AV_386_2_AVX512DQ;
4340                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512IFMA)
4341                                         hwcap_flags_2 |= AV_386_2_AVX512IFMA;
4342                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512PF)
4343                                         hwcap_flags_2 |= AV_386_2_AVX512PF;
4344                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512ER)
4345                                         hwcap_flags_2 |= AV_386_2_AVX512ER;
4346                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512CD)
4347                                         hwcap_flags_2 |= AV_386_2_AVX512CD;
4348                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512BW)
4349                                         hwcap_flags_2 |= AV_386_2_AVX512BW;
4350                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512VL)
4351                                         hwcap_flags_2 |= AV_386_2_AVX512VL;
4352 
4353                                 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VBMI)
4354                                         hwcap_flags_2 |= AV_386_2_AVX512VBMI;
4355                                 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VNNI)
4356                                         hwcap_flags_2 |= AV_386_2_AVX512_VNNI;
4357                                 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VPOPCDQ)
4358                                         hwcap_flags_2 |= AV_386_2_AVX512VPOPCDQ;
4359 
4360                                 if (*edx_7 & CPUID_INTC_EDX_7_0_AVX5124NNIW)
4361                                         hwcap_flags_2 |= AV_386_2_AVX512_4NNIW;
4362                                 if (*edx_7 & CPUID_INTC_EDX_7_0_AVX5124FMAPS)
4363                                         hwcap_flags_2 |= AV_386_2_AVX512_4FMAPS;
4364                         }
4365                 }
4366                 if (*ecx & CPUID_INTC_ECX_VMX)
4367                         hwcap_flags |= AV_386_VMX;
4368                 if (*ecx & CPUID_INTC_ECX_POPCNT)
4369                         hwcap_flags |= AV_386_POPCNT;
4370                 if (*edx & CPUID_INTC_EDX_FPU)
4371                         hwcap_flags |= AV_386_FPU;
4372                 if (*edx & CPUID_INTC_EDX_MMX)
4373                         hwcap_flags |= AV_386_MMX;
4374 
4375                 if (*edx & CPUID_INTC_EDX_TSC)
4376                         hwcap_flags |= AV_386_TSC;
4377                 if (*edx & CPUID_INTC_EDX_CX8)
4378                         hwcap_flags |= AV_386_CX8;
4379                 if (*edx & CPUID_INTC_EDX_CMOV)
4380                         hwcap_flags |= AV_386_CMOV;
4381                 if (*ecx & CPUID_INTC_ECX_CX16)
4382                         hwcap_flags |= AV_386_CX16;
4383 
4384                 if (*ecx & CPUID_INTC_ECX_RDRAND)
4385                         hwcap_flags_2 |= AV_386_2_RDRAND;
4386                 if (*ebx & CPUID_INTC_EBX_7_0_ADX)
4387                         hwcap_flags_2 |= AV_386_2_ADX;
4388                 if (*ebx & CPUID_INTC_EBX_7_0_RDSEED)
4389                         hwcap_flags_2 |= AV_386_2_RDSEED;
4390                 if (*ebx & CPUID_INTC_EBX_7_0_SHA)
4391                         hwcap_flags_2 |= AV_386_2_SHA;
4392                 if (*ebx & CPUID_INTC_EBX_7_0_FSGSBASE)
4393                         hwcap_flags_2 |= AV_386_2_FSGSBASE;
4394                 if (*ebx & CPUID_INTC_EBX_7_0_CLWB)
4395                         hwcap_flags_2 |= AV_386_2_CLWB;
4396                 if (*ebx & CPUID_INTC_EBX_7_0_CLFLUSHOPT)
4397                         hwcap_flags_2 |= AV_386_2_CLFLUSHOPT;
4398 
4399         }
4400         /*
4401          * Check a few miscilaneous features.
4402          */
4403         if (is_x86_feature(x86_featureset, X86FSET_CLZERO))
4404                 hwcap_flags_2 |= AV_386_2_CLZERO;
4405 
4406         if (cpi->cpi_xmaxeax < 0x80000001)
4407                 goto pass4_done;
4408 
4409         switch (cpi->cpi_vendor) {
4410                 struct cpuid_regs cp;
4411                 uint32_t *edx, *ecx;
4412 
4413         case X86_VENDOR_Intel:
4414                 /*
4415                  * Seems like Intel duplicated what we necessary
4416                  * here to make the initial crop of 64-bit OS's work.
4417                  * Hopefully, those are the only "extended" bits
4418                  * they'll add.
4419                  */
4420                 /*FALLTHROUGH*/
4421 
4422         case X86_VENDOR_AMD:
4423                 edx = &cpi->cpi_support[AMD_EDX_FEATURES];
4424                 ecx = &cpi->cpi_support[AMD_ECX_FEATURES];
4425 
4426                 *edx = CPI_FEATURES_XTD_EDX(cpi);
4427                 *ecx = CPI_FEATURES_XTD_ECX(cpi);
4428 
4429                 /*
4430                  * [these features require explicit kernel support]
4431                  */
4432                 switch (cpi->cpi_vendor) {
4433                 case X86_VENDOR_Intel:
4434                         if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
4435                                 *edx &= ~CPUID_AMD_EDX_TSCP;
4436                         break;
4437 
4438                 case X86_VENDOR_AMD:
4439                         if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
4440                                 *edx &= ~CPUID_AMD_EDX_TSCP;
4441                         if (!is_x86_feature(x86_featureset, X86FSET_SSE4A))
4442                                 *ecx &= ~CPUID_AMD_ECX_SSE4A;
4443                         break;
4444 
4445                 default:
4446                         break;
4447                 }
4448 
4449                 /*
4450                  * [no explicit support required beyond
4451                  * x87 fp context and exception handlers]
4452                  */
4453                 if (!fpu_exists)
4454                         *edx &= ~(CPUID_AMD_EDX_MMXamd |
4455                             CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx);
4456 
4457                 if (!is_x86_feature(x86_featureset, X86FSET_NX))
4458                         *edx &= ~CPUID_AMD_EDX_NX;
4459 #if !defined(__amd64)
4460                 *edx &= ~CPUID_AMD_EDX_LM;
4461 #endif
4462                 /*
4463                  * Now map the supported feature vector to
4464                  * things that we think userland will care about.
4465                  */
4466 #if defined(__amd64)
4467                 if (*edx & CPUID_AMD_EDX_SYSC)
4468                         hwcap_flags |= AV_386_AMD_SYSC;
4469 #endif
4470                 if (*edx & CPUID_AMD_EDX_MMXamd)
4471                         hwcap_flags |= AV_386_AMD_MMX;
4472                 if (*edx & CPUID_AMD_EDX_3DNow)
4473                         hwcap_flags |= AV_386_AMD_3DNow;
4474                 if (*edx & CPUID_AMD_EDX_3DNowx)
4475                         hwcap_flags |= AV_386_AMD_3DNowx;
4476                 if (*ecx & CPUID_AMD_ECX_SVM)
4477                         hwcap_flags |= AV_386_AMD_SVM;
4478 
4479                 switch (cpi->cpi_vendor) {
4480                 case X86_VENDOR_AMD:
4481                         if (*edx & CPUID_AMD_EDX_TSCP)
4482                                 hwcap_flags |= AV_386_TSCP;
4483                         if (*ecx & CPUID_AMD_ECX_AHF64)
4484                                 hwcap_flags |= AV_386_AHF;
4485                         if (*ecx & CPUID_AMD_ECX_SSE4A)
4486                                 hwcap_flags |= AV_386_AMD_SSE4A;
4487                         if (*ecx & CPUID_AMD_ECX_LZCNT)
4488                                 hwcap_flags |= AV_386_AMD_LZCNT;
4489                         if (*ecx & CPUID_AMD_ECX_MONITORX)
4490                                 hwcap_flags_2 |= AV_386_2_MONITORX;
4491                         break;
4492 
4493                 case X86_VENDOR_Intel:
4494                         if (*edx & CPUID_AMD_EDX_TSCP)
4495                                 hwcap_flags |= AV_386_TSCP;
4496                         if (*ecx & CPUID_AMD_ECX_LZCNT)
4497                                 hwcap_flags |= AV_386_AMD_LZCNT;
4498                         /*
4499                          * Aarrgh.
4500                          * Intel uses a different bit in the same word.
4501                          */
4502                         if (*ecx & CPUID_INTC_ECX_AHF64)
4503                                 hwcap_flags |= AV_386_AHF;
4504                         break;
4505 
4506                 default:
4507                         break;
4508                 }
4509                 break;
4510 
4511         case X86_VENDOR_TM:
4512                 cp.cp_eax = 0x80860001;
4513                 (void) __cpuid_insn(&cp);
4514                 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx;
4515                 break;
4516 
4517         default:
4518                 break;
4519         }
4520 
4521 pass4_done:
4522         cpi->cpi_pass = 4;
4523         if (hwcap_out != NULL) {
4524                 hwcap_out[0] = hwcap_flags;
4525                 hwcap_out[1] = hwcap_flags_2;
4526         }
4527 }
4528 
4529 
4530 /*
4531  * Simulate the cpuid instruction using the data we previously
4532  * captured about this CPU.  We try our best to return the truth
4533  * about the hardware, independently of kernel support.
4534  */
4535 uint32_t
4536 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp)
4537 {
4538         struct cpuid_info *cpi;
4539         struct cpuid_regs *xcp;
4540 
4541         if (cpu == NULL)
4542                 cpu = CPU;
4543         cpi = cpu->cpu_m.mcpu_cpi;
4544 
4545         ASSERT(cpuid_checkpass(cpu, 3));
4546 
4547         /*
4548          * CPUID data is cached in two separate places: cpi_std for standard
4549          * CPUID leaves , and cpi_extd for extended CPUID leaves.
4550          */
4551         if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD) {
4552                 xcp = &cpi->cpi_std[cp->cp_eax];
4553         } else if (cp->cp_eax >= CPUID_LEAF_EXT_0 &&
4554             cp->cp_eax <= cpi->cpi_xmaxeax &&
4555             cp->cp_eax < CPUID_LEAF_EXT_0 + NMAX_CPI_EXTD) {
4556                 xcp = &cpi->cpi_extd[cp->cp_eax - CPUID_LEAF_EXT_0];
4557         } else {
4558                 /*
4559                  * The caller is asking for data from an input parameter which
4560                  * the kernel has not cached.  In this case we go fetch from
4561                  * the hardware and return the data directly to the user.
4562                  */
4563                 return (__cpuid_insn(cp));
4564         }
4565 
4566         cp->cp_eax = xcp->cp_eax;
4567         cp->cp_ebx = xcp->cp_ebx;
4568         cp->cp_ecx = xcp->cp_ecx;
4569         cp->cp_edx = xcp->cp_edx;
4570         return (cp->cp_eax);
4571 }
4572 
4573 int
4574 cpuid_checkpass(cpu_t *cpu, int pass)
4575 {
4576         return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL &&
4577             cpu->cpu_m.mcpu_cpi->cpi_pass >= pass);
4578 }
4579 
4580 int
4581 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n)
4582 {
4583         ASSERT(cpuid_checkpass(cpu, 3));
4584 
4585         return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr));
4586 }
4587 
4588 int
4589 cpuid_is_cmt(cpu_t *cpu)
4590 {
4591         if (cpu == NULL)
4592                 cpu = CPU;
4593 
4594         ASSERT(cpuid_checkpass(cpu, 1));
4595 
4596         return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0);
4597 }
4598 
4599 /*
4600  * AMD and Intel both implement the 64-bit variant of the syscall
4601  * instruction (syscallq), so if there's -any- support for syscall,
4602  * cpuid currently says "yes, we support this".
4603  *
4604  * However, Intel decided to -not- implement the 32-bit variant of the
4605  * syscall instruction, so we provide a predicate to allow our caller
4606  * to test that subtlety here.
4607  *
4608  * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor,
4609  *      even in the case where the hardware would in fact support it.
4610  */
4611 /*ARGSUSED*/
4612 int
4613 cpuid_syscall32_insn(cpu_t *cpu)
4614 {
4615         ASSERT(cpuid_checkpass((cpu == NULL ? CPU : cpu), 1));
4616 
4617 #if !defined(__xpv)
4618         if (cpu == NULL)
4619                 cpu = CPU;
4620 
4621         /*CSTYLED*/
4622         {
4623                 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4624 
4625                 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
4626                     cpi->cpi_xmaxeax >= 0x80000001 &&
4627                     (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC))
4628                         return (1);
4629         }
4630 #endif
4631         return (0);
4632 }
4633 
4634 int
4635 cpuid_getidstr(cpu_t *cpu, char *s, size_t n)
4636 {
4637         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4638 
4639         static const char fmt[] =
4640             "x86 (%s %X family %d model %d step %d clock %d MHz)";
4641         static const char fmt_ht[] =
4642             "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)";
4643 
4644         ASSERT(cpuid_checkpass(cpu, 1));
4645 
4646         if (cpuid_is_cmt(cpu))
4647                 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid,
4648                     cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
4649                     cpi->cpi_family, cpi->cpi_model,
4650                     cpi->cpi_step, cpu->cpu_type_info.pi_clock));
4651         return (snprintf(s, n, fmt,
4652             cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
4653             cpi->cpi_family, cpi->cpi_model,
4654             cpi->cpi_step, cpu->cpu_type_info.pi_clock));
4655 }
4656 
4657 const char *
4658 cpuid_getvendorstr(cpu_t *cpu)
4659 {
4660         ASSERT(cpuid_checkpass(cpu, 1));
4661         return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr);
4662 }
4663 
4664 uint_t
4665 cpuid_getvendor(cpu_t *cpu)
4666 {
4667         ASSERT(cpuid_checkpass(cpu, 1));
4668         return (cpu->cpu_m.mcpu_cpi->cpi_vendor);
4669 }
4670 
4671 uint_t
4672 cpuid_getfamily(cpu_t *cpu)
4673 {
4674         ASSERT(cpuid_checkpass(cpu, 1));
4675         return (cpu->cpu_m.mcpu_cpi->cpi_family);
4676 }
4677 
4678 uint_t
4679 cpuid_getmodel(cpu_t *cpu)
4680 {
4681         ASSERT(cpuid_checkpass(cpu, 1));
4682         return (cpu->cpu_m.mcpu_cpi->cpi_model);
4683 }
4684 
4685 uint_t
4686 cpuid_get_ncpu_per_chip(cpu_t *cpu)
4687 {
4688         ASSERT(cpuid_checkpass(cpu, 1));
4689         return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip);
4690 }
4691 
4692 uint_t
4693 cpuid_get_ncore_per_chip(cpu_t *cpu)
4694 {
4695         ASSERT(cpuid_checkpass(cpu, 1));
4696         return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip);
4697 }
4698 
4699 uint_t
4700 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu)
4701 {
4702         ASSERT(cpuid_checkpass(cpu, 2));
4703         return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache);
4704 }
4705 
4706 id_t
4707 cpuid_get_last_lvl_cacheid(cpu_t *cpu)
4708 {
4709         ASSERT(cpuid_checkpass(cpu, 2));
4710         return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
4711 }
4712 
4713 uint_t
4714 cpuid_getstep(cpu_t *cpu)
4715 {
4716         ASSERT(cpuid_checkpass(cpu, 1));
4717         return (cpu->cpu_m.mcpu_cpi->cpi_step);
4718 }
4719 
4720 uint_t
4721 cpuid_getsig(struct cpu *cpu)
4722 {
4723         ASSERT(cpuid_checkpass(cpu, 1));
4724         return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax);
4725 }
4726 
4727 uint32_t
4728 cpuid_getchiprev(struct cpu *cpu)
4729 {
4730         ASSERT(cpuid_checkpass(cpu, 1));
4731         return (cpu->cpu_m.mcpu_cpi->cpi_chiprev);
4732 }
4733 
4734 const char *
4735 cpuid_getchiprevstr(struct cpu *cpu)
4736 {
4737         ASSERT(cpuid_checkpass(cpu, 1));
4738         return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr);
4739 }
4740 
4741 uint32_t
4742 cpuid_getsockettype(struct cpu *cpu)
4743 {
4744         ASSERT(cpuid_checkpass(cpu, 1));
4745         return (cpu->cpu_m.mcpu_cpi->cpi_socket);
4746 }
4747 
4748 const char *
4749 cpuid_getsocketstr(cpu_t *cpu)
4750 {
4751         static const char *socketstr = NULL;
4752         struct cpuid_info *cpi;
4753 
4754         ASSERT(cpuid_checkpass(cpu, 1));
4755         cpi = cpu->cpu_m.mcpu_cpi;
4756 
4757         /* Assume that socket types are the same across the system */
4758         if (socketstr == NULL)
4759                 socketstr = _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family,
4760                     cpi->cpi_model, cpi->cpi_step);
4761 
4762 
4763         return (socketstr);
4764 }
4765 
4766 int
4767 cpuid_get_chipid(cpu_t *cpu)
4768 {
4769         ASSERT(cpuid_checkpass(cpu, 1));
4770 
4771         if (cpuid_is_cmt(cpu))
4772                 return (cpu->cpu_m.mcpu_cpi->cpi_chipid);
4773         return (cpu->cpu_id);
4774 }
4775 
4776 id_t
4777 cpuid_get_coreid(cpu_t *cpu)
4778 {
4779         ASSERT(cpuid_checkpass(cpu, 1));
4780         return (cpu->cpu_m.mcpu_cpi->cpi_coreid);
4781 }
4782 
4783 int
4784 cpuid_get_pkgcoreid(cpu_t *cpu)
4785 {
4786         ASSERT(cpuid_checkpass(cpu, 1));
4787         return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid);
4788 }
4789 
4790 int
4791 cpuid_get_clogid(cpu_t *cpu)
4792 {
4793         ASSERT(cpuid_checkpass(cpu, 1));
4794         return (cpu->cpu_m.mcpu_cpi->cpi_clogid);
4795 }
4796 
4797 int
4798 cpuid_get_cacheid(cpu_t *cpu)
4799 {
4800         ASSERT(cpuid_checkpass(cpu, 1));
4801         return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
4802 }
4803 
4804 uint_t
4805 cpuid_get_procnodeid(cpu_t *cpu)
4806 {
4807         ASSERT(cpuid_checkpass(cpu, 1));
4808         return (cpu->cpu_m.mcpu_cpi->cpi_procnodeid);
4809 }
4810 
4811 uint_t
4812 cpuid_get_procnodes_per_pkg(cpu_t *cpu)
4813 {
4814         ASSERT(cpuid_checkpass(cpu, 1));
4815         return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg);
4816 }
4817 
4818 uint_t
4819 cpuid_get_compunitid(cpu_t *cpu)
4820 {
4821         ASSERT(cpuid_checkpass(cpu, 1));
4822         return (cpu->cpu_m.mcpu_cpi->cpi_compunitid);
4823 }
4824 
4825 uint_t
4826 cpuid_get_cores_per_compunit(cpu_t *cpu)
4827 {
4828         ASSERT(cpuid_checkpass(cpu, 1));
4829         return (cpu->cpu_m.mcpu_cpi->cpi_cores_per_compunit);
4830 }
4831 
4832 /*ARGSUSED*/
4833 int
4834 cpuid_have_cr8access(cpu_t *cpu)
4835 {
4836 #if defined(__amd64)
4837         return (1);
4838 #else
4839         struct cpuid_info *cpi;
4840 
4841         ASSERT(cpu != NULL);
4842         cpi = cpu->cpu_m.mcpu_cpi;
4843         if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 &&
4844             (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0)
4845                 return (1);
4846         return (0);
4847 #endif
4848 }
4849 
4850 uint32_t
4851 cpuid_get_apicid(cpu_t *cpu)
4852 {
4853         ASSERT(cpuid_checkpass(cpu, 1));
4854         if (cpu->cpu_m.mcpu_cpi->cpi_maxeax < 1) {
4855                 return (UINT32_MAX);
4856         } else {
4857                 return (cpu->cpu_m.mcpu_cpi->cpi_apicid);
4858         }
4859 }
4860 
4861 void
4862 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits)
4863 {
4864         struct cpuid_info *cpi;
4865 
4866         if (cpu == NULL)
4867                 cpu = CPU;
4868         cpi = cpu->cpu_m.mcpu_cpi;
4869 
4870         ASSERT(cpuid_checkpass(cpu, 1));
4871 
4872         if (pabits)
4873                 *pabits = cpi->cpi_pabits;
4874         if (vabits)
4875                 *vabits = cpi->cpi_vabits;
4876 }
4877 
4878 size_t
4879 cpuid_get_xsave_size()
4880 {
4881         return (MAX(cpuid_info0.cpi_xsave.xsav_max_size,
4882             sizeof (struct xsave_state)));
4883 }
4884 
4885 /*
4886  * Return true if the CPUs on this system require 'pointer clearing' for the
4887  * floating point error pointer exception handling. In the past, this has been
4888  * true for all AMD K7 & K8 CPUs, although newer AMD CPUs have been changed to
4889  * behave the same as Intel. This is checked via the CPUID_AMD_EBX_ERR_PTR_ZERO
4890  * feature bit and is reflected in the cpi_fp_amd_save member.
4891  */
4892 boolean_t
4893 cpuid_need_fp_excp_handling()
4894 {
4895         return (cpuid_info0.cpi_vendor == X86_VENDOR_AMD &&
4896             cpuid_info0.cpi_fp_amd_save != 0);
4897 }
4898 
4899 /*
4900  * Returns the number of data TLB entries for a corresponding
4901  * pagesize.  If it can't be computed, or isn't known, the
4902  * routine returns zero.  If you ask about an architecturally
4903  * impossible pagesize, the routine will panic (so that the
4904  * hat implementor knows that things are inconsistent.)
4905  */
4906 uint_t
4907 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize)
4908 {
4909         struct cpuid_info *cpi;
4910         uint_t dtlb_nent = 0;
4911 
4912         if (cpu == NULL)
4913                 cpu = CPU;
4914         cpi = cpu->cpu_m.mcpu_cpi;
4915 
4916         ASSERT(cpuid_checkpass(cpu, 1));
4917 
4918         /*
4919          * Check the L2 TLB info
4920          */
4921         if (cpi->cpi_xmaxeax >= 0x80000006) {
4922                 struct cpuid_regs *cp = &cpi->cpi_extd[6];
4923 
4924                 switch (pagesize) {
4925 
4926                 case 4 * 1024:
4927                         /*
4928                          * All zero in the top 16 bits of the register
4929                          * indicates a unified TLB. Size is in low 16 bits.
4930                          */
4931                         if ((cp->cp_ebx & 0xffff0000) == 0)
4932                                 dtlb_nent = cp->cp_ebx & 0x0000ffff;
4933                         else
4934                                 dtlb_nent = BITX(cp->cp_ebx, 27, 16);
4935                         break;
4936 
4937                 case 2 * 1024 * 1024:
4938                         if ((cp->cp_eax & 0xffff0000) == 0)
4939                                 dtlb_nent = cp->cp_eax & 0x0000ffff;
4940                         else
4941                                 dtlb_nent = BITX(cp->cp_eax, 27, 16);
4942                         break;
4943 
4944                 default:
4945                         panic("unknown L2 pagesize");
4946                         /*NOTREACHED*/
4947                 }
4948         }
4949 
4950         if (dtlb_nent != 0)
4951                 return (dtlb_nent);
4952 
4953         /*
4954          * No L2 TLB support for this size, try L1.
4955          */
4956         if (cpi->cpi_xmaxeax >= 0x80000005) {
4957                 struct cpuid_regs *cp = &cpi->cpi_extd[5];
4958 
4959                 switch (pagesize) {
4960                 case 4 * 1024:
4961                         dtlb_nent = BITX(cp->cp_ebx, 23, 16);
4962                         break;
4963                 case 2 * 1024 * 1024:
4964                         dtlb_nent = BITX(cp->cp_eax, 23, 16);
4965                         break;
4966                 default:
4967                         panic("unknown L1 d-TLB pagesize");
4968                         /*NOTREACHED*/
4969                 }
4970         }
4971 
4972         return (dtlb_nent);
4973 }
4974 
4975 /*
4976  * Return 0 if the erratum is not present or not applicable, positive
4977  * if it is, and negative if the status of the erratum is unknown.
4978  *
4979  * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm)
4980  * Processors" #25759, Rev 3.57, August 2005
4981  */
4982 int
4983 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum)
4984 {
4985         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4986         uint_t eax;
4987 
4988         /*
4989          * Bail out if this CPU isn't an AMD CPU, or if it's
4990          * a legacy (32-bit) AMD CPU.
4991          */
4992         if (cpi->cpi_vendor != X86_VENDOR_AMD ||
4993             cpi->cpi_family == 4 || cpi->cpi_family == 5 ||
4994             cpi->cpi_family == 6) {
4995                 return (0);
4996         }
4997 
4998         eax = cpi->cpi_std[1].cp_eax;
4999 
5000 #define SH_B0(eax)      (eax == 0xf40 || eax == 0xf50)
5001 #define SH_B3(eax)      (eax == 0xf51)
5002 #define B(eax)          (SH_B0(eax) || SH_B3(eax))
5003 
5004 #define SH_C0(eax)      (eax == 0xf48 || eax == 0xf58)
5005 
5006 #define SH_CG(eax)      (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a)
5007 #define DH_CG(eax)      (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0)
5008 #define CH_CG(eax)      (eax == 0xf82 || eax == 0xfb2)
5009 #define CG(eax)         (SH_CG(eax) || DH_CG(eax) || CH_CG(eax))
5010 
5011 #define SH_D0(eax)      (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70)
5012 #define DH_D0(eax)      (eax == 0x10fc0 || eax == 0x10ff0)
5013 #define CH_D0(eax)      (eax == 0x10f80 || eax == 0x10fb0)
5014 #define D0(eax)         (SH_D0(eax) || DH_D0(eax) || CH_D0(eax))
5015 
5016 #define SH_E0(eax)      (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70)
5017 #define JH_E1(eax)      (eax == 0x20f10)        /* JH8_E0 had 0x20f30 */
5018 #define DH_E3(eax)      (eax == 0x20fc0 || eax == 0x20ff0)
5019 #define SH_E4(eax)      (eax == 0x20f51 || eax == 0x20f71)
5020 #define BH_E4(eax)      (eax == 0x20fb1)
5021 #define SH_E5(eax)      (eax == 0x20f42)
5022 #define DH_E6(eax)      (eax == 0x20ff2 || eax == 0x20fc2)
5023 #define JH_E6(eax)      (eax == 0x20f12 || eax == 0x20f32)
5024 #define EX(eax)         (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \
5025                             SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \
5026                             DH_E6(eax) || JH_E6(eax))
5027 
5028 #define DR_AX(eax)      (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02)
5029 #define DR_B0(eax)      (eax == 0x100f20)
5030 #define DR_B1(eax)      (eax == 0x100f21)
5031 #define DR_BA(eax)      (eax == 0x100f2a)
5032 #define DR_B2(eax)      (eax == 0x100f22)
5033 #define DR_B3(eax)      (eax == 0x100f23)
5034 #define RB_C0(eax)      (eax == 0x100f40)
5035 
5036         switch (erratum) {
5037         case 1:
5038                 return (cpi->cpi_family < 0x10);
5039         case 51:        /* what does the asterisk mean? */
5040                 return (B(eax) || SH_C0(eax) || CG(eax));
5041         case 52:
5042                 return (B(eax));
5043         case 57:
5044                 return (cpi->cpi_family <= 0x11);
5045         case 58:
5046                 return (B(eax));
5047         case 60:
5048                 return (cpi->cpi_family <= 0x11);
5049         case 61:
5050         case 62:
5051         case 63:
5052         case 64:
5053         case 65:
5054         case 66:
5055         case 68:
5056         case 69:
5057         case 70:
5058         case 71:
5059                 return (B(eax));
5060         case 72:
5061                 return (SH_B0(eax));
5062         case 74:
5063                 return (B(eax));
5064         case 75:
5065                 return (cpi->cpi_family < 0x10);
5066         case 76:
5067                 return (B(eax));
5068         case 77:
5069                 return (cpi->cpi_family <= 0x11);
5070         case 78:
5071                 return (B(eax) || SH_C0(eax));
5072         case 79:
5073                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
5074         case 80:
5075         case 81:
5076         case 82:
5077                 return (B(eax));
5078         case 83:
5079                 return (B(eax) || SH_C0(eax) || CG(eax));
5080         case 85:
5081                 return (cpi->cpi_family < 0x10);
5082         case 86:
5083                 return (SH_C0(eax) || CG(eax));
5084         case 88:
5085 #if !defined(__amd64)
5086                 return (0);
5087 #else
5088                 return (B(eax) || SH_C0(eax));
5089 #endif
5090         case 89:
5091                 return (cpi->cpi_family < 0x10);
5092         case 90:
5093                 return (B(eax) || SH_C0(eax) || CG(eax));
5094         case 91:
5095         case 92:
5096                 return (B(eax) || SH_C0(eax));
5097         case 93:
5098                 return (SH_C0(eax));
5099         case 94:
5100                 return (B(eax) || SH_C0(eax) || CG(eax));
5101         case 95:
5102 #if !defined(__amd64)
5103                 return (0);
5104 #else
5105                 return (B(eax) || SH_C0(eax));
5106 #endif
5107         case 96:
5108                 return (B(eax) || SH_C0(eax) || CG(eax));
5109         case 97:
5110         case 98:
5111                 return (SH_C0(eax) || CG(eax));
5112         case 99:
5113                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5114         case 100:
5115                 return (B(eax) || SH_C0(eax));
5116         case 101:
5117         case 103:
5118                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5119         case 104:
5120                 return (SH_C0(eax) || CG(eax) || D0(eax));
5121         case 105:
5122         case 106:
5123         case 107:
5124                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5125         case 108:
5126                 return (DH_CG(eax));
5127         case 109:
5128                 return (SH_C0(eax) || CG(eax) || D0(eax));
5129         case 110:
5130                 return (D0(eax) || EX(eax));
5131         case 111:
5132                 return (CG(eax));
5133         case 112:
5134                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
5135         case 113:
5136                 return (eax == 0x20fc0);
5137         case 114:
5138                 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
5139         case 115:
5140                 return (SH_E0(eax) || JH_E1(eax));
5141         case 116:
5142                 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
5143         case 117:
5144                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5145         case 118:
5146                 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) ||
5147                     JH_E6(eax));
5148         case 121:
5149                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
5150         case 122:
5151                 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11);
5152         case 123:
5153                 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax));
5154         case 131:
5155                 return (cpi->cpi_family < 0x10);
5156         case 6336786:
5157 
5158                 /*
5159                  * Test for AdvPowerMgmtInfo.TscPStateInvariant
5160                  * if this is a K8 family or newer processor. We're testing for
5161                  * this 'erratum' to determine whether or not we have a constant
5162                  * TSC.
5163                  *
5164                  * Our current fix for this is to disable the C1-Clock ramping.
5165                  * However, this doesn't work on newer processor families nor
5166                  * does it work when virtualized as those devices don't exist.
5167                  */
5168                 if (cpi->cpi_family >= 0x12 || get_hwenv() != HW_NATIVE) {
5169                         return (0);
5170                 }
5171 
5172                 if (CPI_FAMILY(cpi) == 0xf) {
5173                         struct cpuid_regs regs;
5174                         regs.cp_eax = 0x80000007;
5175                         (void) __cpuid_insn(&regs);
5176                         return (!(regs.cp_edx & 0x100));
5177                 }
5178                 return (0);
5179         case 6323525:
5180                 /*
5181                  * This erratum (K8 #147) is not present on family 10 and newer.
5182                  */
5183                 if (cpi->cpi_family >= 0x10) {
5184                         return (0);
5185                 }
5186                 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) |
5187                     (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40);
5188 
5189         case 6671130:
5190                 /*
5191                  * check for processors (pre-Shanghai) that do not provide
5192                  * optimal management of 1gb ptes in its tlb.
5193                  */
5194                 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4);
5195 
5196         case 298:
5197                 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) ||
5198                     DR_B2(eax) || RB_C0(eax));
5199 
5200         case 721:
5201 #if defined(__amd64)
5202                 return (cpi->cpi_family == 0x10 || cpi->cpi_family == 0x12);
5203 #else
5204                 return (0);
5205 #endif
5206 
5207         default:
5208                 return (-1);
5209 
5210         }
5211 }
5212 
5213 /*
5214  * Determine if specified erratum is present via OSVW (OS Visible Workaround).
5215  * Return 1 if erratum is present, 0 if not present and -1 if indeterminate.
5216  */
5217 int
5218 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum)
5219 {
5220         struct cpuid_info       *cpi;
5221         uint_t                  osvwid;
5222         static int              osvwfeature = -1;
5223         uint64_t                osvwlength;
5224 
5225 
5226         cpi = cpu->cpu_m.mcpu_cpi;
5227 
5228         /* confirm OSVW supported */
5229         if (osvwfeature == -1) {
5230                 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW;
5231         } else {
5232                 /* assert that osvw feature setting is consistent on all cpus */
5233                 ASSERT(osvwfeature ==
5234                     (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW));
5235         }
5236         if (!osvwfeature)
5237                 return (-1);
5238 
5239         osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK;
5240 
5241         switch (erratum) {
5242         case 298:       /* osvwid is 0 */
5243                 osvwid = 0;
5244                 if (osvwlength <= (uint64_t)osvwid) {
5245                         /* osvwid 0 is unknown */
5246                         return (-1);
5247                 }
5248 
5249                 /*
5250                  * Check the OSVW STATUS MSR to determine the state
5251                  * of the erratum where:
5252                  *   0 - fixed by HW
5253                  *   1 - BIOS has applied the workaround when BIOS
5254                  *   workaround is available. (Or for other errata,
5255                  *   OS workaround is required.)
5256                  * For a value of 1, caller will confirm that the
5257                  * erratum 298 workaround has indeed been applied by BIOS.
5258                  *
5259                  * A 1 may be set in cpus that have a HW fix
5260                  * in a mixed cpu system. Regarding erratum 298:
5261                  *   In a multiprocessor platform, the workaround above
5262                  *   should be applied to all processors regardless of
5263                  *   silicon revision when an affected processor is
5264                  *   present.
5265                  */
5266 
5267                 return (rdmsr(MSR_AMD_OSVW_STATUS +
5268                     (osvwid / OSVW_ID_CNT_PER_MSR)) &
5269                     (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR)));
5270 
5271         default:
5272                 return (-1);
5273         }
5274 }
5275 
5276 static const char assoc_str[] = "associativity";
5277 static const char line_str[] = "line-size";
5278 static const char size_str[] = "size";
5279 
5280 static void
5281 add_cache_prop(dev_info_t *devi, const char *label, const char *type,
5282     uint32_t val)
5283 {
5284         char buf[128];
5285 
5286         /*
5287          * ndi_prop_update_int() is used because it is desirable for
5288          * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set.
5289          */
5290         if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf))
5291                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val);
5292 }
5293 
5294 /*
5295  * Intel-style cache/tlb description
5296  *
5297  * Standard cpuid level 2 gives a randomly ordered
5298  * selection of tags that index into a table that describes
5299  * cache and tlb properties.
5300  */
5301 
5302 static const char l1_icache_str[] = "l1-icache";
5303 static const char l1_dcache_str[] = "l1-dcache";
5304 static const char l2_cache_str[] = "l2-cache";
5305 static const char l3_cache_str[] = "l3-cache";
5306 static const char itlb4k_str[] = "itlb-4K";
5307 static const char dtlb4k_str[] = "dtlb-4K";
5308 static const char itlb2M_str[] = "itlb-2M";
5309 static const char itlb4M_str[] = "itlb-4M";
5310 static const char dtlb4M_str[] = "dtlb-4M";
5311 static const char dtlb24_str[] = "dtlb0-2M-4M";
5312 static const char itlb424_str[] = "itlb-4K-2M-4M";
5313 static const char itlb24_str[] = "itlb-2M-4M";
5314 static const char dtlb44_str[] = "dtlb-4K-4M";
5315 static const char sl1_dcache_str[] = "sectored-l1-dcache";
5316 static const char sl2_cache_str[] = "sectored-l2-cache";
5317 static const char itrace_str[] = "itrace-cache";
5318 static const char sl3_cache_str[] = "sectored-l3-cache";
5319 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k";
5320 
5321 static const struct cachetab {
5322         uint8_t         ct_code;
5323         uint8_t         ct_assoc;
5324         uint16_t        ct_line_size;
5325         size_t          ct_size;
5326         const char      *ct_label;
5327 } intel_ctab[] = {
5328         /*
5329          * maintain descending order!
5330          *
5331          * Codes ignored - Reason
5332          * ----------------------
5333          * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache
5334          * f0H/f1H - Currently we do not interpret prefetch size by design
5335          */
5336         { 0xe4, 16, 64, 8*1024*1024, l3_cache_str},
5337         { 0xe3, 16, 64, 4*1024*1024, l3_cache_str},
5338         { 0xe2, 16, 64, 2*1024*1024, l3_cache_str},
5339         { 0xde, 12, 64, 6*1024*1024, l3_cache_str},
5340         { 0xdd, 12, 64, 3*1024*1024, l3_cache_str},
5341         { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str},
5342         { 0xd8, 8, 64, 4*1024*1024, l3_cache_str},
5343         { 0xd7, 8, 64, 2*1024*1024, l3_cache_str},
5344         { 0xd6, 8, 64, 1*1024*1024, l3_cache_str},
5345         { 0xd2, 4, 64, 2*1024*1024, l3_cache_str},
5346         { 0xd1, 4, 64, 1*1024*1024, l3_cache_str},
5347         { 0xd0, 4, 64, 512*1024, l3_cache_str},
5348         { 0xca, 4, 0, 512, sh_l2_tlb4k_str},
5349         { 0xc0, 4, 0, 8, dtlb44_str },
5350         { 0xba, 4, 0, 64, dtlb4k_str },
5351         { 0xb4, 4, 0, 256, dtlb4k_str },
5352         { 0xb3, 4, 0, 128, dtlb4k_str },
5353         { 0xb2, 4, 0, 64, itlb4k_str },
5354         { 0xb0, 4, 0, 128, itlb4k_str },
5355         { 0x87, 8, 64, 1024*1024, l2_cache_str},
5356         { 0x86, 4, 64, 512*1024, l2_cache_str},
5357         { 0x85, 8, 32, 2*1024*1024, l2_cache_str},
5358         { 0x84, 8, 32, 1024*1024, l2_cache_str},
5359         { 0x83, 8, 32, 512*1024, l2_cache_str},
5360         { 0x82, 8, 32, 256*1024, l2_cache_str},
5361         { 0x80, 8, 64, 512*1024, l2_cache_str},
5362         { 0x7f, 2, 64, 512*1024, l2_cache_str},
5363         { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str},
5364         { 0x7c, 8, 64, 1024*1024, sl2_cache_str},
5365         { 0x7b, 8, 64, 512*1024, sl2_cache_str},
5366         { 0x7a, 8, 64, 256*1024, sl2_cache_str},
5367         { 0x79, 8, 64, 128*1024, sl2_cache_str},
5368         { 0x78, 8, 64, 1024*1024, l2_cache_str},
5369         { 0x73, 8, 0, 64*1024, itrace_str},
5370         { 0x72, 8, 0, 32*1024, itrace_str},
5371         { 0x71, 8, 0, 16*1024, itrace_str},
5372         { 0x70, 8, 0, 12*1024, itrace_str},
5373         { 0x68, 4, 64, 32*1024, sl1_dcache_str},
5374         { 0x67, 4, 64, 16*1024, sl1_dcache_str},
5375         { 0x66, 4, 64, 8*1024, sl1_dcache_str},
5376         { 0x60, 8, 64, 16*1024, sl1_dcache_str},
5377         { 0x5d, 0, 0, 256, dtlb44_str},
5378         { 0x5c, 0, 0, 128, dtlb44_str},
5379         { 0x5b, 0, 0, 64, dtlb44_str},
5380         { 0x5a, 4, 0, 32, dtlb24_str},
5381         { 0x59, 0, 0, 16, dtlb4k_str},
5382         { 0x57, 4, 0, 16, dtlb4k_str},
5383         { 0x56, 4, 0, 16, dtlb4M_str},
5384         { 0x55, 0, 0, 7, itlb24_str},
5385         { 0x52, 0, 0, 256, itlb424_str},
5386         { 0x51, 0, 0, 128, itlb424_str},
5387         { 0x50, 0, 0, 64, itlb424_str},
5388         { 0x4f, 0, 0, 32, itlb4k_str},
5389         { 0x4e, 24, 64, 6*1024*1024, l2_cache_str},
5390         { 0x4d, 16, 64, 16*1024*1024, l3_cache_str},
5391         { 0x4c, 12, 64, 12*1024*1024, l3_cache_str},
5392         { 0x4b, 16, 64, 8*1024*1024, l3_cache_str},
5393         { 0x4a, 12, 64, 6*1024*1024, l3_cache_str},
5394         { 0x49, 16, 64, 4*1024*1024, l3_cache_str},
5395         { 0x48, 12, 64, 3*1024*1024, l2_cache_str},
5396         { 0x47, 8, 64, 8*1024*1024, l3_cache_str},
5397         { 0x46, 4, 64, 4*1024*1024, l3_cache_str},
5398         { 0x45, 4, 32, 2*1024*1024, l2_cache_str},
5399         { 0x44, 4, 32, 1024*1024, l2_cache_str},
5400         { 0x43, 4, 32, 512*1024, l2_cache_str},
5401         { 0x42, 4, 32, 256*1024, l2_cache_str},
5402         { 0x41, 4, 32, 128*1024, l2_cache_str},
5403         { 0x3e, 4, 64, 512*1024, sl2_cache_str},
5404         { 0x3d, 6, 64, 384*1024, sl2_cache_str},
5405         { 0x3c, 4, 64, 256*1024, sl2_cache_str},
5406         { 0x3b, 2, 64, 128*1024, sl2_cache_str},
5407         { 0x3a, 6, 64, 192*1024, sl2_cache_str},
5408         { 0x39, 4, 64, 128*1024, sl2_cache_str},
5409         { 0x30, 8, 64, 32*1024, l1_icache_str},
5410         { 0x2c, 8, 64, 32*1024, l1_dcache_str},
5411         { 0x29, 8, 64, 4096*1024, sl3_cache_str},
5412         { 0x25, 8, 64, 2048*1024, sl3_cache_str},
5413         { 0x23, 8, 64, 1024*1024, sl3_cache_str},
5414         { 0x22, 4, 64, 512*1024, sl3_cache_str},
5415         { 0x0e, 6, 64, 24*1024, l1_dcache_str},
5416         { 0x0d, 4, 32, 16*1024, l1_dcache_str},
5417         { 0x0c, 4, 32, 16*1024, l1_dcache_str},
5418         { 0x0b, 4, 0, 4, itlb4M_str},
5419         { 0x0a, 2, 32, 8*1024, l1_dcache_str},
5420         { 0x08, 4, 32, 16*1024, l1_icache_str},
5421         { 0x06, 4, 32, 8*1024, l1_icache_str},
5422         { 0x05, 4, 0, 32, dtlb4M_str},
5423         { 0x04, 4, 0, 8, dtlb4M_str},
5424         { 0x03, 4, 0, 64, dtlb4k_str},
5425         { 0x02, 4, 0, 2, itlb4M_str},
5426         { 0x01, 4, 0, 32, itlb4k_str},
5427         { 0 }
5428 };
5429 
5430 static const struct cachetab cyrix_ctab[] = {
5431         { 0x70, 4, 0, 32, "tlb-4K" },
5432         { 0x80, 4, 16, 16*1024, "l1-cache" },
5433         { 0 }
5434 };
5435 
5436 /*
5437  * Search a cache table for a matching entry
5438  */
5439 static const struct cachetab *
5440 find_cacheent(const struct cachetab *ct, uint_t code)
5441 {
5442         if (code != 0) {
5443                 for (; ct->ct_code != 0; ct++)
5444                         if (ct->ct_code <= code)
5445                                 break;
5446                 if (ct->ct_code == code)
5447                         return (ct);
5448         }
5449         return (NULL);
5450 }
5451 
5452 /*
5453  * Populate cachetab entry with L2 or L3 cache-information using
5454  * cpuid function 4. This function is called from intel_walk_cacheinfo()
5455  * when descriptor 0x49 is encountered. It returns 0 if no such cache
5456  * information is found.
5457  */
5458 static int
5459 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi)
5460 {
5461         uint32_t level, i;
5462         int ret = 0;
5463 
5464         for (i = 0; i < cpi->cpi_cache_leaf_size; i++) {
5465                 level = CPI_CACHE_LVL(cpi->cpi_cache_leaves[i]);
5466 
5467                 if (level == 2 || level == 3) {
5468                         ct->ct_assoc =
5469                             CPI_CACHE_WAYS(cpi->cpi_cache_leaves[i]) + 1;
5470                         ct->ct_line_size =
5471                             CPI_CACHE_COH_LN_SZ(cpi->cpi_cache_leaves[i]) + 1;
5472                         ct->ct_size = ct->ct_assoc *
5473                             (CPI_CACHE_PARTS(cpi->cpi_cache_leaves[i]) + 1) *
5474                             ct->ct_line_size *
5475                             (cpi->cpi_cache_leaves[i]->cp_ecx + 1);
5476 
5477                         if (level == 2) {
5478                                 ct->ct_label = l2_cache_str;
5479                         } else if (level == 3) {
5480                                 ct->ct_label = l3_cache_str;
5481                         }
5482                         ret = 1;
5483                 }
5484         }
5485 
5486         return (ret);
5487 }
5488 
5489 /*
5490  * Walk the cacheinfo descriptor, applying 'func' to every valid element
5491  * The walk is terminated if the walker returns non-zero.
5492  */
5493 static void
5494 intel_walk_cacheinfo(struct cpuid_info *cpi,
5495     void *arg, int (*func)(void *, const struct cachetab *))
5496 {
5497         const struct cachetab *ct;
5498         struct cachetab des_49_ct, des_b1_ct;
5499         uint8_t *dp;
5500         int i;
5501 
5502         if ((dp = cpi->cpi_cacheinfo) == NULL)
5503                 return;
5504         for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
5505                 /*
5506                  * For overloaded descriptor 0x49 we use cpuid function 4
5507                  * if supported by the current processor, to create
5508                  * cache information.
5509                  * For overloaded descriptor 0xb1 we use X86_PAE flag
5510                  * to disambiguate the cache information.
5511                  */
5512                 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 &&
5513                     intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) {
5514                                 ct = &des_49_ct;
5515                 } else if (*dp == 0xb1) {
5516                         des_b1_ct.ct_code = 0xb1;
5517                         des_b1_ct.ct_assoc = 4;
5518                         des_b1_ct.ct_line_size = 0;
5519                         if (is_x86_feature(x86_featureset, X86FSET_PAE)) {
5520                                 des_b1_ct.ct_size = 8;
5521                                 des_b1_ct.ct_label = itlb2M_str;
5522                         } else {
5523                                 des_b1_ct.ct_size = 4;
5524                                 des_b1_ct.ct_label = itlb4M_str;
5525                         }
5526                         ct = &des_b1_ct;
5527                 } else {
5528                         if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) {
5529                                 continue;
5530                         }
5531                 }
5532 
5533                 if (func(arg, ct) != 0) {
5534                         break;
5535                 }
5536         }
5537 }
5538 
5539 /*
5540  * (Like the Intel one, except for Cyrix CPUs)
5541  */
5542 static void
5543 cyrix_walk_cacheinfo(struct cpuid_info *cpi,
5544     void *arg, int (*func)(void *, const struct cachetab *))
5545 {
5546         const struct cachetab *ct;
5547         uint8_t *dp;
5548         int i;
5549 
5550         if ((dp = cpi->cpi_cacheinfo) == NULL)
5551                 return;
5552         for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
5553                 /*
5554                  * Search Cyrix-specific descriptor table first ..
5555                  */
5556                 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) {
5557                         if (func(arg, ct) != 0)
5558                                 break;
5559                         continue;
5560                 }
5561                 /*
5562                  * .. else fall back to the Intel one
5563                  */
5564                 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) {
5565                         if (func(arg, ct) != 0)
5566                                 break;
5567                         continue;
5568                 }
5569         }
5570 }
5571 
5572 /*
5573  * A cacheinfo walker that adds associativity, line-size, and size properties
5574  * to the devinfo node it is passed as an argument.
5575  */
5576 static int
5577 add_cacheent_props(void *arg, const struct cachetab *ct)
5578 {
5579         dev_info_t *devi = arg;
5580 
5581         add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc);
5582         if (ct->ct_line_size != 0)
5583                 add_cache_prop(devi, ct->ct_label, line_str,
5584                     ct->ct_line_size);
5585         add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size);
5586         return (0);
5587 }
5588 
5589 
5590 static const char fully_assoc[] = "fully-associative?";
5591 
5592 /*
5593  * AMD style cache/tlb description
5594  *
5595  * Extended functions 5 and 6 directly describe properties of
5596  * tlbs and various cache levels.
5597  */
5598 static void
5599 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc)
5600 {
5601         switch (assoc) {
5602         case 0: /* reserved; ignore */
5603                 break;
5604         default:
5605                 add_cache_prop(devi, label, assoc_str, assoc);
5606                 break;
5607         case 0xff:
5608                 add_cache_prop(devi, label, fully_assoc, 1);
5609                 break;
5610         }
5611 }
5612 
5613 static void
5614 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
5615 {
5616         if (size == 0)
5617                 return;
5618         add_cache_prop(devi, label, size_str, size);
5619         add_amd_assoc(devi, label, assoc);
5620 }
5621 
5622 static void
5623 add_amd_cache(dev_info_t *devi, const char *label,
5624     uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
5625 {
5626         if (size == 0 || line_size == 0)
5627                 return;
5628         add_amd_assoc(devi, label, assoc);
5629         /*
5630          * Most AMD parts have a sectored cache. Multiple cache lines are
5631          * associated with each tag. A sector consists of all cache lines
5632          * associated with a tag. For example, the AMD K6-III has a sector
5633          * size of 2 cache lines per tag.
5634          */
5635         if (lines_per_tag != 0)
5636                 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
5637         add_cache_prop(devi, label, line_str, line_size);
5638         add_cache_prop(devi, label, size_str, size * 1024);
5639 }
5640 
5641 static void
5642 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc)
5643 {
5644         switch (assoc) {
5645         case 0: /* off */
5646                 break;
5647         case 1:
5648         case 2:
5649         case 4:
5650                 add_cache_prop(devi, label, assoc_str, assoc);
5651                 break;
5652         case 6:
5653                 add_cache_prop(devi, label, assoc_str, 8);
5654                 break;
5655         case 8:
5656                 add_cache_prop(devi, label, assoc_str, 16);
5657                 break;
5658         case 0xf:
5659                 add_cache_prop(devi, label, fully_assoc, 1);
5660                 break;
5661         default: /* reserved; ignore */
5662                 break;
5663         }
5664 }
5665 
5666 static void
5667 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
5668 {
5669         if (size == 0 || assoc == 0)
5670                 return;
5671         add_amd_l2_assoc(devi, label, assoc);
5672         add_cache_prop(devi, label, size_str, size);
5673 }
5674 
5675 static void
5676 add_amd_l2_cache(dev_info_t *devi, const char *label,
5677     uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
5678 {
5679         if (size == 0 || assoc == 0 || line_size == 0)
5680                 return;
5681         add_amd_l2_assoc(devi, label, assoc);
5682         if (lines_per_tag != 0)
5683                 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
5684         add_cache_prop(devi, label, line_str, line_size);
5685         add_cache_prop(devi, label, size_str, size * 1024);
5686 }
5687 
5688 static void
5689 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi)
5690 {
5691         struct cpuid_regs *cp;
5692 
5693         if (cpi->cpi_xmaxeax < 0x80000005)
5694                 return;
5695         cp = &cpi->cpi_extd[5];
5696 
5697         /*
5698          * 4M/2M L1 TLB configuration
5699          *
5700          * We report the size for 2M pages because AMD uses two
5701          * TLB entries for one 4M page.
5702          */
5703         add_amd_tlb(devi, "dtlb-2M",
5704             BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16));
5705         add_amd_tlb(devi, "itlb-2M",
5706             BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0));
5707 
5708         /*
5709          * 4K L1 TLB configuration
5710          */
5711 
5712         switch (cpi->cpi_vendor) {
5713                 uint_t nentries;
5714         case X86_VENDOR_TM:
5715                 if (cpi->cpi_family >= 5) {
5716                         /*
5717                          * Crusoe processors have 256 TLB entries, but
5718                          * cpuid data format constrains them to only
5719                          * reporting 255 of them.
5720                          */
5721                         if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255)
5722                                 nentries = 256;
5723                         /*
5724                          * Crusoe processors also have a unified TLB
5725                          */
5726                         add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24),
5727                             nentries);
5728                         break;
5729                 }
5730                 /*FALLTHROUGH*/
5731         default:
5732                 add_amd_tlb(devi, itlb4k_str,
5733                     BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16));
5734                 add_amd_tlb(devi, dtlb4k_str,
5735                     BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0));
5736                 break;
5737         }
5738 
5739         /*
5740          * data L1 cache configuration
5741          */
5742 
5743         add_amd_cache(devi, l1_dcache_str,
5744             BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16),
5745             BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0));
5746 
5747         /*
5748          * code L1 cache configuration
5749          */
5750 
5751         add_amd_cache(devi, l1_icache_str,
5752             BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16),
5753             BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0));
5754 
5755         if (cpi->cpi_xmaxeax < 0x80000006)
5756                 return;
5757         cp = &cpi->cpi_extd[6];
5758 
5759         /* Check for a unified L2 TLB for large pages */
5760 
5761         if (BITX(cp->cp_eax, 31, 16) == 0)
5762                 add_amd_l2_tlb(devi, "l2-tlb-2M",
5763                     BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
5764         else {
5765                 add_amd_l2_tlb(devi, "l2-dtlb-2M",
5766                     BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
5767                 add_amd_l2_tlb(devi, "l2-itlb-2M",
5768                     BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
5769         }
5770 
5771         /* Check for a unified L2 TLB for 4K pages */
5772 
5773         if (BITX(cp->cp_ebx, 31, 16) == 0) {
5774                 add_amd_l2_tlb(devi, "l2-tlb-4K",
5775                     BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
5776         } else {
5777                 add_amd_l2_tlb(devi, "l2-dtlb-4K",
5778                     BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
5779                 add_amd_l2_tlb(devi, "l2-itlb-4K",
5780                     BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
5781         }
5782 
5783         add_amd_l2_cache(devi, l2_cache_str,
5784             BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12),
5785             BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0));
5786 }
5787 
5788 /*
5789  * There are two basic ways that the x86 world describes it cache
5790  * and tlb architecture - Intel's way and AMD's way.
5791  *
5792  * Return which flavor of cache architecture we should use
5793  */
5794 static int
5795 x86_which_cacheinfo(struct cpuid_info *cpi)
5796 {
5797         switch (cpi->cpi_vendor) {
5798         case X86_VENDOR_Intel:
5799                 if (cpi->cpi_maxeax >= 2)
5800                         return (X86_VENDOR_Intel);
5801                 break;
5802         case X86_VENDOR_AMD:
5803                 /*
5804                  * The K5 model 1 was the first part from AMD that reported
5805                  * cache sizes via extended cpuid functions.
5806                  */
5807                 if (cpi->cpi_family > 5 ||
5808                     (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
5809                         return (X86_VENDOR_AMD);
5810                 break;
5811         case X86_VENDOR_TM:
5812                 if (cpi->cpi_family >= 5)
5813                         return (X86_VENDOR_AMD);
5814                 /*FALLTHROUGH*/
5815         default:
5816                 /*
5817                  * If they have extended CPU data for 0x80000005
5818                  * then we assume they have AMD-format cache
5819                  * information.
5820                  *
5821                  * If not, and the vendor happens to be Cyrix,
5822                  * then try our-Cyrix specific handler.
5823                  *
5824                  * If we're not Cyrix, then assume we're using Intel's
5825                  * table-driven format instead.
5826                  */
5827                 if (cpi->cpi_xmaxeax >= 0x80000005)
5828                         return (X86_VENDOR_AMD);
5829                 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix)
5830                         return (X86_VENDOR_Cyrix);
5831                 else if (cpi->cpi_maxeax >= 2)
5832                         return (X86_VENDOR_Intel);
5833                 break;
5834         }
5835         return (-1);
5836 }
5837 
5838 void
5839 cpuid_set_cpu_properties(void *dip, processorid_t cpu_id,
5840     struct cpuid_info *cpi)
5841 {
5842         dev_info_t *cpu_devi;
5843         int create;
5844 
5845         cpu_devi = (dev_info_t *)dip;
5846 
5847         /* device_type */
5848         (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
5849             "device_type", "cpu");
5850 
5851         /* reg */
5852         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5853             "reg", cpu_id);
5854 
5855         /* cpu-mhz, and clock-frequency */
5856         if (cpu_freq > 0) {
5857                 long long mul;
5858 
5859                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5860                     "cpu-mhz", cpu_freq);
5861                 if ((mul = cpu_freq * 1000000LL) <= INT_MAX)
5862                         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5863                             "clock-frequency", (int)mul);
5864         }
5865 
5866         if (!is_x86_feature(x86_featureset, X86FSET_CPUID)) {
5867                 return;
5868         }
5869 
5870         /* vendor-id */
5871         (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
5872             "vendor-id", cpi->cpi_vendorstr);
5873 
5874         if (cpi->cpi_maxeax == 0) {
5875                 return;
5876         }
5877 
5878         /*
5879          * family, model, and step
5880          */
5881         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5882             "family", CPI_FAMILY(cpi));
5883         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5884             "cpu-model", CPI_MODEL(cpi));
5885         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5886             "stepping-id", CPI_STEP(cpi));
5887 
5888         /* type */
5889         switch (cpi->cpi_vendor) {
5890         case X86_VENDOR_Intel:
5891                 create = 1;
5892                 break;
5893         default:
5894                 create = 0;
5895                 break;
5896         }
5897         if (create)
5898                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5899                     "type", CPI_TYPE(cpi));
5900 
5901         /* ext-family */
5902         switch (cpi->cpi_vendor) {
5903         case X86_VENDOR_Intel:
5904         case X86_VENDOR_AMD:
5905                 create = cpi->cpi_family >= 0xf;
5906                 break;
5907         default:
5908                 create = 0;
5909                 break;
5910         }
5911         if (create)
5912                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5913                     "ext-family", CPI_FAMILY_XTD(cpi));
5914 
5915         /* ext-model */
5916         switch (cpi->cpi_vendor) {
5917         case X86_VENDOR_Intel:
5918                 create = IS_EXTENDED_MODEL_INTEL(cpi);
5919                 break;
5920         case X86_VENDOR_AMD:
5921                 create = CPI_FAMILY(cpi) == 0xf;
5922                 break;
5923         default:
5924                 create = 0;
5925                 break;
5926         }
5927         if (create)
5928                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5929                     "ext-model", CPI_MODEL_XTD(cpi));
5930 
5931         /* generation */
5932         switch (cpi->cpi_vendor) {
5933         case X86_VENDOR_AMD:
5934                 /*
5935                  * AMD K5 model 1 was the first part to support this
5936                  */
5937                 create = cpi->cpi_xmaxeax >= 0x80000001;
5938                 break;
5939         default:
5940                 create = 0;
5941                 break;
5942         }
5943         if (create)
5944                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5945                     "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8));
5946 
5947         /* brand-id */
5948         switch (cpi->cpi_vendor) {
5949         case X86_VENDOR_Intel:
5950                 /*
5951                  * brand id first appeared on Pentium III Xeon model 8,
5952                  * and Celeron model 8 processors and Opteron
5953                  */
5954                 create = cpi->cpi_family > 6 ||
5955                     (cpi->cpi_family == 6 && cpi->cpi_model >= 8);
5956                 break;
5957         case X86_VENDOR_AMD:
5958                 create = cpi->cpi_family >= 0xf;
5959                 break;
5960         default:
5961                 create = 0;
5962                 break;
5963         }
5964         if (create && cpi->cpi_brandid != 0) {
5965                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5966                     "brand-id", cpi->cpi_brandid);
5967         }
5968 
5969         /* chunks, and apic-id */
5970         switch (cpi->cpi_vendor) {
5971                 /*
5972                  * first available on Pentium IV and Opteron (K8)
5973                  */
5974         case X86_VENDOR_Intel:
5975                 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
5976                 break;
5977         case X86_VENDOR_AMD:
5978                 create = cpi->cpi_family >= 0xf;
5979                 break;
5980         default:
5981                 create = 0;
5982                 break;
5983         }
5984         if (create) {
5985                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5986                     "chunks", CPI_CHUNKS(cpi));
5987                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5988                     "apic-id", cpi->cpi_apicid);
5989                 if (cpi->cpi_chipid >= 0) {
5990                         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5991                             "chip#", cpi->cpi_chipid);
5992                         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5993                             "clog#", cpi->cpi_clogid);
5994                 }
5995         }
5996 
5997         /* cpuid-features */
5998         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5999             "cpuid-features", CPI_FEATURES_EDX(cpi));
6000 
6001 
6002         /* cpuid-features-ecx */
6003         switch (cpi->cpi_vendor) {
6004         case X86_VENDOR_Intel:
6005                 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
6006                 break;
6007         case X86_VENDOR_AMD:
6008                 create = cpi->cpi_family >= 0xf;
6009                 break;
6010         default:
6011                 create = 0;
6012                 break;
6013         }
6014         if (create)
6015                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6016                     "cpuid-features-ecx", CPI_FEATURES_ECX(cpi));
6017 
6018         /* ext-cpuid-features */
6019         switch (cpi->cpi_vendor) {
6020         case X86_VENDOR_Intel:
6021         case X86_VENDOR_AMD:
6022         case X86_VENDOR_Cyrix:
6023         case X86_VENDOR_TM:
6024         case X86_VENDOR_Centaur:
6025                 create = cpi->cpi_xmaxeax >= 0x80000001;
6026                 break;
6027         default:
6028                 create = 0;
6029                 break;
6030         }
6031         if (create) {
6032                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6033                     "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi));
6034                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6035                     "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi));
6036         }
6037 
6038         /*
6039          * Brand String first appeared in Intel Pentium IV, AMD K5
6040          * model 1, and Cyrix GXm.  On earlier models we try and
6041          * simulate something similar .. so this string should always
6042          * same -something- about the processor, however lame.
6043          */
6044         (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
6045             "brand-string", cpi->cpi_brandstr);
6046 
6047         /*
6048          * Finally, cache and tlb information
6049          */
6050         switch (x86_which_cacheinfo(cpi)) {
6051         case X86_VENDOR_Intel:
6052                 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
6053                 break;
6054         case X86_VENDOR_Cyrix:
6055                 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
6056                 break;
6057         case X86_VENDOR_AMD:
6058                 amd_cache_info(cpi, cpu_devi);
6059                 break;
6060         default:
6061                 break;
6062         }
6063 }
6064 
6065 struct l2info {
6066         int *l2i_csz;
6067         int *l2i_lsz;
6068         int *l2i_assoc;
6069         int l2i_ret;
6070 };
6071 
6072 /*
6073  * A cacheinfo walker that fetches the size, line-size and associativity
6074  * of the L2 cache
6075  */
6076 static int
6077 intel_l2cinfo(void *arg, const struct cachetab *ct)
6078 {
6079         struct l2info *l2i = arg;
6080         int *ip;
6081 
6082         if (ct->ct_label != l2_cache_str &&
6083             ct->ct_label != sl2_cache_str)
6084                 return (0);     /* not an L2 -- keep walking */
6085 
6086         if ((ip = l2i->l2i_csz) != NULL)
6087                 *ip = ct->ct_size;
6088         if ((ip = l2i->l2i_lsz) != NULL)
6089                 *ip = ct->ct_line_size;
6090         if ((ip = l2i->l2i_assoc) != NULL)
6091                 *ip = ct->ct_assoc;
6092         l2i->l2i_ret = ct->ct_size;
6093         return (1);             /* was an L2 -- terminate walk */
6094 }
6095 
6096 /*
6097  * AMD L2/L3 Cache and TLB Associativity Field Definition:
6098  *
6099  *      Unlike the associativity for the L1 cache and tlb where the 8 bit
6100  *      value is the associativity, the associativity for the L2 cache and
6101  *      tlb is encoded in the following table. The 4 bit L2 value serves as
6102  *      an index into the amd_afd[] array to determine the associativity.
6103  *      -1 is undefined. 0 is fully associative.
6104  */
6105 
6106 static int amd_afd[] =
6107         {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0};
6108 
6109 static void
6110 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i)
6111 {
6112         struct cpuid_regs *cp;
6113         uint_t size, assoc;
6114         int i;
6115         int *ip;
6116 
6117         if (cpi->cpi_xmaxeax < 0x80000006)
6118                 return;
6119         cp = &cpi->cpi_extd[6];
6120 
6121         if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 &&
6122             (size = BITX(cp->cp_ecx, 31, 16)) != 0) {
6123                 uint_t cachesz = size * 1024;
6124                 assoc = amd_afd[i];
6125 
6126                 ASSERT(assoc != -1);
6127 
6128                 if ((ip = l2i->l2i_csz) != NULL)
6129                         *ip = cachesz;
6130                 if ((ip = l2i->l2i_lsz) != NULL)
6131                         *ip = BITX(cp->cp_ecx, 7, 0);
6132                 if ((ip = l2i->l2i_assoc) != NULL)
6133                         *ip = assoc;
6134                 l2i->l2i_ret = cachesz;
6135         }
6136 }
6137 
6138 int
6139 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc)
6140 {
6141         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
6142         struct l2info __l2info, *l2i = &__l2info;
6143 
6144         l2i->l2i_csz = csz;
6145         l2i->l2i_lsz = lsz;
6146         l2i->l2i_assoc = assoc;
6147         l2i->l2i_ret = -1;
6148 
6149         switch (x86_which_cacheinfo(cpi)) {
6150         case X86_VENDOR_Intel:
6151                 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
6152                 break;
6153         case X86_VENDOR_Cyrix:
6154                 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
6155                 break;
6156         case X86_VENDOR_AMD:
6157                 amd_l2cacheinfo(cpi, l2i);
6158                 break;
6159         default:
6160                 break;
6161         }
6162         return (l2i->l2i_ret);
6163 }
6164 
6165 #if !defined(__xpv)
6166 
6167 uint32_t *
6168 cpuid_mwait_alloc(cpu_t *cpu)
6169 {
6170         uint32_t        *ret;
6171         size_t          mwait_size;
6172 
6173         ASSERT(cpuid_checkpass(CPU, 2));
6174 
6175         mwait_size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max;
6176         if (mwait_size == 0)
6177                 return (NULL);
6178 
6179         /*
6180          * kmem_alloc() returns cache line size aligned data for mwait_size
6181          * allocations.  mwait_size is currently cache line sized.  Neither
6182          * of these implementation details are guarantied to be true in the
6183          * future.
6184          *
6185          * First try allocating mwait_size as kmem_alloc() currently returns
6186          * correctly aligned memory.  If kmem_alloc() does not return
6187          * mwait_size aligned memory, then use mwait_size ROUNDUP.
6188          *
6189          * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we
6190          * decide to free this memory.
6191          */
6192         ret = kmem_zalloc(mwait_size, KM_SLEEP);
6193         if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) {
6194                 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
6195                 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size;
6196                 *ret = MWAIT_RUNNING;
6197                 return (ret);
6198         } else {
6199                 kmem_free(ret, mwait_size);
6200                 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP);
6201                 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
6202                 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2;
6203                 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size);
6204                 *ret = MWAIT_RUNNING;
6205                 return (ret);
6206         }
6207 }
6208 
6209 void
6210 cpuid_mwait_free(cpu_t *cpu)
6211 {
6212         if (cpu->cpu_m.mcpu_cpi == NULL) {
6213                 return;
6214         }
6215 
6216         if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL &&
6217             cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) {
6218                 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual,
6219                     cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual);
6220         }
6221 
6222         cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL;
6223         cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0;
6224 }
6225 
6226 void
6227 patch_tsc_read(int flag)
6228 {
6229         size_t cnt;
6230 
6231         switch (flag) {
6232         case TSC_NONE:
6233                 cnt = &_no_rdtsc_end - &_no_rdtsc_start;
6234                 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt);
6235                 break;
6236         case TSC_RDTSC_MFENCE:
6237                 cnt = &_tsc_mfence_end - &_tsc_mfence_start;
6238                 (void) memcpy((void *)tsc_read,
6239                     (void *)&_tsc_mfence_start, cnt);
6240                 break;
6241         case TSC_RDTSC_LFENCE:
6242                 cnt = &_tsc_lfence_end - &_tsc_lfence_start;
6243                 (void) memcpy((void *)tsc_read,
6244                     (void *)&_tsc_lfence_start, cnt);
6245                 break;
6246         case TSC_TSCP:
6247                 cnt = &_tscp_end - &_tscp_start;
6248                 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt);
6249                 break;
6250         default:
6251                 /* Bail for unexpected TSC types. (TSC_NONE covers 0) */
6252                 cmn_err(CE_PANIC, "Unrecogized TSC type: %d", flag);
6253                 break;
6254         }
6255         tsc_type = flag;
6256 }
6257 
6258 int
6259 cpuid_deep_cstates_supported(void)
6260 {
6261         struct cpuid_info *cpi;
6262         struct cpuid_regs regs;
6263 
6264         ASSERT(cpuid_checkpass(CPU, 1));
6265 
6266         cpi = CPU->cpu_m.mcpu_cpi;
6267 
6268         if (!is_x86_feature(x86_featureset, X86FSET_CPUID))
6269                 return (0);
6270 
6271         switch (cpi->cpi_vendor) {
6272         case X86_VENDOR_Intel:
6273                 if (cpi->cpi_xmaxeax < 0x80000007)
6274                         return (0);
6275 
6276                 /*
6277                  * TSC run at a constant rate in all ACPI C-states?
6278                  */
6279                 regs.cp_eax = 0x80000007;
6280                 (void) __cpuid_insn(&regs);
6281                 return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE);
6282 
6283         default:
6284                 return (0);
6285         }
6286 }
6287 
6288 #endif  /* !__xpv */
6289 
6290 void
6291 post_startup_cpu_fixups(void)
6292 {
6293 #ifndef __xpv
6294         /*
6295          * Some AMD processors support C1E state. Entering this state will
6296          * cause the local APIC timer to stop, which we can't deal with at
6297          * this time.
6298          */
6299         if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) {
6300                 on_trap_data_t otd;
6301                 uint64_t reg;
6302 
6303                 if (!on_trap(&otd, OT_DATA_ACCESS)) {
6304                         reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT);
6305                         /* Disable C1E state if it is enabled by BIOS */
6306                         if ((reg >> AMD_ACTONCMPHALT_SHIFT) &
6307                             AMD_ACTONCMPHALT_MASK) {
6308                                 reg &= ~(AMD_ACTONCMPHALT_MASK <<
6309                                     AMD_ACTONCMPHALT_SHIFT);
6310                                 wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg);
6311                         }
6312                 }
6313                 no_trap();
6314         }
6315 #endif  /* !__xpv */
6316 }
6317 
6318 void
6319 enable_pcid(void)
6320 {
6321         if (x86_use_pcid == -1)
6322                 x86_use_pcid = is_x86_feature(x86_featureset, X86FSET_PCID);
6323 
6324         if (x86_use_invpcid == -1) {
6325                 x86_use_invpcid = is_x86_feature(x86_featureset,
6326                     X86FSET_INVPCID);
6327         }
6328 
6329         if (!x86_use_pcid)
6330                 return;
6331 
6332         /*
6333          * Intel say that on setting PCIDE, it immediately starts using the PCID
6334          * bits; better make sure there's nothing there.
6335          */
6336         ASSERT((getcr3() & MMU_PAGEOFFSET) == PCID_NONE);
6337 
6338         setcr4(getcr4() | CR4_PCIDE);
6339 }
6340 
6341 /*
6342  * Setup necessary registers to enable XSAVE feature on this processor.
6343  * This function needs to be called early enough, so that no xsave/xrstor
6344  * ops will execute on the processor before the MSRs are properly set up.
6345  *
6346  * Current implementation has the following assumption:
6347  * - cpuid_pass1() is done, so that X86 features are known.
6348  * - fpu_probe() is done, so that fp_save_mech is chosen.
6349  */
6350 void
6351 xsave_setup_msr(cpu_t *cpu)
6352 {
6353         ASSERT(fp_save_mech == FP_XSAVE);
6354         ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
6355 
6356         /* Enable OSXSAVE in CR4. */
6357         setcr4(getcr4() | CR4_OSXSAVE);
6358         /*
6359          * Update SW copy of ECX, so that /dev/cpu/self/cpuid will report
6360          * correct value.
6361          */
6362         cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_ecx |= CPUID_INTC_ECX_OSXSAVE;
6363         setup_xfem();
6364 }
6365 
6366 /*
6367  * Starting with the Westmere processor the local
6368  * APIC timer will continue running in all C-states,
6369  * including the deepest C-states.
6370  */
6371 int
6372 cpuid_arat_supported(void)
6373 {
6374         struct cpuid_info *cpi;
6375         struct cpuid_regs regs;
6376 
6377         ASSERT(cpuid_checkpass(CPU, 1));
6378         ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
6379 
6380         cpi = CPU->cpu_m.mcpu_cpi;
6381 
6382         switch (cpi->cpi_vendor) {
6383         case X86_VENDOR_Intel:
6384                 /*
6385                  * Always-running Local APIC Timer is
6386                  * indicated by CPUID.6.EAX[2].
6387                  */
6388                 if (cpi->cpi_maxeax >= 6) {
6389                         regs.cp_eax = 6;
6390                         (void) cpuid_insn(NULL, &regs);
6391                         return (regs.cp_eax & CPUID_CSTATE_ARAT);
6392                 } else {
6393                         return (0);
6394                 }
6395         default:
6396                 return (0);
6397         }
6398 }
6399 
6400 /*
6401  * Check support for Intel ENERGY_PERF_BIAS feature
6402  */
6403 int
6404 cpuid_iepb_supported(struct cpu *cp)
6405 {
6406         struct cpuid_info *cpi = cp->cpu_m.mcpu_cpi;
6407         struct cpuid_regs regs;
6408 
6409         ASSERT(cpuid_checkpass(cp, 1));
6410 
6411         if (!(is_x86_feature(x86_featureset, X86FSET_CPUID)) ||
6412             !(is_x86_feature(x86_featureset, X86FSET_MSR))) {
6413                 return (0);
6414         }
6415 
6416         /*
6417          * Intel ENERGY_PERF_BIAS MSR is indicated by
6418          * capability bit CPUID.6.ECX.3
6419          */
6420         if ((cpi->cpi_vendor != X86_VENDOR_Intel) || (cpi->cpi_maxeax < 6))
6421                 return (0);
6422 
6423         regs.cp_eax = 0x6;
6424         (void) cpuid_insn(NULL, &regs);
6425         return (regs.cp_ecx & CPUID_EPB_SUPPORT);
6426 }
6427 
6428 /*
6429  * Check support for TSC deadline timer
6430  *
6431  * TSC deadline timer provides a superior software programming
6432  * model over local APIC timer that eliminates "time drifts".
6433  * Instead of specifying a relative time, software specifies an
6434  * absolute time as the target at which the processor should
6435  * generate a timer event.
6436  */
6437 int
6438 cpuid_deadline_tsc_supported(void)
6439 {
6440         struct cpuid_info *cpi = CPU->cpu_m.mcpu_cpi;
6441         struct cpuid_regs regs;
6442 
6443         ASSERT(cpuid_checkpass(CPU, 1));
6444         ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
6445 
6446         switch (cpi->cpi_vendor) {
6447         case X86_VENDOR_Intel:
6448                 if (cpi->cpi_maxeax >= 1) {
6449                         regs.cp_eax = 1;
6450                         (void) cpuid_insn(NULL, &regs);
6451                         return (regs.cp_ecx & CPUID_DEADLINE_TSC);
6452                 } else {
6453                         return (0);
6454                 }
6455         default:
6456                 return (0);
6457         }
6458 }
6459 
6460 #if defined(__amd64) && !defined(__xpv)
6461 /*
6462  * Patch in versions of bcopy for high performance Intel Nhm processors
6463  * and later...
6464  */
6465 void
6466 patch_memops(uint_t vendor)
6467 {
6468         size_t cnt, i;
6469         caddr_t to, from;
6470 
6471         if ((vendor == X86_VENDOR_Intel) &&
6472             is_x86_feature(x86_featureset, X86FSET_SSE4_2)) {
6473                 cnt = &bcopy_patch_end - &bcopy_patch_start;
6474                 to = &bcopy_ck_size;
6475                 from = &bcopy_patch_start;
6476                 for (i = 0; i < cnt; i++) {
6477                         *to++ = *from++;
6478                 }
6479         }
6480 }
6481 #endif  /* __amd64 && !__xpv */
6482 
6483 /*
6484  * We're being asked to tell the system how many bits are required to represent
6485  * the various thread and strand IDs. While it's tempting to derive this based
6486  * on the values in cpi_ncore_per_chip and cpi_ncpu_per_chip, that isn't quite
6487  * correct. Instead, this needs to be based on the number of bits that the APIC
6488  * allows for these different configurations. We only update these to a larger
6489  * value if we find one.
6490  */
6491 void
6492 cpuid_get_ext_topo(cpu_t *cpu, uint_t *core_nbits, uint_t *strand_nbits)
6493 {
6494         struct cpuid_info *cpi;
6495 
6496         VERIFY(cpuid_checkpass(CPU, 1));
6497         cpi = cpu->cpu_m.mcpu_cpi;
6498 
6499         if (cpi->cpi_ncore_bits > *core_nbits) {
6500                 *core_nbits = cpi->cpi_ncore_bits;
6501         }
6502 
6503         if (cpi->cpi_nthread_bits > *strand_nbits) {
6504                 *strand_nbits = cpi->cpi_nthread_bits;
6505         }
6506 }
6507 
6508 void
6509 cpuid_pass_ucode(cpu_t *cpu, uchar_t *fset)
6510 {
6511         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
6512         struct cpuid_regs cp;
6513 
6514         /*
6515          * Reread the CPUID portions that we need for various security
6516          * information.
6517          */
6518         if (cpi->cpi_vendor == X86_VENDOR_Intel) {
6519                 /*
6520                  * Check if we now have leaf 7 available to us.
6521                  */
6522                 if (cpi->cpi_maxeax < 7) {
6523                         bzero(&cp, sizeof (cp));
6524                         cp.cp_eax = 0;
6525                         cpi->cpi_maxeax = __cpuid_insn(&cp);
6526                         if (cpi->cpi_maxeax < 7)
6527                                 return;
6528                 }
6529 
6530                 bzero(&cp, sizeof (cp));
6531                 cp.cp_eax = 7;
6532                 cp.cp_ecx = 0;
6533                 (void) __cpuid_insn(&cp);
6534                 cpi->cpi_std[7] = cp;
6535         } else if (cpi->cpi_vendor == X86_VENDOR_AMD) {
6536                 /* No xcpuid support */
6537                 if (cpi->cpi_family < 5 ||
6538                     (cpi->cpi_family == 5 && cpi->cpi_model < 1))
6539                         return;
6540 
6541                 if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8) {
6542                         bzero(&cp, sizeof (cp));
6543                         cp.cp_eax = CPUID_LEAF_EXT_0;
6544                         cpi->cpi_xmaxeax = __cpuid_insn(&cp);
6545                         if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8) {
6546                                 return;
6547                         }
6548                 }
6549 
6550                 bzero(&cp, sizeof (cp));
6551                 cp.cp_eax = CPUID_LEAF_EXT_8;
6552                 (void) __cpuid_insn(&cp);
6553                 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8, &cp);
6554                 cpi->cpi_extd[8] = cp;
6555         } else {
6556                 /*
6557                  * Nothing to do here. Return an empty set which has already
6558                  * been zeroed for us.
6559                  */
6560                 return;
6561         }
6562         cpuid_scan_security(cpu, fset);
6563 }
6564 
6565 /* ARGSUSED */
6566 static int
6567 cpuid_post_ucodeadm_xc(xc_arg_t arg0, xc_arg_t arg1, xc_arg_t arg2)
6568 {
6569         uchar_t *fset;
6570 
6571         fset = (uchar_t *)(arg0 + sizeof (x86_featureset) * CPU->cpu_id);
6572         cpuid_pass_ucode(CPU, fset);
6573 
6574         return (0);
6575 }
6576 
6577 /*
6578  * After a microcode update where the version has changed, then we need to
6579  * rescan CPUID. To do this we check every CPU to make sure that they have the
6580  * same microcode. Then we perform a cross call to all such CPUs. It's the
6581  * caller's job to make sure that no one else can end up doing an update while
6582  * this is going on.
6583  *
6584  * We assume that the system is microcode capable if we're called.
6585  */
6586 void
6587 cpuid_post_ucodeadm(void)
6588 {
6589         uint32_t rev;
6590         int i;
6591         struct cpu *cpu;
6592         cpuset_t cpuset;
6593         void *argdata;
6594         uchar_t *f0;
6595 
6596         argdata = kmem_zalloc(sizeof (x86_featureset) * NCPU, KM_SLEEP);
6597 
6598         mutex_enter(&cpu_lock);
6599         cpu = cpu_get(0);
6600         rev = cpu->cpu_m.mcpu_ucode_info->cui_rev;
6601         CPUSET_ONLY(cpuset, 0);
6602         for (i = 1; i < max_ncpus; i++) {
6603                 if ((cpu = cpu_get(i)) == NULL)
6604                         continue;
6605 
6606                 if (cpu->cpu_m.mcpu_ucode_info->cui_rev != rev) {
6607                         panic("post microcode update CPU %d has differing "
6608                             "microcode revision (%u) from CPU 0 (%u)",
6609                             i, cpu->cpu_m.mcpu_ucode_info->cui_rev, rev);
6610                 }
6611                 CPUSET_ADD(cpuset, i);
6612         }
6613 
6614         kpreempt_disable();
6615         xc_sync((xc_arg_t)argdata, 0, 0, CPUSET2BV(cpuset),
6616             cpuid_post_ucodeadm_xc);
6617         kpreempt_enable();
6618 
6619         /*
6620          * OK, now look at each CPU and see if their feature sets are equal.
6621          */
6622         f0 = argdata;
6623         for (i = 1; i < max_ncpus; i++) {
6624                 uchar_t *fset;
6625                 if (!CPU_IN_SET(cpuset, i))
6626                         continue;
6627 
6628                 fset = (uchar_t *)((uintptr_t)argdata +
6629                     sizeof (x86_featureset) * i);
6630 
6631                 if (!compare_x86_featureset(f0, fset)) {
6632                         panic("Post microcode update CPU %d has "
6633                             "differing security feature (%p) set from CPU 0 "
6634                             "(%p), not appending to feature set", i,
6635                             (void *)fset, (void *)f0);
6636                 }
6637         }
6638 
6639         mutex_exit(&cpu_lock);
6640 
6641         for (i = 0; i < NUM_X86_FEATURES; i++) {
6642                 cmn_err(CE_CONT, "?post-ucode x86_feature: %s\n",
6643                     x86_feature_names[i]);
6644                 if (is_x86_feature(f0, i)) {
6645                         add_x86_feature(x86_featureset, i);
6646                 }
6647         }
6648         kmem_free(argdata, sizeof (x86_featureset) * NCPU);
6649 }