/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
 * Copyright 2014 Josef "Jeff" Sipek <jeffpc@josefsipek.net>
 */
/*
 * Copyright (c) 2010, Intel Corporation.
 * All rights reserved.
 */
/*
 * Portions Copyright 2009 Advanced Micro Devices, Inc.
 */
/*
 * Copyright 2019, Joyent, Inc.
 */

/*
 * CPU Identification logic
 *
 * The purpose of this file and its companion, cpuid_subr.c, is to help deal
 * with the identification of CPUs, their features, and their topologies. More
 * specifically, this file helps drive the following:
 *
 * 1. Enumeration of features of the processor which are used by the kernel to
 *    determine what features to enable or disable. These may be instruction set
 *    enhancements or features that we use.
 *
 * 2. Enumeration of instruction set architecture (ISA) additions that userland
 *    will be told about through the auxiliary vector.
 *
 * 3. Understanding the physical topology of the CPU such as the number of
 *    caches, how many cores it has, whether or not it supports simultaneous
 *    multi-threading (SMT), etc.
 *
 * ------------------------
 * CPUID History and Basics
 * ------------------------
 *
 * The cpuid instruction was added by Intel roughly around the time that the
 * original Pentium was introduced. The purpose of cpuid was to provide, in a
 * programmatic fashion, information about the CPU that previously had to be
 * guessed at. For example, an important part of cpuid is that we can know what
 * extensions to the ISA exist. If you use an invalid opcode you get a #UD, so
 * this method allows a program (whether a user program or the kernel) to
 * determine what exists without crashing or getting a SIGILL. Of course, this
 * was also during the era of the clones and the AMD Am5x86. The vendor name
 * shows up first in cpuid for a reason.
 *
 * cpuid information is broken down into ranges called a 'leaf'. Each leaf puts
 * unique values into the registers %eax, %ebx, %ecx, and %edx and each leaf has
 * its own meaning. The different leaves are broken down into different regions:
 *
 *      [ 0, 7fffffff ]                 This region is called the 'basic'
 *                                      region. This region is generally defined
 *                                      by Intel, though some of the original
 *                                      portions have different meanings based
 *                                      on the manufacturer. These days, Intel
 *                                      adds most new features to this region.
 *                                      AMD adds non-Intel compatible
 *                                      information in the third, extended
 *                                      region. Intel uses this for everything
 *                                      including ISA extensions, CPU
 *                                      features, cache information, topology,
 *                                      and more.
 *
 *                                      There is a hole carved out of this
 *                                      region which is reserved for
 *                                      hypervisors.
 *
 *      [ 40000000, 4fffffff ]          This region, which is found in the
 *                                      middle of the previous region, is
 *                                      explicitly promised to never be used by
 *                                      CPUs. Instead, it is used by hypervisors
 *                                      to communicate information about
 *                                      themselves to the operating system. The
 *                                      values and details are unique for each
 *                                      hypervisor.
 *
 *      [ 80000000, ffffffff ]          This region is called the 'extended'
 *                                      region. Some of the low leaves mirror
 *                                      parts of the basic leaves. This region
 *                                      has generally been used by AMD for
 *                                      various extensions. For example, AMD-
 *                                      specific information about caches,
 *                                      features, and topology are found in this
 *                                      region.
 *
 * To discover the extent of a range, you place its first leaf into %eax, zero
 * %ecx (the sub-leaf register, discussed below; only %eax and %ecx are
 * consumed as inputs), and then issue the cpuid instruction. At the first leaf
 * in each of the ranges, one of the primary things returned is the maximum
 * valid leaf in that range. This allows for discovery of what range of CPUID
 * is valid.
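 *
 * As a hedged illustration (the kernel itself goes through its __cpuid_insn()
 * helper with a struct cpuid_regs rather than raw inline assembly), a minimal
 * GCC-style sketch of issuing the instruction and discovering the valid
 * ranges might look like:
 *
 *      static inline void
 *      cpuid_raw(uint32_t leaf, uint32_t subleaf, uint32_t r[4])
 *      {
 *              __asm__ __volatile__("cpuid"
 *                  : "=a" (r[0]), "=b" (r[1]), "=c" (r[2]), "=d" (r[3])
 *                  : "a" (leaf), "c" (subleaf));
 *      }
 *
 *      uint32_t r[4];
 *      cpuid_raw(0, 0, r);             // r[0] is the maximum basic leaf
 *      cpuid_raw(0x80000000u, 0, r);   // r[0] is the maximum extended leaf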
 *
 * The CPUs have potentially surprising behavior when using an invalid leaf or
 * unimplemented leaf. If the requested leaf is within the valid basic or
 * extended range, but is unimplemented, then %eax, %ebx, %ecx, and %edx will be
 * set to zero. However, if you specify a leaf that is outside of a valid range,
 * then instead it will be filled with the last valid _basic_ leaf. For example,
 * if the maximum basic value is on leaf 0x3, then issuing a cpuid for leaf 4 or
 * an invalid extended leaf will return the information for leaf 3.
 *
 * Some leaves are broken down into sub-leaves. This means that the value
 * depends on both the leaf asked for in %eax and a secondary register. For
 * example, Intel uses the value in %ecx on leaf 7 to indicate a sub-leaf to get
 * additional information. Or when getting topology information in leaf 0xb, the
 * initial value in %ecx changes which level of the topology that you are
 * getting information about.
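 *
 * Continuing the illustrative cpuid_raw() sketch from above, a sub-leaf query
 * simply varies the %ecx input. For instance, on leaf 7, sub-leaf 0 reports
 * the maximum valid sub-leaf in %eax:
 *
 *      uint32_t r[4];
 *      cpuid_raw(7, 0, r);     // %eax (r[0]) is the maximum sub-leaf of 7
 *      cpuid_raw(7, 1, r);     // only meaningful if the prior r[0] was >= 1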
 *
 * cpuid values are always kept to 32 bits regardless of whether or not the
 * program is in 64-bit mode. When executing in 64-bit mode, the upper
 * 32 bits of the register are always set to zero so that the values are the
 * same regardless of execution mode.
 *
 * ----------------------
 * Identifying Processors
 * ----------------------
 *
 * We can identify a processor in two steps. The first step looks at cpuid leaf
 * 0. Leaf 0 contains the processor's vendor information. This is done by
 * putting a 12-character ASCII string in %ebx, %edx, and %ecx (in that order).
 * On AMD, it is 'AuthenticAMD' and on Intel it is 'GenuineIntel'.
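 *
 * A hedged sketch of assembling that string using cpuid_raw() from above; the
 * %ebx, %edx, %ecx ordering is easy to get backwards:
 *
 *      char vendor[13];
 *      uint32_t r[4];
 *
 *      cpuid_raw(0, 0, r);
 *      bcopy(&r[1], vendor + 0, 4);    // %ebx: "Genu" / "Auth"
 *      bcopy(&r[3], vendor + 4, 4);    // %edx: "ineI" / "enti"
 *      bcopy(&r[2], vendor + 8, 4);    // %ecx: "ntel" / "cAMD"
 *      vendor[12] = '\0';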
 *
 * From there, a processor is identified by a combination of three different
 * values:
 *
 *  1. Family
 *  2. Model
 *  3. Stepping
 *
 * Each vendor uses the family and model to uniquely identify a processor. The
 * way that family and model are changed depends on the vendor. For example,
 * Intel has been using family 0x6 for almost all of their processors since the
 * Pentium Pro/Pentium II era, often called the P6. The model is used to
 * identify the exact processor. Different models are often used for the client
 * (consumer) and server parts. Even though each processor often has major
 * architectural differences, they still are considered the same family by
 * Intel.
 *
 * On the other hand, each major AMD architecture generally has its own family.
 * For example, the K8 is family 0xf, Bulldozer 0x15, and Zen 0x17. Within a
 * family, the model number is used to help identify specific processors.
 *
 * The stepping is used to refer to a revision of a specific microprocessor. The
 * term comes from equipment used to produce masks that are used to create
 * integrated circuits.
 *
 * The information is present in leaf 1, %eax. In technical documentation you
 * will see the terms extended model and extended family. The original family,
 * model, and stepping fields were each 4 bits wide, so previously reserved
 * bits were later pressed into service. If the base family is 0xf, then the
 * extended family field is added to it to form the full family. The extended
 * model field supplies the upper four bits of the full model whenever the base
 * family is 0xf (and, on Intel, also when it is 0x6).
 *
 * When we process this information, we store the full family, model, and
 * stepping in the struct cpuid_info members cpi_family, cpi_model, and
 * cpi_step, respectively. Whenever you are performing comparisons with the
 * family, model, and stepping, you should use these members and not the raw
 * values from cpuid. If you must use the raw values from cpuid directly, you
 * must make sure that you add the extended model and family to the base model
 * and family.
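 *
 * A hedged sketch (not the exact code in this file) of deriving the full
 * values from the leaf 1 %eax bits described above:
 *
 *      uint32_t eax = r[0];                    // leaf 1, %eax
 *      uint_t base_family = (eax >> 8) & 0xf;
 *      uint_t family = base_family;
 *      uint_t model = (eax >> 4) & 0xf;
 *      uint_t stepping = eax & 0xf;
 *
 *      if (base_family == 0xf)
 *              family += (eax >> 20) & 0xff;   // extended family, bits 27:20
 *      if (base_family == 0xf || base_family == 0x6)
 *              model |= ((eax >> 16) & 0xf) << 4;  // ext. model, bits 19:16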
 *
 * In general, we do not use information about the family, model, and stepping
 * to determine whether or not a feature is present; that is generally driven by
 * specific leaves. However, when something we care about on the processor is
 * not considered 'architectural', meaning that it is specific to a set of
 * processors and not promised in the architecture model to be consistent from
 * generation to generation, then we will fall back on this information. The
 * most common cases where this comes up are when we have to work around errata
 * in the processor, are dealing with processor-specific features such as CPU
 * performance counters, or we want to provide additional information for
 * things such as fault management.
 *
 * While processors also do have a brand string, which is the name that people
 * are familiar with when buying the processor, it is not meant for
 * programmatic consumption. That is what the family, model, and stepping are
 * for.
 *
 * ------------
 * CPUID Passes
 * ------------
 *
 * As part of performing feature detection, we break this into several different
 * passes. The passes are as follows:
 *
 *      Pass 0          This is a primordial pass done in locore.s to deal with
 *                      Cyrix CPUs that don't support cpuid. The reality is that
 *                      we likely don't run on them any more, but there is still
 *                      logic for handling them.
 *
 *      Pass 1          This is the primary pass and is responsible for doing a
 *                      large number of different things:
 *
 *                      1. Determining which vendor manufactured the CPU and
 *                      the family, model, and stepping information.
 *
 *                      2. Gathering a large number of feature flags to
 *                      determine which features the CPU supports and which
 *                      indicate things that we need to do other work in the OS
 *                      to enable. Features detected this way are added to the
 *                      x86_featureset which can be queried to
 *                      determine what we should do. This includes processing
 *                      all of the basic and extended CPU features that we care
 *                      about.
 *
 *                      3. Determining the CPU's topology. This includes
 *                      information about how many cores and threads are present
 *                      in the package. It also is responsible for figuring out
 *                      which logical CPUs are potentially part of the same core
 *                      and what other resources they might share. For more
 *                      information see the 'Topology' section.
 *
 *                      4. Determining the set of CPU security-specific features
 *                      that we need to worry about and determine the
 *                      appropriate set of workarounds.
 *
 *                      Pass 1 on the boot CPU occurs before KMDB is started.
 *
 *      Pass 2          The second pass is done after startup(). Here, we check
 *                      other miscellaneous features. Most of this is gathering
 *                      additional basic and extended features that we'll use in
 *                      later passes or for debugging support.
 *
 *      Pass 3          The third pass occurs after the kernel memory allocator
 *                      has been fully initialized. This gathers information
 *                      where we might need dynamic memory available for our
 *                      uses. This includes several varying width leaves that
 *                      have cache information and the processor's brand string.
 *
 *      Pass 4          The fourth and final normal pass is performed after the
 *                      kernel has brought most everything online. This is
 *                      invoked from post_startup(). In this pass, we go through
 *                      the set of features that we have enabled and turn that
 *                      into the hardware auxiliary vector features that
 *                      userland receives. This is used by userland, primarily
 *                      by the run-time link-editor (RTLD), though userland
 *                      software could also refer to it directly.
 *
 *      Microcode       After a microcode update, we do a selective rescan of
 *                      the cpuid leaves to determine what features have
 *                      changed. Microcode updates can provide more details
 *                      about security related features to deal with issues like
 *                      Spectre and L1TF. On occasion, vendors have violated
 *                      their contract and removed bits. However, we don't try
 *                      to detect that because that puts us in a situation that
 *                      we really can't deal with. As such, the only things we
 *                      rescan today are security related features. See
 *                      cpuid_pass_ucode().
 *
 * All of the passes (except pass 0) are run on all CPUs. However, for the most
 * part we only care about what the boot CPU says about this information and use
 * the other CPUs as a rough guide to sanity check that we have the same feature
 * set.
 *
 * We do not support running multiple logical CPUs with different, let alone
 * disjoint, feature sets.
 *
 * ------------------
 * Processor Topology
 * ------------------
 *
 * One of the important things that we need to do is to understand the topology
 * of the underlying processor. When we say topology in this case, we're trying
 * to understand the relationship between the logical CPUs that the operating
 * system sees and the underlying physical layout. Different logical CPUs may
 * share different resources which can have important consequences for the
 * performance of the system. For example, they may share caches, execution
 * units, and more.
 *
 * The topology of the processor changes from generation to generation and
 * vendor to vendor.  Along with that, different vendors use different
 * terminology, and the operating system itself uses occasionally overlapping
 * terminology. It's important to understand what this topology looks like so
 * one can understand the different things that we try to calculate and
 * determine.
 *
 * To get started, let's talk about a little bit of terminology that we've used
 * so far, is used throughout this file, and is fairly generic across multiple
 * vendors:
 *
 * CPU
 *      A central processing unit (CPU) refers to a logical and/or virtual
 *      entity that the operating system can execute instructions on. The
 *      underlying resources for this CPU may be shared between multiple
 *      entities; however, to the operating system it is a discrete unit.
 *
 * PROCESSOR and PACKAGE
 *
 *      Generally, when we use the term 'processor' on its own, we are referring
 *      to the physical entity that one buys and plugs into a board. However,
 *      because processor has been overloaded and one might see it used to mean
 *      multiple different levels, we will instead use the term 'package' for
 *      the rest of this file. The term package comes from the electrical
 *      engineering side and refers to the physical entity that encloses the
 *      electronics inside. Strictly speaking, the package can contain more than
 *      just the CPU; for example, on many processors it may also have what's
 *      called an 'integrated graphics processing unit (GPU)'. Because the
 *      package can encapsulate multiple units, it is the largest physical unit
 *      that we refer to.
 *
 * SOCKET
 *
 *      A socket refers to a unit on a system board (generally the motherboard)
 *      that can receive a package. A single package, or processor, is plugged
 *      into a single socket. A system may have multiple sockets. Oftentimes,
 *      the term socket is used interchangeably with package and refers to the
 *      electrical component that is plugged in, and not the receptacle itself.
 *
 * CORE
 *
 *      A core refers to the physical instantiation of a CPU, generally with a
 *      full set of hardware resources available to it. A package may contain
 *      multiple cores inside of it or it may just have a single one. A
 *      processor with more than one core is often referred to as 'multi-core'.
 *      In illumos, we will use the feature X86FSET_CMP to refer to a system
 *      that has 'multi-core' processors.
 *
 *      A core may expose a single logical CPU to the operating system, or it
 *      may expose multiple CPUs, which we call threads, defined below.
 *
 *      Some resources may still be shared by cores in the same package. For
 *      example, many processors will share the level 3 cache between cores.
 *      Some AMD generations share hardware resources between cores. For more
 *      information on that see the section 'AMD Topology'.
 *
 * THREAD and STRAND
 *
 *      In this file, generally a thread refers to hardware resources and not
 *      the operating system's logical abstraction. A thread is always exposed
 *      as an independent logical CPU to the operating system. A thread belongs
 *      to a specific core. A core may have more than one thread. When that is
 *      the case, the threads that are part of the same core are often referred
 *      to as 'siblings'.
 *
 *      When multiple threads exist, this is generally referred to as
 *      simultaneous multi-threading (SMT). When Intel introduced this in their
 *      processors they called it hyper-threading (HT). When multiple threads
 *      are active in a core, they split the resources of the core. For example,
 *      two threads may share the same set of hardware execution units.
 *
 *      The operating system often uses the term 'strand' to refer to a thread.
 *      This helps disambiguate it from the software concept.
 *
 * CHIP
 *
 *      Unfortunately, the term 'chip' is dramatically overloaded. At its most
 *      base meaning, it is used to refer to a single integrated circuit, which
 *      may or may not be the only thing in the package. In illumos, when you
 *      see the term 'chip' it is almost always referring to the same thing as
 *      the 'package'. However, many vendors may use chip to refer to one of
 *      many integrated circuits that have been placed in the package. As an
 *      example, see the subsequent definition.
 *
 *      To try and keep things consistent, we will only use chip when referring
 *      to the entire integrated circuit package, with the exception of the
 *      definition of multi-chip module (because it is in the name) and use the
 *      term 'die' when we want the more general, potential sub-component
 *      definition.
 *
 * DIE
 *
 *      A die refers to an integrated circuit. Inside of the package there may
 *      be a single die or multiple dies. This is sometimes called a 'chip' in
 *      vendor's parlance, but in this file, we use the term die to refer to a
 *      subcomponent.
 *
 * MULTI-CHIP MODULE
 *
 *      A multi-chip module (MCM) refers to putting multiple distinct chips that
 *      are connected together in the same package. When a multi-chip design is
 *      used, generally each chip is manufactured independently and then joined
 *      together in the package. For example, on AMD's Zen microarchitecture
 *      (family 0x17), the package contains several dies (the second meaning of
 *      chip from above) that are connected together.
 *
 * CACHE
 *
 *      A cache is a part of the processor that maintains copies of recently
 *      accessed memory. Caches are split into levels and then into types.
 *      Commonly there are one to three levels, called level one, two, and
 *      three. The lower the level, the smaller it is, the closer it is to the
 *      execution units of the CPU, and the faster it is to access. The layout
 *      and design of the cache come in many different flavors; consult other
 *      resources for a discussion of those.
 *
 *      Caches are generally split into two types: the instruction cache and
 *      the data cache. The caches contain what their names suggest; the
 *      instruction cache has executable program text, while the data cache has
 *      all other memory that the processor accesses. As of this writing, data
 *      is kept coherent between all of the caches on x86, so if one modifies
 *      program text before it is executed, that will be in the data cache, and
 *      the instruction cache will be synchronized with that change when the
 *      processor actually executes those instructions. This coherency also
 *      covers the fact that data could show up in multiple caches.
 *
 *      Generally, the lowest level caches are specific to a core. However, the
 *      last level cache is shared between some number of cores. The number of
 *      CPUs sharing this last level cache is important. This has implications
 *      for the choices that the scheduler makes, as accessing memory that might
 *      be in a remote cache after thread migration can be quite expensive.
 *
 *      Sometimes, the word cache is abbreviated with a '$', because in US
 *      English the word cache is pronounced the same as cash. So L1D$ refers to
 *      the L1 data cache, and L2$ would be the L2 cache. This will not be used
 *      in the rest of this theory statement for clarity.
 *
 * MEMORY CONTROLLER
 *
 *      The memory controller is a component that provides access to DRAM. Each
 *      memory controller can access a set number of DRAM channels. Each channel
 *      can have a number of DIMMs (sticks of memory) associated with it. A
 *      given package may have more than one memory controller. The association
 *      of the memory controller to a group of cores is important as it is
 *      cheaper to access memory on the controller that you are associated with.
 *
 * NUMA
 *
 *      NUMA, or non-uniform memory access, describes a way that systems are
 *      built. On x86, any processor core can address all of the memory in the
 *      system. However, when using multiple sockets or possibly within a
 *      multi-chip module, some of that memory is physically closer and some of
 *      it is further. Memory that is further away is more expensive to access.
 *      Consider the following image of multiple sockets with memory:
 *
 *      +--------+                                                +--------+
 *      | DIMM A |         +----------+      +----------+         | DIMM D |
 *      +--------+-+       |          |      |          |       +-+------+-+
 *        | DIMM B |=======| Socket 0 |======| Socket 1 |=======| DIMM E |
 *        +--------+-+     |          |      |          |     +-+------+-+
 *          | DIMM C |     +----------+      +----------+     | DIMM F |
 *          +--------+                                        +--------+
 *
 *      In this example, Socket 0 is closer to DIMMs A-C while Socket 1 is
 *      closer to DIMMs D-F. This means that it is cheaper for socket 0 to
 *      access DIMMs A-C and more expensive to access D-F as it has to go
 *      through Socket 1 to get there. The inverse is true for Socket 1: DIMMs
 *      D-F are cheaper for it than A-C. While the socket form is the most
 *      common, when using multi-chip modules, this can also sometimes occur.
 *      For another example of this that's more involved, see the AMD topology
 *      section.
 *
 *
 * Intel Topology
 * --------------
 *
 * Most Intel processors since Nehalem (as of this writing, the current
 * generation is Skylake / Cannon Lake) follow a fairly similar pattern. The
 * CPU portion of the package is a single monolithic die. MCMs currently aren't
 * used. Most parts have three levels of caches, with the L3 cache being shared
 * between all of the cores on the package. The L1/L2 cache is generally
 * specific to an individual core. The following image shows at a simplified
 * level what this looks like. The memory controller is commonly part of
 * something called the 'Uncore', which historically lived on separate physical
 * chips that were not a part of the package, but is now part of the same chip.
 *
 *  +-----------------------------------------------------------------------+
 *  | Package                                                               |
 *  |  +-------------------+  +-------------------+  +-------------------+  |
 *  |  | Core              |  | Core              |  | Core              |  |
 *  |  |  +--------+ +---+ |  |  +--------+ +---+ |  |  +--------+ +---+ |  |
 *  |  |  | Thread | | L | |  |  | Thread | | L | |  |  | Thread | | L | |  |
 *  |  |  +--------+ | 1 | |  |  +--------+ | 1 | |  |  +--------+ | 1 | |  |
 *  |  |  +--------+ |   | |  |  +--------+ |   | |  |  +--------+ |   | |  |
 *  |  |  | Thread | |   | |  |  | Thread | |   | |  |  | Thread | |   | |  |
 *  |  |  +--------+ +---+ |  |  +--------+ +---+ |  |  +--------+ +---+ |  |
 *  |  |  +--------------+ |  |  +--------------+ |  |  +--------------+ |  |
 *  |  |  | L2 Cache     | |  |  | L2 Cache     | |  |  | L2 Cache     | |  |
 *  |  |  +--------------+ |  |  +--------------+ |  |  +--------------+ |  |
 *  |  +-------------------+  +-------------------+  +-------------------+  |
 *  | +-------------------------------------------------------------------+ |
 *  | |                         Shared L3 Cache                           | |
 *  | +-------------------------------------------------------------------+ |
 *  | +-------------------------------------------------------------------+ |
 *  | |                        Memory Controller                          | |
 *  | +-------------------------------------------------------------------+ |
 *  +-----------------------------------------------------------------------+
 *
 * A side effect of this current architecture is that what we care about from a
 * scheduling and topology perspective is simplified. In general, we care about
 * understanding which logical CPUs are part of the same core and socket.
 *
 * To determine the relationship between threads and cores, Intel initially used
 * the identifier in the advanced programmable interrupt controller (APIC). They
 * also added cpuid leaf 4 to give additional information about the number of
 * threads and CPUs in the processor. With the addition of x2apic (which
 * increased the number of addressable logical CPUs from 8 bits to 32 bits), an
 * additional cpuid topology leaf 0xB was added.
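 *
 * A hedged sketch (again using the illustrative cpuid_raw() from earlier) of
 * walking leaf 0xB's levels: the level type is in bits 15:8 of %ecx (1 is
 * SMT/thread, 2 is core, 0 terminates the walk), bits 4:0 of %eax give the
 * number of APIC ID bits consumed below that level, and %edx holds the
 * x2apic ID of the logical CPU issuing the instruction:
 *
 *      uint32_t r[4], lvl;
 *      uint_t type, shift;
 *
 *      for (lvl = 0; ; lvl++) {
 *              cpuid_raw(0xb, lvl, r);
 *              type = (r[2] >> 8) & 0xff;
 *              if (type == 0)
 *                      break;          // invalid level: we're done
 *              shift = r[0] & 0x1f;    // APIC ID bits below this level
 *      }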
 *
 * AMD Topology
 * ------------
 *
 * When discussing AMD topology, we want to break this into three distinct
 * generations of topology. There's the basic topology that has been used in
 * family 0xf+ (Opteron, Athlon64), there's the topology that was introduced
 * with family 0x15 (Bulldozer), and there's the topology that was introduced
 * with family 0x17 (Zen). AMD also has some additional terminology that's worth
 * talking about.
 *
 * Until the introduction of family 0x17 (Zen), AMD did not implement something
 * that they considered SMT. Whether or not the AMD processors have SMT
 * influences many things including scheduling and reliability, availability,
 * and serviceability (RAS) features.
 *
 * NODE
 *
 *      AMD uses the term node to refer to a die that contains a number of cores
 *      and I/O resources. Depending on the processor family and model, more
 *      than one node can be present in the package. When there is more than one
 *      node this indicates a multi-chip module. Usually each node has its own
 *      access to memory and I/O devices. This is important and generally
 *      different from the corresponding Intel Nehalem-Skylake+ processors. As a
 *      result, we track this relationship in the operating system.
 *
 *      In processors with an L3 cache, the L3 cache is generally shared across
 *      the entire node, though the way this is carved up varies from generation
 *      to generation.
 *
 * BULLDOZER
 *
 *      Starting with the Bulldozer family (0x15) and continuing until the
 *      introduction of the Zen microarchitecture, AMD introduced the idea of a
 *      compute unit. In a compute unit, two traditional cores share a number of
 *      hardware resources. Critically, they share the FPU, L1 instruction
 *      cache, and the L2 cache. Several compute units were then combined inside
 *      of a single node.  Because the integer execution units, L1 data cache,
 *      and some other resources were not shared between the cores, AMD never
 *      considered this to be SMT.
 *
 * ZEN
 *
 *      The Zen family (0x17) uses a multi-chip module (MCM) design; the module
 *      is called Zeppelin. These modules are similar to the idea of nodes used
 *      previously. Each of these nodes has two DRAM channels which all of the
 *      cores in the node can access uniformly. These nodes are linked together
 *      in the package, creating a NUMA environment.
 *
 *      The Zeppelin die itself contains two different 'core complexes'. Each
 *      core complex consists of four cores which each have two threads, for a
 *      total of 8 logical CPUs per complex. Unlike other generations,
 *      where all the logical CPUs in a given node share the L3 cache, here each
 *      core complex has its own shared L3 cache.
 *
 *      A further thing that we need to consider is that in some configurations,
 *      particularly with the Threadripper line of processors, not every die
 *      actually has its memory controllers wired up to actual memory channels.
 *      This means that some cores have memory attached to them and others
 *      don't.
 *
 *      To put Zen in perspective, consider the following images:
 *
 *      +--------------------------------------------------------+
 *      | Core Complex                                           |
 *      | +-------------------+    +-------------------+  +---+  |
 *      | | Core       +----+ |    | Core       +----+ |  |   |  |
 *      | | +--------+ | L2 | |    | +--------+ | L2 | |  |   |  |
 *      | | | Thread | +----+ |    | | Thread | +----+ |  |   |  |
 *      | | +--------+-+ +--+ |    | +--------+-+ +--+ |  | L |  |
 *      | |   | Thread | |L1| |    |   | Thread | |L1| |  | 3 |  |
 *      | |   +--------+ +--+ |    |   +--------+ +--+ |  |   |  |
 *      | +-------------------+    +-------------------+  | C |  |
 *      | +-------------------+    +-------------------+  | a |  |
 *      | | Core       +----+ |    | Core       +----+ |  | c |  |
 *      | | +--------+ | L2 | |    | +--------+ | L2 | |  | h |  |
 *      | | | Thread | +----+ |    | | Thread | +----+ |  | e |  |
 *      | | +--------+-+ +--+ |    | +--------+-+ +--+ |  |   |  |
 *      | |   | Thread | |L1| |    |   | Thread | |L1| |  |   |  |
 *      | |   +--------+ +--+ |    |   +--------+ +--+ |  |   |  |
 *      | +-------------------+    +-------------------+  +---+  |
 *      |                                                        |
 *      +--------------------------------------------------------+
 *
 *  This first image represents a single Zen core complex that consists of four
 *  cores.
 *
 *
 *      +--------------------------------------------------------+
 *      | Zeppelin Die                                           |
 *      |  +--------------------------------------------------+  |
 *      |  |         I/O Units (PCIe, SATA, USB, etc.)        |  |
 *      |  +--------------------------------------------------+  |
 *      |                           HH                           |
 *      |          +-----------+    HH    +-----------+          |
 *      |          |           |    HH    |           |          |
 *      |          |    Core   |==========|    Core   |          |
 *      |          |  Complex  |==========|  Complex  |          |
 *      |          |           |    HH    |           |          |
 *      |          +-----------+    HH    +-----------+          |
 *      |                           HH                           |
 *      |  +--------------------------------------------------+  |
 *      |  |                Memory Controller                 |  |
 *      |  +--------------------------------------------------+  |
 *      |                                                        |
 *      +--------------------------------------------------------+
 *
 *  This image represents a single Zeppelin Die. Note how both core complexes
 *  are connected to the same memory controller and I/O units. While each core
 *  complex has its own L3 cache as seen in the first image, they both have
 *  uniform access to memory.
 *
 *
 *                      PP                     PP
 *                      PP                     PP
 *           +----------PP---------------------PP---------+
 *           |          PP                     PP         |
 *           |    +-----------+          +-----------+    |
 *           |    |           |          |           |    |
 *       MMMMMMMMM|  Zeppelin |==========|  Zeppelin |MMMMMMMMM
 *       MMMMMMMMM|    Die    |==========|    Die    |MMMMMMMMM
 *           |    |           |          |           |    |
 *           |    +-----------+ooo    ...+-----------+    |
 *           |          HH      ooo  ...       HH         |
 *           |          HH        oo..         HH         |
 *           |          HH        ..oo         HH         |
 *           |          HH      ...  ooo       HH         |
 *           |    +-----------+...    ooo+-----------+    |
 *           |    |           |          |           |    |
 *       MMMMMMMMM|  Zeppelin |==========|  Zeppelin |MMMMMMMMM
 *       MMMMMMMMM|    Die    |==========|    Die    |MMMMMMMMM
 *           |    |           |          |           |    |
 *           |    +-----------+          +-----------+    |
 *           |          PP                     PP         |
 *           +----------PP---------------------PP---------+
 *                      PP                     PP
 *                      PP                     PP
 *
 *  This image represents a single Zen package. In this example, it has four
 *  Zeppelin dies, though some configurations only have a single one. In this
 *  example, each die is directly connected to the next. Also, each die is
 *  represented as being connected to memory by the 'M' character and connected
 *  to PCIe devices and other I/O by the 'P' character. Because each Zeppelin
 *  die is made up of two core complexes, we have multiple different NUMA
 *  domains that we care about for these systems.
 *
 * CPUID LEAVES
 *
 * There are a few different CPUID leaves that we can use to try and understand
 * the actual state of the world. As part of the introduction of family 0xf, AMD
 * added CPUID leaf 0x80000008. This leaf tells us the number of logical
 * processors that are in the system. Because families before Zen didn't have
 * SMT, this was always the number of cores that were in the system. However, it
 * should always be thought of as the number of logical threads to be consistent
 * between generations. In addition we also get the size of the APIC ID that is
 * used to represent the number of logical processors. This is important for
 * deriving topology information.
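 *
 * A hedged sketch of reading those leaf 0x80000008 fields with the
 * illustrative cpuid_raw() from earlier (field positions per AMD's
 * documentation; this is not the exact code below):
 *
 *      uint32_t r[4];
 *      uint_t nthreads, apic_sz;
 *
 *      cpuid_raw(0x80000008u, 0, r);
 *      nthreads = (r[2] & 0xff) + 1;   // %ecx bits 7:0, stored minus one
 *      apic_sz = (r[2] >> 12) & 0xf;   // %ecx bits 15:12; 0 on older parts,
 *                                      // meaning derive it from nthreads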
 *
 * In the Bulldozer family, AMD added leaf 0x8000001E. The information varies a
 * bit between Bulldozer and later families, but it is quite useful in
 * determining the topology information. Because this information has changed
 * across family generations, it's worth calling out what these mean
 * explicitly. The registers have the following meanings:
 *
 *      %eax    The APIC ID. The entire register is defined to have a 32-bit
 *              APIC ID, even though on systems without x2apic support, it will
 *              be limited to 8 bits.
 *
 *      %ebx    On Bulldozer-era systems this contains information about the
 *              number of cores that are in a compute unit (cores that share
 *              resources). It also contains a per-package compute unit ID that
 *              identifies which compute unit the logical CPU is a part of.
 *
 *              On Zen-era systems this instead contains the number of threads
 *              per core and the ID of the core that the logical CPU is a part
 *              of. Note, this ID is unique only to the package; it is not
 *              globally unique across the entire system.
 *
 *      %ecx    This contains the number of nodes that exist in the package. It
 *              also contains an ID that identifies which node the logical CPU
 *              is a part of.
 *
 * Finally, we also use cpuid leaf 0x8000001D to determine information about the
 * cache layout to determine which logical CPUs are sharing which caches.
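 *
 * As a hedged illustration of the Zen-era (family 0x17) encoding of leaf
 * 0x8000001E described above (Bulldozer-era parts encode compute-unit fields
 * in %ebx instead):
 *
 *      uint32_t r[4];
 *      uint_t threads_per_core, coreid, nodes_per_pkg, nodeid;
 *
 *      cpuid_raw(0x8000001eu, 0, r);
 *      threads_per_core = ((r[1] >> 8) & 0xff) + 1;    // %ebx bits 15:8
 *      coreid = r[1] & 0xff;                   // per-package core ID
 *      nodes_per_pkg = ((r[2] >> 8) & 0x7) + 1;        // %ecx bits 10:8
 *      nodeid = r[2] & 0xff;                   // node this CPU belongs to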
 *
 * illumos Topology
 * ----------------
 *
 * Based on the above we synthesize the information into several different
 * variables that we store in the 'struct cpuid_info'. We'll go into the details
 * of what each member is supposed to represent and their uniqueness. In
 * general, there are two levels of uniqueness that we care about. We care about
 * an ID that is globally unique. That means that it will be unique across all
 * entities in the system. For example, the default logical CPU ID is globally
 * unique. On the other hand, there is some information that we only care about
 * being unique within the context of a single package / socket. Here are the
 * variables that we keep track of and their meaning.
 *
 * Several of the values below that serve as identifiers, with the exception
 * of cpi_apicid, are allowed to be synthetic.
 *
 *
 * cpi_apicid
 *
 *      This is the value of the CPU's APIC id. This should be the full 32-bit
 *      ID if the CPU is using the x2apic. Otherwise, it should be the 8-bit
 *      APIC ID. This value is globally unique between all logical CPUs across
 *      all packages. This is usually required by the APIC.
 *
 * cpi_chipid
 *
 *      This value indicates the ID of the package that the logical CPU is a
 *      part of. This value is allowed to be synthetic. It is usually derived by
 *      taking the CPU's APIC ID and determining how many bits are used to
 *      represent CPU cores in the package. All logical CPUs that are part of
 *      the same package must have the same value.
 *
 * cpi_coreid
 *
 *      This represents the ID of a CPU core. Two logical CPUs should only have
 *      the same cpi_coreid value if they are part of the same core. These
 *      values may be synthetic. On systems that support SMT, this value is
 *      usually derived from the APIC ID, otherwise it is often synthetic and
 *      just set to the value of the cpu_id in the cpu_t.
 *
 * cpi_pkgcoreid
 *
 *      This is similar to the cpi_coreid in that logical CPUs that are part of
 *      the same core should have the same ID. The main difference is that these
 *      values are only required to be unique to a given socket.
 *
 * cpi_clogid
 *
 *      This represents the logical ID of a logical CPU. This value should be
 *      unique within a given socket for each logical CPU. This is allowed to be
 *      synthetic, though it is usually based off of the CPU's apic ID. The
 *      broader system expects that logical CPUs that are part of the same core
 *      have contiguous numbers. For example, if there were two threads per
 *      core, then the IDs of sibling CPUs divided by two should be equal, with
 *      the first sibling's ID even and the second's odd. IDs 4 and 5 thus
 *      indicate two logical CPUs that are part of the same core, but IDs 5 and
 *      6 represent two logical CPUs that are part of different cores, as
 *      sketched below.
 *
 *      While it is common for the cpi_coreid and the cpi_clogid to be derived
 *      from the same source, strictly speaking, they don't have to be and the
 *      two values should be considered logically independent. One should not
 *      try to compare a logical CPU's cpi_coreid and cpi_clogid to determine
 *      some kind of relationship. While this is tempting, we've seen cases on
 *      AMD family 0xf where the system's cpu id is not related to its APIC ID.
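 *
 *      A hedged sketch of that sibling test in the two-threads-per-core case
 *      (a and b are hypothetical pointers to two logical CPUs' struct
 *      cpuid_info):
 *
 *              boolean_t same_core = ((a->cpi_clogid >> 1) ==
 *                  (b->cpi_clogid >> 1));  // true for IDs 4,5; false for 5,6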
 *
 * cpi_ncpu_per_chip
 *
 *      This value indicates the total number of logical CPUs that exist in the
 *      physical package. Critically, this is not the number of logical CPUs
 *      that exist for just the single core.
 *
 *      This value should be the same for all logical CPUs in the same package.
 *
 * cpi_ncore_per_chip
 *
 *      This value indicates the total number of physical CPU cores that exist
 *      in the package. The system compares this value with cpi_ncpu_per_chip to
 *      determine if simultaneous multi-threading (SMT) is enabled. When
 *      cpi_ncpu_per_chip equals cpi_ncore_per_chip, then there is no SMT and
 *      the X86FSET_HTT feature is not set. If this value is greater than one,
 *      then we consider the processor to have the feature X86FSET_CMP, to
 *      indicate that there is support for more than one core, as sketched
 *      below.
 *
 *      This value should be the same for all logical CPUs in the same package.
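 *
 *      A hedged sketch of those two comparisons (illustrative only; cpi is a
 *      hypothetical pointer to the CPU's struct cpuid_info, and this is not
 *      the exact code in this file):
 *
 *              if (cpi->cpi_ncpu_per_chip > cpi->cpi_ncore_per_chip)
 *                      add_x86_feature(featureset, X86FSET_HTT);
 *              if (cpi->cpi_ncore_per_chip > 1)
 *                      add_x86_feature(featureset, X86FSET_CMP);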
 *
 * cpi_procnodes_per_pkg
 *
 *      This value indicates the number of 'nodes' that exist in the package.
 *      When processors are actually a multi-chip module, this represents the
 *      number of such modules that exist in the package. Currently, on Intel
 *      based systems this member is always set to 1.
 *
 *      This value should be the same for all logical CPUs in the same package.
 *
 * cpi_procnodeid
 *
 *      This value indicates the ID of the node that the logical CPU is a part
 *      of. All logical CPUs that are in the same node must have the same value
 *      here. This value must be unique across all of the packages in the
 *      system.  On Intel based systems, this is currently set to the value in
 *      cpi_chipid because there is only one node.
 *
 * cpi_cores_per_compunit
 *
 *      This value indicates the number of cores that are part of a compute
 *      unit. See the AMD topology section for this. This member only has real
 *      meaning currently for AMD Bulldozer family processors. For all other
 *      processors, this should currently be set to 1.
 *
 * cpi_compunitid
 *
 *      This indicates the compute unit that the logical CPU belongs to. For
 *      processors without AMD Bulldozer-style compute units this should be set
 *      to the value of cpi_coreid.
 *
 * cpi_ncpu_shr_last_cache
 *
 *      This indicates the number of logical CPUs that are sharing the same last
 *      level cache. This value should be the same for all CPUs that are sharing
 *      that cache. The last cache refers to the cache that is closest to memory
 *      and furthest away from the CPU.
 *
 * cpi_last_lvl_cacheid
 *
 *      This indicates the ID of the last cache that the logical CPU uses. This
 *      cache is often shared between multiple logical CPUs and is the cache
 *      that is closest to memory and furthest away from the CPU. This value
 *      should be the same for a group of logical CPUs only if they actually
 *      share the same last level cache. IDs should not overlap between
 *      packages.
 *
 * cpi_ncore_bits
 *
 *      This indicates the number of bits that are required to represent all of
 *      the cores in the system. As cores are derived based on their APIC IDs,
 *      we aren't guaranteed a run of APIC IDs starting from zero. It's OK for
 *      this value to be larger than the actual number of IDs that are present
 *      in the system. This is used to size tables by the CMI framework. It is
 *      only filled in for Intel and AMD CPUs.
 *
 * cpi_nthread_bits
 *
 *      This indicates the number of bits required to represent all of the IDs
 *      that cover the logical CPUs that exist on a given core. It's OK for this
 *      value to be larger than the actual number of IDs that are present in the
 *      system.  This is used to size tables by the CMI framework. It is
 *      only filled in for Intel and AMD CPUs.
 *
 * -----------
 * Hypervisors
 * -----------
 *
 * If trying to manage the differences between vendors wasn't bad enough, it can
 * get worse thanks to our friend hardware virtualization. Hypervisors are given
 * the ability to interpose on all cpuid instructions and change them to suit
 * their purposes. In general, this is necessary as the hypervisor wants to be
 * able to present a more uniform set of features or not necessarily give the
 * guest operating system kernel knowledge of all features so it can be
 * more easily migrated between systems.
 *
 * When it comes to trying to determine topology information, this can be a
 * double-edged sword. When a hypervisor doesn't actually implement a cpuid
 * leaf, it'll often return all zeros. Because of that, you'll often see various
 * checks scattered about that verify fields are non-zero before we assume we
 * can use them.
 *
 * When it comes to topology information, the hypervisor is often incentivized
 * to lie to you about topology. This is because it doesn't always actually
 * guarantee that topology at all. The topology path we take in the system
 * depends on how the CPU advertises itself. If it advertises itself as an Intel
 * or AMD CPU, then we basically do our normal path. However, when they don't
 * use an actual vendor, then that usually turns into multiple one-core CPUs
 * that we enumerate that are often on different sockets. The actual behavior
 * depends greatly on what the hypervisor actually exposes to us.
 *
 * --------------------
 * Exposing Information
 * --------------------
 *
 * We expose CPUID information in three different forms in the system.
 *
 * The first is through the x86_featureset variable. This is used in conjunction
 * with the is_x86_feature() function. This is queried by x86-specific functions
 * to determine which features are or aren't present in the system and to make
 * decisions based upon them. For example, users of this include everything from
 * parts of the system dedicated to reliability, availability, and
 * serviceability (RAS), to making decisions about how to handle security
 * mitigations, to various x86-specific drivers. General purpose or
 * architecture-independent drivers should never be calling this function.
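 *
 * A minimal sketch of that kernel-internal usage (X86FSET_SSE2 is one of the
 * feature constants that pairs with the "sse2" name in the table below):
 *
 *      if (is_x86_feature(x86_featureset, X86FSET_SSE2)) {
 *              // safe to take an SSE2-dependent path
 *      }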
 *
 * The second means is through the auxiliary vector. The auxiliary vector is a
 * series of tagged data that the kernel passes down to a user program when it
 * begins executing. This information is used to indicate to programs what
 * instruction set extensions are present. For example, information about the
 * CPU supporting the machine check architecture (MCA) wouldn't be passed down
 * since user programs cannot make use of it. However, things like the AVX
 * instruction sets are. Programs use this information to make run-time
 * decisions about what features they should use. As an example, the run-time
 * link-editor (rtld) can relocate different functions depending on the hardware
 * support available.
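 *
 * A hedged sketch of how a user program might consume this, via getisax(2)
 * and the AV_386_* bits from <sys/auxv_386.h> (AV_386_AES is used here purely
 * for illustration):
 *
 *      #include <sys/auxv.h>
 *
 *      uint32_t hw;
 *
 *      (void) getisax(&hw, 1);
 *      if (hw & AV_386_AES) {
 *              // use an AES-NI accelerated path
 *      }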
 *
 * The final form is through a series of accessor functions that all have the
 * form cpuid_get*. This is used by a number of different subsystems in the
 * kernel to determine more detailed information about what we're running on,
 * topology information, etc. Some of these subsystems include processor groups
 * (uts/common/os/pg.c), CPU Module Interface (uts/i86pc/os/cmi.c), ACPI,
 * microcode, and performance monitoring. These functions all ASSERT that the
 * CPU they're being called on has reached a certain cpuid pass. If the passes
 * are rearranged, then this needs to be adjusted.
 */

#include <sys/types.h>
#include <sys/archsystm.h>
#include <sys/x86_archext.h>
#include <sys/kmem.h>
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/cpuvar.h>
#include <sys/processor.h>
#include <sys/sysmacros.h>
#include <sys/pg.h>
#include <sys/fp.h>
#include <sys/controlregs.h>
#include <sys/bitmap.h>
#include <sys/auxv_386.h>
#include <sys/memnode.h>
#include <sys/pci_cfgspace.h>
#include <sys/comm_page.h>
#include <sys/mach_mmu.h>
#include <sys/ucode.h>
#include <sys/tsc.h>

#ifdef __xpv
#include <sys/hypervisor.h>
#else
#include <sys/ontrap.h>
#endif

uint_t x86_vendor = X86_VENDOR_IntelClone;
uint_t x86_type = X86_TYPE_OTHER;
uint_t x86_clflush_size = 0;

#if defined(__xpv)
int x86_use_pcid = 0;
int x86_use_invpcid = 0;
#else
int x86_use_pcid = -1;
int x86_use_invpcid = -1;
#endif

uint_t pentiumpro_bug4046376;

uchar_t x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)];

static char *x86_feature_names[NUM_X86_FEATURES] = {
        "lgpg",
        "tsc",
        "msr",
        "mtrr",
        "pge",
        "de",
        "cmov",
        "mmx",
        "mca",
        "pae",
        "cv8",
        "pat",
        "sep",
        "sse",
        "sse2",
        "htt",
        "asysc",
        "nx",
        "sse3",
        "cx16",
        "cmp",
        "tscp",
        "mwait",
        "sse4a",
        "cpuid",
        "ssse3",
        "sse4_1",
        "sse4_2",
        "1gpg",
        "clfsh",
        "64",
        "aes",
        "pclmulqdq",
        "xsave",
        "avx",
        "vmx",
        "svm",
        "topoext",
        "f16c",
        "rdrand",
        "x2apic",
        "avx2",
        "bmi1",
        "bmi2",
        "fma",
        "smep",
        "smap",
        "adx",
        "rdseed",
        "mpx",
        "avx512f",
        "avx512dq",
        "avx512pf",
        "avx512er",
        "avx512cd",
        "avx512bw",
        "avx512vl",
        "avx512fma",
        "avx512vbmi",
        "avx512_vpopcntdq",
        "avx512_4vnniw",
        "avx512_4fmaps",
        "xsaveopt",
        "xsavec",
        "xsaves",
        "sha",
        "umip",
        "pku",
        "ospke",
        "pcid",
        "invpcid",
        "ibrs",
        "ibpb",
        "stibp",
        "ssbd",
        "ssbd_virt",
        "rdcl_no",
        "ibrs_all",
        "rsba",
        "ssb_no",
        "stibp_all",
        "flush_cmd",
        "l1d_vmentry_no",
        "fsgsbase",
        "clflushopt",
        "clwb",
        "monitorx",
        "clzero",
        "xop",
        "fma4",
        "tbm",
        "avx512_vnni"
};

boolean_t
is_x86_feature(void *featureset, uint_t feature)
{
        ASSERT(feature < NUM_X86_FEATURES);
        return (BT_TEST((ulong_t *)featureset, feature));
}

void
add_x86_feature(void *featureset, uint_t feature)
{
        ASSERT(feature < NUM_X86_FEATURES);
        BT_SET((ulong_t *)featureset, feature);
}

void
remove_x86_feature(void *featureset, uint_t feature)
{
        ASSERT(feature < NUM_X86_FEATURES);
        BT_CLEAR((ulong_t *)featureset, feature);
}

boolean_t
compare_x86_featureset(void *setA, void *setB)
{
        /*
         * We assume that the unused bits of the bitmap are always zero.
         */
        if (memcmp(setA, setB, BT_SIZEOFMAP(NUM_X86_FEATURES)) == 0) {
                return (B_TRUE);
        } else {
                return (B_FALSE);
        }
}

void
print_x86_featureset(void *featureset)
{
        uint_t i;

        for (i = 0; i < NUM_X86_FEATURES; i++) {
                if (is_x86_feature(featureset, i)) {
                        cmn_err(CE_CONT, "?x86_feature: %s\n",
                            x86_feature_names[i]);
                }
        }
}

/* Note: This is the maximum size for the CPU, not the size of the structure. */
static size_t xsave_state_size = 0;
uint64_t xsave_bv_all = (XFEATURE_LEGACY_FP | XFEATURE_SSE);
boolean_t xsave_force_disable = B_FALSE;
extern int disable_smap;
1094 
1095 /*
 * This is set to the platform type we are running on.
1097  */
1098 static int platform_type = -1;
1099 
1100 #if !defined(__xpv)
1101 /*
 * Variable to patch if hypervisor platform detection needs to be
 * disabled (i.e. platform_type will always be HW_NATIVE if this is 0).
1104  */
1105 int enable_platform_detection = 1;
1106 #endif
1107 
1108 /*
1109  * monitor/mwait info.
1110  *
 * size_actual and buf_actual are the real address and size allocated to get
 * proper mwait_buf alignment.  buf_actual and size_actual should be passed
 * to kmem_free().  Currently kmem_alloc() and mwait happen to both use
 * processor cache-line alignment, but this is not guaranteed in the future.
1115  */
1116 struct mwait_info {
1117         size_t          mon_min;        /* min size to avoid missed wakeups */
1118         size_t          mon_max;        /* size to avoid false wakeups */
1119         size_t          size_actual;    /* size actually allocated */
1120         void            *buf_actual;    /* memory actually allocated */
1121         uint32_t        support;        /* processor support of monitor/mwait */
1122 };
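
/*
 * As a minimal sketch (not the actual allocator), the bookkeeping above
 * would typically be maintained by over-allocating and then aligning up,
 * e.g. assuming a power-of-two mon_max:
 *
 *	mi->size_actual = mi->mon_max * 2;
 *	mi->buf_actual = kmem_zalloc(mi->size_actual, KM_SLEEP);
 *	buf = (void *)P2ROUNDUP((uintptr_t)mi->buf_actual, mi->mon_max);
 *
 * so that buf is mon_max-aligned, while buf_actual and size_actual remain
 * the values that must eventually be handed back to kmem_free().
 */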
1123 
1124 /*
 * xsave/xrstor info.
1126  *
1127  * This structure contains HW feature bits and the size of the xsave save area.
1128  * Note: the kernel declares a fixed size (AVX_XSAVE_SIZE) structure
1129  * (xsave_state) to describe the xsave layout. However, at runtime the
1130  * per-lwp xsave area is dynamically allocated based on xsav_max_size. The
1131  * xsave_state structure simply represents the legacy layout of the beginning
1132  * of the xsave area.
1133  */
1134 struct xsave_info {
1135         uint32_t        xsav_hw_features_low;   /* Supported HW features */
1136         uint32_t        xsav_hw_features_high;  /* Supported HW features */
1137         size_t          xsav_max_size;  /* max size save area for HW features */
1138         size_t          ymm_size;       /* AVX: size of ymm save area */
1139         size_t          ymm_offset;     /* AVX: offset for ymm save area */
1140         size_t          bndregs_size;   /* MPX: size of bndregs save area */
1141         size_t          bndregs_offset; /* MPX: offset for bndregs save area */
1142         size_t          bndcsr_size;    /* MPX: size of bndcsr save area */
1143         size_t          bndcsr_offset;  /* MPX: offset for bndcsr save area */
1144         size_t          opmask_size;    /* AVX512: size of opmask save */
1145         size_t          opmask_offset;  /* AVX512: offset for opmask save */
1146         size_t          zmmlo_size;     /* AVX512: size of zmm 256 save */
1147         size_t          zmmlo_offset;   /* AVX512: offset for zmm 256 save */
1148         size_t          zmmhi_size;     /* AVX512: size of zmm hi reg save */
1149         size_t          zmmhi_offset;   /* AVX512: offset for zmm hi reg save */
1150 };
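
/*
 * For illustration, each extended state component's size and offset come
 * from CPUID leaf 0xD with %ecx set to the component number; e.g. the AVX
 * ymm state is component 2 (a sketch of the idea, not the actual setup
 * code; xi stands in for a struct xsave_info pointer):
 *
 *	struct cpuid_regs cp;
 *	cp.cp_eax = 0xD;
 *	cp.cp_ecx = 2;
 *	cp.cp_ebx = cp.cp_edx = 0;
 *	(void) __cpuid_insn(&cp);
 *	xi->ymm_size = cp.cp_eax;	(size in bytes of the ymm state)
 *	xi->ymm_offset = cp.cp_ebx;	(offset from the xsave area base)
 */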
1151 
1152 
1153 /*
1154  * These constants determine how many of the elements of the
1155  * cpuid we cache in the cpuid_info data structure; the
1156  * remaining elements are accessible via the cpuid instruction.
1157  */
1158 
1159 #define NMAX_CPI_STD    8               /* eax = 0 .. 7 */
1160 #define NMAX_CPI_EXTD   0x1f            /* eax = 0x80000000 .. 0x8000001e */
1161 
1162 /*
1163  * See the big theory statement for a more detailed explanation of what some of
1164  * these members mean.
1165  */
1166 struct cpuid_info {
1167         uint_t cpi_pass;                /* last pass completed */
1168         /*
1169          * standard function information
1170          */
1171         uint_t cpi_maxeax;              /* fn 0: %eax */
1172         char cpi_vendorstr[13];         /* fn 0: %ebx:%ecx:%edx */
1173         uint_t cpi_vendor;              /* enum of cpi_vendorstr */
1174 
1175         uint_t cpi_family;              /* fn 1: extended family */
1176         uint_t cpi_model;               /* fn 1: extended model */
1177         uint_t cpi_step;                /* fn 1: stepping */
1178         chipid_t cpi_chipid;            /* fn 1: %ebx:  Intel: chip # */
1179                                         /*              AMD: package/socket # */
1180         uint_t cpi_brandid;             /* fn 1: %ebx: brand ID */
1181         int cpi_clogid;                 /* fn 1: %ebx: thread # */
1182         uint_t cpi_ncpu_per_chip;       /* fn 1: %ebx: logical cpu count */
1183         uint8_t cpi_cacheinfo[16];      /* fn 2: intel-style cache desc */
1184         uint_t cpi_ncache;              /* fn 2: number of elements */
1185         uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */
1186         id_t cpi_last_lvl_cacheid;      /* fn 4: %eax: derived cache id */
1187         uint_t cpi_cache_leaf_size;     /* Number of cache elements */
1188                                         /* Intel fn: 4, AMD fn: 8000001d */
        struct cpuid_regs **cpi_cache_leaves;   /* Actual leaves from above */
1190         struct cpuid_regs cpi_std[NMAX_CPI_STD];        /* 0 .. 7 */
1191         /*
1192          * extended function information
1193          */
1194         uint_t cpi_xmaxeax;             /* fn 0x80000000: %eax */
1195         char cpi_brandstr[49];          /* fn 0x8000000[234] */
        uint8_t cpi_pabits;             /* fn 0x80000008: %eax */
        uint8_t cpi_vabits;             /* fn 0x80000008: %eax */
1198         uint8_t cpi_fp_amd_save;        /* AMD: FP error pointer save rqd. */
1199         struct  cpuid_regs cpi_extd[NMAX_CPI_EXTD];     /* 0x800000XX */
1200 
1201         id_t cpi_coreid;                /* same coreid => strands share core */
1202         int cpi_pkgcoreid;              /* core number within single package */
1203         uint_t cpi_ncore_per_chip;      /* AMD: fn 0x80000008: %ecx[7-0] */
1204                                         /* Intel: fn 4: %eax[31-26] */
1205 
1206         /*
1207          * These values represent the number of bits that are required to store
1208          * information about the number of cores and threads.
1209          */
1210         uint_t cpi_ncore_bits;
1211         uint_t cpi_nthread_bits;
1212         /*
1213          * supported feature information
1214          */
1215         uint32_t cpi_support[6];
1216 #define STD_EDX_FEATURES        0
1217 #define AMD_EDX_FEATURES        1
1218 #define TM_EDX_FEATURES         2
1219 #define STD_ECX_FEATURES        3
1220 #define AMD_ECX_FEATURES        4
1221 #define STD_EBX_FEATURES        5
1222         /*
1223          * Synthesized information, where known.
1224          */
1225         uint32_t cpi_chiprev;           /* See X86_CHIPREV_* in x86_archext.h */
1226         const char *cpi_chiprevstr;     /* May be NULL if chiprev unknown */
1227         uint32_t cpi_socket;            /* Chip package/socket type */
1228 
1229         struct mwait_info cpi_mwait;    /* fn 5: monitor/mwait info */
1230         uint32_t cpi_apicid;
1231         uint_t cpi_procnodeid;          /* AMD: nodeID on HT, Intel: chipid */
1232         uint_t cpi_procnodes_per_pkg;   /* AMD: # of nodes in the package */
1233                                         /* Intel: 1 */
1234         uint_t cpi_compunitid;          /* AMD: ComputeUnit ID, Intel: coreid */
1235         uint_t cpi_cores_per_compunit;  /* AMD: # of cores in the ComputeUnit */
1236 
        struct xsave_info cpi_xsave;    /* fn D: xsave/xrstor info */
1238 };
1239 
1240 
1241 static struct cpuid_info cpuid_info0;
1242 
1243 /*
1244  * These bit fields are defined by the Intel Application Note AP-485
1245  * "Intel Processor Identification and the CPUID Instruction"
1246  */
1247 #define CPI_FAMILY_XTD(cpi)     BITX((cpi)->cpi_std[1].cp_eax, 27, 20)
1248 #define CPI_MODEL_XTD(cpi)      BITX((cpi)->cpi_std[1].cp_eax, 19, 16)
1249 #define CPI_TYPE(cpi)           BITX((cpi)->cpi_std[1].cp_eax, 13, 12)
1250 #define CPI_FAMILY(cpi)         BITX((cpi)->cpi_std[1].cp_eax, 11, 8)
1251 #define CPI_STEP(cpi)           BITX((cpi)->cpi_std[1].cp_eax, 3, 0)
1252 #define CPI_MODEL(cpi)          BITX((cpi)->cpi_std[1].cp_eax, 7, 4)
1253 
1254 #define CPI_FEATURES_EDX(cpi)           ((cpi)->cpi_std[1].cp_edx)
1255 #define CPI_FEATURES_ECX(cpi)           ((cpi)->cpi_std[1].cp_ecx)
1256 #define CPI_FEATURES_XTD_EDX(cpi)       ((cpi)->cpi_extd[1].cp_edx)
1257 #define CPI_FEATURES_XTD_ECX(cpi)       ((cpi)->cpi_extd[1].cp_ecx)
1258 #define CPI_FEATURES_7_0_EBX(cpi)       ((cpi)->cpi_std[7].cp_ebx)
1259 #define CPI_FEATURES_7_0_ECX(cpi)       ((cpi)->cpi_std[7].cp_ecx)
1260 #define CPI_FEATURES_7_0_EDX(cpi)       ((cpi)->cpi_std[7].cp_edx)
1261 
1262 #define CPI_BRANDID(cpi)        BITX((cpi)->cpi_std[1].cp_ebx, 7, 0)
#define CPI_CHUNKS(cpi)         BITX((cpi)->cpi_std[1].cp_ebx, 15, 8)
1264 #define CPI_CPU_COUNT(cpi)      BITX((cpi)->cpi_std[1].cp_ebx, 23, 16)
1265 #define CPI_APIC_ID(cpi)        BITX((cpi)->cpi_std[1].cp_ebx, 31, 24)
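
/*
 * A sketch of the conventional (Intel-style) combination of these fields
 * into the effective family and model that cpi_family and cpi_model hold;
 * AMD applies the extended model only to family 0xf parts:
 *
 *	family = CPI_FAMILY(cpi);
 *	if (family == 0xf)
 *		family += CPI_FAMILY_XTD(cpi);
 *	model = CPI_MODEL(cpi);
 *	if (family == 0x6 || family == 0xf)
 *		model += CPI_MODEL_XTD(cpi) << 4;
 */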
1266 
1267 #define CPI_MAXEAX_MAX          0x100           /* sanity control */
1268 #define CPI_XMAXEAX_MAX         0x80000100
1269 #define CPI_FN4_ECX_MAX         0x20            /* sanity: max fn 4 levels */
1270 #define CPI_FNB_ECX_MAX         0x20            /* sanity: max fn B levels */
1271 
1272 /*
1273  * Function 4 (Deterministic Cache Parameters) macros
1274  * Defined by Intel Application Note AP-485
1275  */
1276 #define CPI_NUM_CORES(regs)             BITX((regs)->cp_eax, 31, 26)
1277 #define CPI_NTHR_SHR_CACHE(regs)        BITX((regs)->cp_eax, 25, 14)
1278 #define CPI_FULL_ASSOC_CACHE(regs)      BITX((regs)->cp_eax, 9, 9)
1279 #define CPI_SELF_INIT_CACHE(regs)       BITX((regs)->cp_eax, 8, 8)
1280 #define CPI_CACHE_LVL(regs)             BITX((regs)->cp_eax, 7, 5)
1281 #define CPI_CACHE_TYPE(regs)            BITX((regs)->cp_eax, 4, 0)
1282 #define CPI_CPU_LEVEL_TYPE(regs)        BITX((regs)->cp_ecx, 15, 8)
1283 
1284 #define CPI_CACHE_WAYS(regs)            BITX((regs)->cp_ebx, 31, 22)
1285 #define CPI_CACHE_PARTS(regs)           BITX((regs)->cp_ebx, 21, 12)
1286 #define CPI_CACHE_COH_LN_SZ(regs)       BITX((regs)->cp_ebx, 11, 0)
1287 
1288 #define CPI_CACHE_SETS(regs)            BITX((regs)->cp_ecx, 31, 0)
1289 
1290 #define CPI_PREFCH_STRIDE(regs)         BITX((regs)->cp_edx, 9, 0)
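
/*
 * These fields are all reported as (value - 1), so the total size of a
 * cache described by a leaf follows the usual leaf 4 computation (a
 * sketch):
 *
 *	size = (CPI_CACHE_WAYS(regs) + 1) *
 *	    (CPI_CACHE_PARTS(regs) + 1) *
 *	    (CPI_CACHE_COH_LN_SZ(regs) + 1) *
 *	    (CPI_CACHE_SETS(regs) + 1);
 */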
1291 
1292 
1293 /*
1294  * A couple of shorthand macros to identify "later" P6-family chips
1295  * like the Pentium M and Core.  First, the "older" P6-based stuff
1296  * (loosely defined as "pre-Pentium-4"):
1297  * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon
1298  */
#define IS_LEGACY_P6(cpi) (                     \
        cpi->cpi_family == 6 &&                 \
                (cpi->cpi_model == 1 ||         \
                cpi->cpi_model == 3 ||          \
                cpi->cpi_model == 5 ||          \
                cpi->cpi_model == 6 ||          \
                cpi->cpi_model == 7 ||          \
                cpi->cpi_model == 8 ||          \
                cpi->cpi_model == 0xA ||        \
                cpi->cpi_model == 0xB)          \
)
1310 
1311 /* A "new F6" is everything with family 6 that's not the above */
1312 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi))
1313 
1314 /* Extended family/model support */
1315 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \
1316         cpi->cpi_family >= 0xf)
1317 
1318 /*
1319  * Info for monitor/mwait idle loop.
1320  *
1321  * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's
1322  * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November
1323  * 2006.
1324  * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual
1325  * Documentation Updates" #33633, Rev 2.05, December 2006.
1326  */
1327 #define MWAIT_SUPPORT           (0x00000001)    /* mwait supported */
#define MWAIT_EXTENSIONS        (0x00000002)    /* extension supported */
1329 #define MWAIT_ECX_INT_ENABLE    (0x00000004)    /* ecx 1 extension supported */
1330 #define MWAIT_SUPPORTED(cpi)    ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON)
1331 #define MWAIT_INT_ENABLE(cpi)   ((cpi)->cpi_std[5].cp_ecx & 0x2)
1332 #define MWAIT_EXTENSION(cpi)    ((cpi)->cpi_std[5].cp_ecx & 0x1)
1333 #define MWAIT_SIZE_MIN(cpi)     BITX((cpi)->cpi_std[5].cp_eax, 15, 0)
1334 #define MWAIT_SIZE_MAX(cpi)     BITX((cpi)->cpi_std[5].cp_ebx, 15, 0)
1335 /*
1336  * Number of sub-cstates for a given c-state.
1337  */
1338 #define MWAIT_NUM_SUBC_STATES(cpi, c_state)                     \
1339         BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state)
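
/*
 * Each c-state's sub-state count occupies one nibble of %edx, so callers
 * pass a multiple of four as c_state; e.g. MWAIT_NUM_SUBC_STATES(cpi, 4)
 * yields the number of C1 sub-states (bits 7:4 of %edx).
 */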
1340 
1341 /*
1342  * XSAVE leaf 0xD enumeration
1343  */
1344 #define CPUID_LEAFD_2_YMM_OFFSET        576
1345 #define CPUID_LEAFD_2_YMM_SIZE          256
1346 
1347 /*
1348  * Common extended leaf names to cut down on typos.
1349  */
1350 #define CPUID_LEAF_EXT_0                0x80000000
1351 #define CPUID_LEAF_EXT_8                0x80000008
1352 #define CPUID_LEAF_EXT_1d               0x8000001d
1353 #define CPUID_LEAF_EXT_1e               0x8000001e
1354 
1355 /*
 * Functions we consume from cpuid_subr.c;  don't publish these in a header
 * file to try and keep people using the expected cpuid_* interfaces.
1358  */
1359 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t);
1360 extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t);
1361 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t);
1362 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t);
1363 extern uint_t _cpuid_vendorstr_to_vendorcode(char *);
1364 
1365 /*
 * Apply various platform-dependent restrictions where the
1367  * underlying platform restrictions mean the CPU can be marked
1368  * as less capable than its cpuid instruction would imply.
1369  */
1370 #if defined(__xpv)
1371 static void
1372 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp)
1373 {
1374         switch (eax) {
1375         case 1: {
1376                 uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ?
1377                     0 : CPUID_INTC_EDX_MCA;
1378                 cp->cp_edx &=
1379                     ~(mcamask |
1380                     CPUID_INTC_EDX_PSE |
1381                     CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
1382                     CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR |
1383                     CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT |
                    CPUID_AMD_EDX_SYSC |
1385                     CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT);
1386                 break;
1387         }
1388 
1389         case 0x80000001:
1390                 cp->cp_edx &=
1391                     ~(CPUID_AMD_EDX_PSE |
1392                     CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
1393                     CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE |
1394                     CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 |
1395                     CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
1396                     CPUID_AMD_EDX_TSCP);
1397                 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY;
1398                 break;
1399         default:
1400                 break;
1401         }
1402 
1403         switch (vendor) {
1404         case X86_VENDOR_Intel:
1405                 switch (eax) {
1406                 case 4:
1407                         /*
1408                          * Zero out the (ncores-per-chip - 1) field
1409                          */
                        cp->cp_eax &= 0x03ffffff;
1411                         break;
1412                 default:
1413                         break;
1414                 }
1415                 break;
1416         case X86_VENDOR_AMD:
1417                 switch (eax) {
1418 
1419                 case 0x80000001:
1420                         cp->cp_ecx &= ~CPUID_AMD_ECX_CR8D;
1421                         break;
1422 
1423                 case CPUID_LEAF_EXT_8:
1424                         /*
1425                          * Zero out the (ncores-per-chip - 1) field
1426                          */
1427                         cp->cp_ecx &= 0xffffff00;
1428                         break;
1429                 default:
1430                         break;
1431                 }
1432                 break;
1433         default:
1434                 break;
1435         }
1436 }
1437 #else
1438 #define platform_cpuid_mangle(vendor, eax, cp)  /* nothing */
1439 #endif
1440 
1441 /*
1442  *  Some undocumented ways of patching the results of the cpuid
1443  *  instruction to permit running Solaris 10 on future cpus that
1444  *  we don't currently support.  Could be set to non-zero values
1445  *  via settings in eeprom.
1446  */
1447 
1448 uint32_t cpuid_feature_ecx_include;
1449 uint32_t cpuid_feature_ecx_exclude;
1450 uint32_t cpuid_feature_edx_include;
1451 uint32_t cpuid_feature_edx_exclude;
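
/*
 * A sketch of how these masks are meant to be applied when the leaf 1
 * feature words are read (include bits are OR-ed in, exclude bits are
 * cleared):
 *
 *	cp->cp_ecx |= cpuid_feature_ecx_include;
 *	cp->cp_ecx &= ~cpuid_feature_ecx_exclude;
 *	cp->cp_edx |= cpuid_feature_edx_include;
 *	cp->cp_edx &= ~cpuid_feature_edx_exclude;
 */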
1452 
1453 /*
1454  * Allocate space for mcpu_cpi in the machcpu structure for all non-boot CPUs.
1455  */
1456 void
1457 cpuid_alloc_space(cpu_t *cpu)
1458 {
1459         /*
1460          * By convention, cpu0 is the boot cpu, which is set up
1461          * before memory allocation is available.  All other cpus get
1462          * their cpuid_info struct allocated here.
1463          */
1464         ASSERT(cpu->cpu_id != 0);
1465         ASSERT(cpu->cpu_m.mcpu_cpi == NULL);
1466         cpu->cpu_m.mcpu_cpi =
1467             kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP);
1468 }
1469 
1470 void
1471 cpuid_free_space(cpu_t *cpu)
1472 {
1473         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1474         int i;
1475 
1476         ASSERT(cpi != NULL);
1477         ASSERT(cpi != &cpuid_info0);
1478 
1479         /*
1480          * Free up any cache leaf related dynamic storage. The first entry was
1481          * cached from the standard cpuid storage, so we should not free it.
1482          */
1483         for (i = 1; i < cpi->cpi_cache_leaf_size; i++)
1484                 kmem_free(cpi->cpi_cache_leaves[i], sizeof (struct cpuid_regs));
1485         if (cpi->cpi_cache_leaf_size > 0)
1486                 kmem_free(cpi->cpi_cache_leaves,
1487                     cpi->cpi_cache_leaf_size * sizeof (struct cpuid_regs *));
1488 
1489         kmem_free(cpi, sizeof (*cpi));
1490         cpu->cpu_m.mcpu_cpi = NULL;
1491 }
1492 
1493 #if !defined(__xpv)
1494 /*
1495  * Determine the type of the underlying platform. This is used to customize
1496  * initialization of various subsystems (e.g. TSC). determine_platform() must
1497  * only ever be called once to prevent two processors from seeing different
1498  * values of platform_type. Must be called before cpuid_pass1(), the earliest
1499  * consumer to execute (uses _cpuid_chiprev --> synth_amd_info --> get_hwenv).
1500  */
1501 void
1502 determine_platform(void)
1503 {
1504         struct cpuid_regs cp;
1505         uint32_t base;
1506         uint32_t regs[4];
1507         char *hvstr = (char *)regs;
1508 
1509         ASSERT(platform_type == -1);
1510 
1511         platform_type = HW_NATIVE;
1512 
1513         if (!enable_platform_detection)
1514                 return;
1515 
         * If the Hypervisor CPUID bit is set, try to determine the hypervisor
         * vendor signature, and set the platform type accordingly.
1518          * vendor signature, and set platform type accordingly.
1519          *
1520          * References:
1521          * http://lkml.org/lkml/2008/10/1/246
1522          * http://kb.vmware.com/kb/1009458
1523          */
1524         cp.cp_eax = 0x1;
1525         (void) __cpuid_insn(&cp);
1526         if ((cp.cp_ecx & CPUID_INTC_ECX_HV) != 0) {
1527                 cp.cp_eax = 0x40000000;
1528                 (void) __cpuid_insn(&cp);
1529                 regs[0] = cp.cp_ebx;
1530                 regs[1] = cp.cp_ecx;
1531                 regs[2] = cp.cp_edx;
1532                 regs[3] = 0;
1533                 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0) {
1534                         platform_type = HW_XEN_HVM;
1535                         return;
1536                 }
1537                 if (strcmp(hvstr, HVSIG_VMWARE) == 0) {
1538                         platform_type = HW_VMWARE;
1539                         return;
1540                 }
1541                 if (strcmp(hvstr, HVSIG_KVM) == 0) {
1542                         platform_type = HW_KVM;
1543                         return;
1544                 }
1545                 if (strcmp(hvstr, HVSIG_BHYVE) == 0) {
1546                         platform_type = HW_BHYVE;
1547                         return;
1548                 }
1549                 if (strcmp(hvstr, HVSIG_MICROSOFT) == 0)
1550                         platform_type = HW_MICROSOFT;
1551         } else {
1552                 /*
                 * Check older VMware hardware versions. The VMware hypervisor
                 * is detected by performing an IN operation on the VMware
                 * hypervisor port and checking that the value returned in
                 * %ebx is the VMware hypervisor magic value.
1557                  *
1558                  * References: http://kb.vmware.com/kb/1009458
1559                  */
1560                 vmware_port(VMWARE_HVCMD_GETVERSION, regs);
1561                 if (regs[1] == VMWARE_HVMAGIC) {
1562                         platform_type = HW_VMWARE;
1563                         return;
1564                 }
1565         }
1566 
1567         /*
 * Check for the Xen hypervisor. In a fully virtualized domain,
1569          * Xen's pseudo-cpuid function returns a string representing the
1570          * Xen signature in %ebx, %ecx, and %edx. %eax contains the maximum
1571          * supported cpuid function. We need at least a (base + 2) leaf value
1572          * to do what we want to do. Try different base values, since the
1573          * hypervisor might use a different one depending on whether Hyper-V
1574          * emulation is switched on by default or not.
1575          */
1576         for (base = 0x40000000; base < 0x40010000; base += 0x100) {
1577                 cp.cp_eax = base;
1578                 (void) __cpuid_insn(&cp);
1579                 regs[0] = cp.cp_ebx;
1580                 regs[1] = cp.cp_ecx;
1581                 regs[2] = cp.cp_edx;
1582                 regs[3] = 0;
1583                 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0 &&
1584                     cp.cp_eax >= (base + 2)) {
1585                         platform_type &= ~HW_NATIVE;
1586                         platform_type |= HW_XEN_HVM;
1587                         return;
1588                 }
1589         }
1590 }
1591 
1592 int
1593 get_hwenv(void)
1594 {
1595         ASSERT(platform_type != -1);
1596         return (platform_type);
1597 }
1598 
1599 int
1600 is_controldom(void)
1601 {
1602         return (0);
1603 }
1604 
1605 #else
1606 
1607 int
1608 get_hwenv(void)
1609 {
1610         return (HW_XEN_PV);
1611 }
1612 
1613 int
1614 is_controldom(void)
1615 {
1616         return (DOMAIN_IS_INITDOMAIN(xen_info));
1617 }
1618 
1619 #endif  /* __xpv */
1620 
1621 /*
1622  * Make sure that we have gathered all of the CPUID leaves that we might need to
1623  * determine topology. We assume that the standard leaf 1 has already been done
1624  * and that xmaxeax has already been calculated.
1625  */
1626 static void
1627 cpuid_gather_amd_topology_leaves(cpu_t *cpu)
1628 {
1629         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1630 
1631         if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
1632                 struct cpuid_regs *cp;
1633 
1634                 cp = &cpi->cpi_extd[8];
1635                 cp->cp_eax = CPUID_LEAF_EXT_8;
1636                 (void) __cpuid_insn(cp);
1637                 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8, cp);
1638         }
1639 
1640         if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1641             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1642                 struct cpuid_regs *cp;
1643 
1644                 cp = &cpi->cpi_extd[0x1e];
1645                 cp->cp_eax = CPUID_LEAF_EXT_1e;
1646                 (void) __cpuid_insn(cp);
1647         }
1648 }
1649 
1650 /*
 * Get the APIC ID for this processor. If Leaf B is present and valid, we
 * prefer it to everything else. If not, and we're on an AMD system where leaf
 * 0x8000001e is valid, then we use that. Otherwise, we fall back to the
 * default value for the APIC ID in leaf 1.
1655  */
1656 static uint32_t
1657 cpuid_gather_apicid(struct cpuid_info *cpi)
1658 {
1659         /*
         * Leaf B changes based on the arguments to it. Because we don't cache
         * it, we need to gather it again.
1662          */
1663         if (cpi->cpi_maxeax >= 0xB) {
1664                 struct cpuid_regs regs;
1665                 struct cpuid_regs *cp;
1666 
1667                 cp = &regs;
1668                 cp->cp_eax = 0xB;
1669                 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1670                 (void) __cpuid_insn(cp);
1671 
1672                 if (cp->cp_ebx != 0) {
1673                         return (cp->cp_edx);
1674                 }
1675         }
1676 
1677         if (cpi->cpi_vendor == X86_VENDOR_AMD &&
1678             is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1679             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1680                 return (cpi->cpi_extd[0x1e].cp_eax);
1681         }
1682 
1683         return (CPI_APIC_ID(cpi));
1684 }
1685 
1686 /*
1687  * For AMD processors, attempt to calculate the number of chips and cores that
1688  * exist. The way that we do this varies based on the generation, because the
1689  * generations themselves have changed dramatically.
1690  *
1691  * If cpuid leaf 0x80000008 exists, that generally tells us the number of cores.
1692  * However, with the advent of family 17h (Zen) it actually tells us the number
1693  * of threads, so we need to look at leaf 0x8000001e if available to determine
1694  * its value. Otherwise, for all prior families, the number of enabled cores is
1695  * the same as threads.
1696  *
1697  * If we do not have leaf 0x80000008, then we assume that this processor does
1698  * not have anything. AMD's older CPUID specification says there's no reason to
1699  * fall back to leaf 1.
1700  *
 * In some virtualization cases we will not have leaf 0x8000001e or it will be
 * zero. When that happens we assume the number of threads is one.
1703  */
1704 static void
1705 cpuid_amd_ncores(struct cpuid_info *cpi, uint_t *ncpus, uint_t *ncores)
1706 {
1707         uint_t nthreads, nthread_per_core;
1708 
1709         nthreads = nthread_per_core = 1;
1710 
1711         if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
1712                 nthreads = BITX(cpi->cpi_extd[8].cp_ecx, 7, 0) + 1;
1713         } else if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) {
1714                 nthreads = CPI_CPU_COUNT(cpi);
1715         }
1716 
1717         /*
1718          * For us to have threads, and know about it, we have to be at least at
1719          * family 17h and have the cpuid bit that says we have extended
1720          * topology.
1721          */
1722         if (cpi->cpi_family >= 0x17 &&
1723             is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1724             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1725                 nthread_per_core = BITX(cpi->cpi_extd[0x1e].cp_ebx, 15, 8) + 1;
1726         }
1727 
1728         *ncpus = nthreads;
1729         *ncores = nthreads / nthread_per_core;
1730 }
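
/*
 * For example, on a hypothetical 4-core/8-thread Zen part, leaf 0x80000008
 * %ecx[7:0] is 7 (nthreads = 8) and leaf 0x8000001e %ebx[15:8] is 1
 * (nthread_per_core = 2), so we report *ncpus = 8 and *ncores = 4.
 */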
1731 
1732 /*
1733  * Seed the initial values for the cores and threads for an Intel based
1734  * processor. These values will be overwritten if we detect that the processor
1735  * supports CPUID leaf 0xb.
1736  */
1737 static void
1738 cpuid_intel_ncores(struct cpuid_info *cpi, uint_t *ncpus, uint_t *ncores)
1739 {
1740         /*
1741          * Only seed the number of physical cores from the first level leaf 4
 * information. The number of threads there indicates how many share the
1743          * L1 cache, which may or may not have anything to do with the number of
1744          * logical CPUs per core.
1745          */
1746         if (cpi->cpi_maxeax >= 4) {
1747                 *ncores = BITX(cpi->cpi_std[4].cp_eax, 31, 26) + 1;
1748         } else {
1749                 *ncores = 1;
1750         }
1751 
1752         if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) {
1753                 *ncpus = CPI_CPU_COUNT(cpi);
1754         } else {
1755                 *ncpus = *ncores;
1756         }
1757 }
1758 
1759 static boolean_t
1760 cpuid_leafB_getids(cpu_t *cpu)
1761 {
1762         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1763         struct cpuid_regs regs;
1764         struct cpuid_regs *cp;
1765 
1766         if (cpi->cpi_maxeax < 0xB)
1767                 return (B_FALSE);
1768 
1769         cp = &regs;
1770         cp->cp_eax = 0xB;
1771         cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1772 
1773         (void) __cpuid_insn(cp);
1774 
1775         /*
1776          * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which
1777          * indicates that the extended topology enumeration leaf is
1778          * available.
1779          */
1780         if (cp->cp_ebx != 0) {
1781                 uint32_t x2apic_id = 0;
1782                 uint_t coreid_shift = 0;
1783                 uint_t ncpu_per_core = 1;
1784                 uint_t chipid_shift = 0;
1785                 uint_t ncpu_per_chip = 1;
1786                 uint_t i;
1787                 uint_t level;
1788 
1789                 for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
1790                         cp->cp_eax = 0xB;
1791                         cp->cp_ecx = i;
1792 
1793                         (void) __cpuid_insn(cp);
1794                         level = CPI_CPU_LEVEL_TYPE(cp);
1795 
1796                         if (level == 1) {
1797                                 x2apic_id = cp->cp_edx;
1798                                 coreid_shift = BITX(cp->cp_eax, 4, 0);
1799                                 ncpu_per_core = BITX(cp->cp_ebx, 15, 0);
1800                         } else if (level == 2) {
1801                                 x2apic_id = cp->cp_edx;
1802                                 chipid_shift = BITX(cp->cp_eax, 4, 0);
1803                                 ncpu_per_chip = BITX(cp->cp_ebx, 15, 0);
1804                         }
1805                 }
1806 
1807                 /*
1808                  * cpi_apicid is taken care of in cpuid_gather_apicid.
1809                  */
1810                 cpi->cpi_ncpu_per_chip = ncpu_per_chip;
1811                 cpi->cpi_ncore_per_chip = ncpu_per_chip /
1812                     ncpu_per_core;
1813                 cpi->cpi_chipid = x2apic_id >> chipid_shift;
1814                 cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1);
1815                 cpi->cpi_coreid = x2apic_id >> coreid_shift;
1816                 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
1817                 cpi->cpi_procnodeid = cpi->cpi_chipid;
1818                 cpi->cpi_compunitid = cpi->cpi_coreid;
1819 
1820                 if (coreid_shift > 0 && chipid_shift > coreid_shift) {
1821                         cpi->cpi_nthread_bits = coreid_shift;
1822                         cpi->cpi_ncore_bits = chipid_shift - coreid_shift;
1823                 }
1824 
1825                 return (B_TRUE);
1826         } else {
1827                 return (B_FALSE);
1828         }
1829 }
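
/*
 * As a worked example of the shifts above: with coreid_shift = 1 and
 * chipid_shift = 4, an x2apic id of 0x2d decomposes into chipid =
 * 0x2d >> 4 = 2, clogid = 0x2d & 0xf = 0xd, coreid = 0x2d >> 1 = 0x16,
 * and pkgcoreid = 0xd >> 1 = 6.
 */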
1830 
1831 static void
1832 cpuid_intel_getids(cpu_t *cpu, void *feature)
1833 {
1834         uint_t i;
1835         uint_t chipid_shift = 0;
1836         uint_t coreid_shift = 0;
1837         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1838 
1839         /*
1840          * There are no compute units or processor nodes currently on Intel.
1841          * Always set these to one.
1842          */
1843         cpi->cpi_procnodes_per_pkg = 1;
1844         cpi->cpi_cores_per_compunit = 1;
1845 
1846         /*
1847          * If cpuid Leaf B is present, use that to try and get this information.
1848          * It will be the most accurate for Intel CPUs.
1849          */
1850         if (cpuid_leafB_getids(cpu))
1851                 return;
1852 
1853         /*
1854          * In this case, we have the leaf 1 and leaf 4 values for ncpu_per_chip
1855          * and ncore_per_chip. These represent the largest power of two values
1856          * that we need to cover all of the IDs in the system. Therefore, we use
1857          * those values to seed the number of bits needed to cover information
1858          * in the case when leaf B is not available. These values will probably
1859          * be larger than required, but that's OK.
1860          */
1861         cpi->cpi_nthread_bits = ddi_fls(cpi->cpi_ncpu_per_chip);
1862         cpi->cpi_ncore_bits = ddi_fls(cpi->cpi_ncore_per_chip);
1863 
1864         for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1)
1865                 chipid_shift++;
1866 
1867         cpi->cpi_chipid = cpi->cpi_apicid >> chipid_shift;
1868         cpi->cpi_clogid = cpi->cpi_apicid & ((1 << chipid_shift) - 1);
1869 
1870         if (is_x86_feature(feature, X86FSET_CMP)) {
1871                 /*
1872                  * Multi-core (and possibly multi-threaded)
1873                  * processors.
1874                  */
1875                 uint_t ncpu_per_core;
1876                 if (cpi->cpi_ncore_per_chip == 1)
1877                         ncpu_per_core = cpi->cpi_ncpu_per_chip;
1878                 else if (cpi->cpi_ncore_per_chip > 1)
1879                         ncpu_per_core = cpi->cpi_ncpu_per_chip /
1880                             cpi->cpi_ncore_per_chip;
1881                 /*
1882                  * 8bit APIC IDs on dual core Pentiums
1883                  * look like this:
1884                  *
1885                  * +-----------------------+------+------+
1886                  * | Physical Package ID   |  MC  |  HT  |
1887                  * +-----------------------+------+------+
1888                  * <------- chipid -------->
1889                  * <------- coreid --------------->
1890                  *                         <--- clogid -->
1891                  *                         <------>
1892                  *                         pkgcoreid
1893                  *
                 * Where the number of bits necessary to
                 * represent the MC and HT fields together
                 * equals the minimum number of bits necessary to
                 * store the value of cpi->cpi_ncpu_per_chip.
1898                  * Of those bits, the MC part uses the number
1899                  * of bits necessary to store the value of
1900                  * cpi->cpi_ncore_per_chip.
1901                  */
1902                 for (i = 1; i < ncpu_per_core; i <<= 1)
1903                         coreid_shift++;
1904                 cpi->cpi_coreid = cpi->cpi_apicid >> coreid_shift;
1905                 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
1906         } else if (is_x86_feature(feature, X86FSET_HTT)) {
1907                 /*
1908                  * Single-core multi-threaded processors.
1909                  */
1910                 cpi->cpi_coreid = cpi->cpi_chipid;
1911                 cpi->cpi_pkgcoreid = 0;
1912         } else {
1913                 /*
1914                  * Single-core single-thread processors.
1915                  */
1916                 cpi->cpi_coreid = cpu->cpu_id;
1917                 cpi->cpi_pkgcoreid = 0;
1918         }
1919         cpi->cpi_procnodeid = cpi->cpi_chipid;
1920         cpi->cpi_compunitid = cpi->cpi_coreid;
1921 }
1922 
1923 /*
1924  * Historically, AMD has had CMP chips with only a single thread per core.
1925  * However, starting in family 17h (Zen), this has changed and they now have
1926  * multiple threads. Our internal core id needs to be a unique value.
1927  *
1928  * To determine the core id of an AMD system, if we're from a family before 17h,
1929  * then we just use the cpu id, as that gives us a good value that will be
1930  * unique for each core. If instead, we're on family 17h or later, then we need
 * to do something more complicated. CPUID leaf 0x8000001e can tell us how
 * many threads per core there are. Based on that, we'll shift the APIC ID.
 * We can't use the normal core id in that leaf as it's only unique within the
 * socket, which is perfect for cpi_pkgcoreid, but not for us.
1935  */
1936 static id_t
1937 cpuid_amd_get_coreid(cpu_t *cpu)
1938 {
1939         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1940 
1941         if (cpi->cpi_family >= 0x17 &&
1942             is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1943             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1944                 uint_t nthreads = BITX(cpi->cpi_extd[0x1e].cp_ebx, 15, 8) + 1;
1945                 if (nthreads > 1) {
1946                         VERIFY3U(nthreads, ==, 2);
1947                         return (cpi->cpi_apicid >> 1);
1948                 }
1949         }
1950 
1951         return (cpu->cpu_id);
1952 }
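
/*
 * E.g., on a family 17h part with SMT enabled, leaf 0x8000001e reports
 * 2 threads per core, so the two strands with APIC IDs 2N and 2N + 1
 * both yield core id N from the shift above.
 */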
1953 
1954 /*
 * Determining IDs on AMD is a more challenging task. This is notable because
 * of the following two facts:
1957  *
1958  *  1. Before family 0x17 (Zen), there was no support for SMT and there was
1959  *     also no way to get an actual unique core id from the system. As such, we
1960  *     synthesize this case by using cpu->cpu_id.  This scheme does not,
1961  *     however, guarantee that sibling cores of a chip will have sequential
1962  *     coreids starting at a multiple of the number of cores per chip - that is
1963  *     usually the case, but if the ACPI MADT table is presented in a different
1964  *     order then we need to perform a few more gymnastics for the pkgcoreid.
1965  *
 *  2. In families 0x15 and 0x16 (Bulldozer and co.) the cores came in groups
1967  *     called compute units. These compute units share the L1I cache, L2 cache,
1968  *     and the FPU. To deal with this, a new topology leaf was added in
1969  *     0x8000001e. However, parts of this leaf have different meanings
1970  *     once we get to family 0x17.
1971  */
1972 
1973 static void
1974 cpuid_amd_getids(cpu_t *cpu, uchar_t *features)
1975 {
1976         int i, first_half, coreidsz;
1977         uint32_t nb_caps_reg;
1978         uint_t node2_1;
1979         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1980         struct cpuid_regs *cp;
1981 
1982         /*
1983          * Calculate the core id (this comes from hardware in family 0x17 if it
1984          * hasn't been stripped by virtualization). We always set the compute
1985          * unit id to the same value. Also, initialize the default number of
1986          * cores per compute unit and nodes per package. This will be
1987          * overwritten when we know information about a particular family.
1988          */
1989         cpi->cpi_coreid = cpuid_amd_get_coreid(cpu);
1990         cpi->cpi_compunitid = cpi->cpi_coreid;
1991         cpi->cpi_cores_per_compunit = 1;
1992         cpi->cpi_procnodes_per_pkg = 1;
1993 
1994         /*
1995          * To construct the logical ID, we need to determine how many APIC IDs
1996          * are dedicated to the cores and threads. This is provided for us in
1997          * 0x80000008. However, if it's not present (say due to virtualization),
1998          * then we assume it's one. This should be present on all 64-bit AMD
1999          * processors.  It was added in family 0xf (Hammer).
2000          */
2001         if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
2002                 coreidsz = BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12);
2003 
2004                 /*
                 * In AMD parlance, a chip is really a node, while illumos
                 * uses chip as the equivalent of a socket/package.
2007                  */
2008                 if (coreidsz == 0) {
2009                         /* Use legacy method */
2010                         for (i = 1; i < cpi->cpi_ncore_per_chip; i <<= 1)
2011                                 coreidsz++;
2012                         if (coreidsz == 0)
2013                                 coreidsz = 1;
2014                 }
2015         } else {
2016                 /* Assume single-core part */
2017                 coreidsz = 1;
2018         }
2019         cpi->cpi_clogid = cpi->cpi_apicid & ((1 << coreidsz) - 1);
2020 
2021         /*
         * The package core ID varies depending on the family. For family 17h,
         * we can get this directly from leaf CPUID_LEAF_EXT_1e. Otherwise, we
         * can use the clogid as is. When family 17h is virtualized and we don't
         * have valid data in that leaf, we won't think we have SMT, in which
         * case the clogid is again sufficient.
2028          */
2029         if (cpi->cpi_family >= 0x17 &&
2030             is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
2031             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e &&
2032             cpi->cpi_extd[0x1e].cp_ebx != 0) {
2033                 cpi->cpi_pkgcoreid = BITX(cpi->cpi_extd[0x1e].cp_ebx, 7, 0);
2034         } else {
2035                 cpi->cpi_pkgcoreid = cpi->cpi_clogid;
2036         }
2037 
2038         /*
2039          * Obtain the node ID and compute unit IDs. If we're on family 0x15
2040          * (bulldozer) or newer, then we can derive all of this from leaf
2041          * CPUID_LEAF_EXT_1e. Otherwise, the method varies by family.
2042          */
2043         if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
2044             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
2045                 cp = &cpi->cpi_extd[0x1e];
2046 
2047                 cpi->cpi_procnodes_per_pkg = BITX(cp->cp_ecx, 10, 8) + 1;
2048                 cpi->cpi_procnodeid = BITX(cp->cp_ecx, 7, 0);
2049 
2050                 /*
2051                  * For Bulldozer-era CPUs, recalculate the compute unit
2052                  * information.
2053                  */
2054                 if (cpi->cpi_family >= 0x15 && cpi->cpi_family < 0x17) {
2055                         cpi->cpi_cores_per_compunit =
2056                             BITX(cp->cp_ebx, 15, 8) + 1;
2057                         cpi->cpi_compunitid = BITX(cp->cp_ebx, 7, 0) +
2058                             (cpi->cpi_ncore_per_chip /
2059                             cpi->cpi_cores_per_compunit) *
2060                             (cpi->cpi_procnodeid /
2061                             cpi->cpi_procnodes_per_pkg);
2062                 }
2063         } else if (cpi->cpi_family == 0xf || cpi->cpi_family >= 0x11) {
2064                 cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7;
2065         } else if (cpi->cpi_family == 0x10) {
2066                 /*
2067                  * See if we are a multi-node processor.
                 * All processors in the system have the same number of nodes.
2069                  */
2070                 nb_caps_reg =  pci_getl_func(0, 24, 3, 0xe8);
2071                 if ((cpi->cpi_model < 8) || BITX(nb_caps_reg, 29, 29) == 0) {
2072                         /* Single-node */
2073                         cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5,
2074                             coreidsz);
2075                 } else {
2076 
2077                         /*
2078                          * Multi-node revision D (2 nodes per package
2079                          * are supported)
2080                          */
2081                         cpi->cpi_procnodes_per_pkg = 2;
2082 
2083                         first_half = (cpi->cpi_pkgcoreid <=
2084                             (cpi->cpi_ncore_per_chip/2 - 1));
2085 
2086                         if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) {
2087                                 /* We are BSP */
2088                                 cpi->cpi_procnodeid = (first_half ? 0 : 1);
2089                         } else {
2090 
2091                                 /* We are AP */
2092                                 /* NodeId[2:1] bits to use for reading F3xe8 */
2093                                 node2_1 = BITX(cpi->cpi_apicid, 5, 4) << 1;
2094 
2095                                 nb_caps_reg =
2096                                     pci_getl_func(0, 24 + node2_1, 3, 0xe8);
2097 
2098                                 /*
2099                                  * Check IntNodeNum bit (31:30, but bit 31 is
2100                                  * always 0 on dual-node processors)
2101                                  */
2102                                 if (BITX(nb_caps_reg, 30, 30) == 0)
2103                                         cpi->cpi_procnodeid = node2_1 +
2104                                             !first_half;
2105                                 else
2106                                         cpi->cpi_procnodeid = node2_1 +
2107                                             first_half;
2108                         }
2109                 }
2110         } else {
2111                 cpi->cpi_procnodeid = 0;
2112         }
2113 
2114         cpi->cpi_chipid =
2115             cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg;
2116 
2117         cpi->cpi_ncore_bits = coreidsz;
2118         cpi->cpi_nthread_bits = ddi_fls(cpi->cpi_ncpu_per_chip /
2119             cpi->cpi_ncore_per_chip);
2120 }
2121 
2122 static void
2123 spec_l1d_flush_noop(void)
2124 {
2125 }
2126 
2127 static void
2128 spec_l1d_flush_msr(void)
2129 {
2130         wrmsr(MSR_IA32_FLUSH_CMD, IA32_FLUSH_CMD_L1D);
2131 }
2132 
2133 void (*spec_l1d_flush)(void) = spec_l1d_flush_noop;
2134 
2135 static void
2136 cpuid_scan_security(cpu_t *cpu, uchar_t *featureset)
2137 {
2138         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2139 
2140         if (cpi->cpi_vendor == X86_VENDOR_AMD &&
2141             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
2142                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBPB)
2143                         add_x86_feature(featureset, X86FSET_IBPB);
2144                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBRS)
2145                         add_x86_feature(featureset, X86FSET_IBRS);
2146                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_STIBP)
2147                         add_x86_feature(featureset, X86FSET_STIBP);
2148                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBRS_ALL)
2149                         add_x86_feature(featureset, X86FSET_IBRS_ALL);
2150                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_STIBP_ALL)
2151                         add_x86_feature(featureset, X86FSET_STIBP_ALL);
2152                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_PREFER_IBRS)
2153                         add_x86_feature(featureset, X86FSET_RSBA);
2154                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_SSBD)
2155                         add_x86_feature(featureset, X86FSET_SSBD);
2156                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_VIRT_SSBD)
2157                         add_x86_feature(featureset, X86FSET_SSBD_VIRT);
2158                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_SSB_NO)
2159                         add_x86_feature(featureset, X86FSET_SSB_NO);
2160         } else if (cpi->cpi_vendor == X86_VENDOR_Intel &&
2161             cpi->cpi_maxeax >= 7) {
2162                 struct cpuid_regs *ecp;
2163                 ecp = &cpi->cpi_std[7];
2164 
2165                 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_SPEC_CTRL) {
2166                         add_x86_feature(featureset, X86FSET_IBRS);
2167                         add_x86_feature(featureset, X86FSET_IBPB);
2168                 }
2169 
2170                 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_STIBP) {
2171                         add_x86_feature(featureset, X86FSET_STIBP);
2172                 }
2173 
2174                 /*
2175                  * Don't read the arch caps MSR on xpv where we lack the
2176                  * on_trap().
2177                  */
2178 #ifndef __xpv
2179                 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_ARCH_CAPS) {
2180                         on_trap_data_t otd;
2181 
2182                         /*
2183                          * Be paranoid and assume we'll get a #GP.
2184                          */
2185                         if (!on_trap(&otd, OT_DATA_ACCESS)) {
2186                                 uint64_t reg;
2187 
2188                                 reg = rdmsr(MSR_IA32_ARCH_CAPABILITIES);
2189                                 if (reg & IA32_ARCH_CAP_RDCL_NO) {
2190                                         add_x86_feature(featureset,
2191                                             X86FSET_RDCL_NO);
2192                                 }
2193                                 if (reg & IA32_ARCH_CAP_IBRS_ALL) {
2194                                         add_x86_feature(featureset,
2195                                             X86FSET_IBRS_ALL);
2196                                 }
2197                                 if (reg & IA32_ARCH_CAP_RSBA) {
2198                                         add_x86_feature(featureset,
2199                                             X86FSET_RSBA);
2200                                 }
2201                                 if (reg & IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY) {
2202                                         add_x86_feature(featureset,
2203                                             X86FSET_L1D_VM_NO);
2204                                 }
2205                                 if (reg & IA32_ARCH_CAP_SSB_NO) {
2206                                         add_x86_feature(featureset,
2207                                             X86FSET_SSB_NO);
2208                                 }
2209                         }
2210                         no_trap();
2211                 }
2212 #endif  /* !__xpv */
2213 
2214                 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_SSBD)
2215                         add_x86_feature(featureset, X86FSET_SSBD);
2216 
2217                 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_FLUSH_CMD)
2218                         add_x86_feature(featureset, X86FSET_FLUSH_CMD);
2219         }
2220 
2221         if (cpu->cpu_id != 0)
2222                 return;
2223 
2224         /*
2225          * We're the boot CPU, so let's figure out our L1TF status.
2226          *
2227          * First, if this is a RDCL_NO CPU, then we are not vulnerable: we don't
2228          * need to exclude with ht_acquire(), and we don't need to flush.
2229          */
2230         if (is_x86_feature(featureset, X86FSET_RDCL_NO)) {
2231                 extern int ht_exclusion;
2232                 ht_exclusion = 0;
2233                 spec_l1d_flush = spec_l1d_flush_noop;
2234                 membar_producer();
2235                 return;
2236         }
2237 
2238         /*
2239          * If HT is enabled, we will need HT exclusion, as well as the flush on
2240          * VM entry.  If HT isn't enabled, we still need at least the flush for
2241          * the L1TF sequential case.
2242          *
2243          * However, if X86FSET_L1D_VM_NO is set, we're most likely running
2244          * inside a VM ourselves, and we don't need the flush.
2245          *
2246          * If we don't have the FLUSH_CMD available at all, we'd better just
2247          * hope HT is disabled.
2248          */
2249         if (is_x86_feature(featureset, X86FSET_FLUSH_CMD) &&
2250             !is_x86_feature(featureset, X86FSET_L1D_VM_NO)) {
2251                 spec_l1d_flush = spec_l1d_flush_msr;
2252         } else {
2253                 spec_l1d_flush = spec_l1d_flush_noop;
2254         }
2255 
2256         membar_producer();
2257 }
2258 
2259 /*
 * Set up the XFeature_Enabled_Mask register. Required by the xsave feature.
2261  */
2262 void
2263 setup_xfem(void)
2264 {
2265         uint64_t flags = XFEATURE_LEGACY_FP;
2266 
2267         ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
2268 
2269         if (is_x86_feature(x86_featureset, X86FSET_SSE))
2270                 flags |= XFEATURE_SSE;
2271 
2272         if (is_x86_feature(x86_featureset, X86FSET_AVX))
2273                 flags |= XFEATURE_AVX;
2274 
2275         if (is_x86_feature(x86_featureset, X86FSET_AVX512F))
2276                 flags |= XFEATURE_AVX512;
2277 
2278         set_xcr(XFEATURE_ENABLED_MASK, flags);
2279 
2280         xsave_bv_all = flags;
2281 }
2282 
2283 static void
2284 cpuid_pass1_topology(cpu_t *cpu, uchar_t *featureset)
2285 {
2286         struct cpuid_info *cpi;
2287 
2288         cpi = cpu->cpu_m.mcpu_cpi;
2289 
2290         if (cpi->cpi_vendor == X86_VENDOR_AMD) {
2291                 cpuid_gather_amd_topology_leaves(cpu);
2292         }
2293 
2294         cpi->cpi_apicid = cpuid_gather_apicid(cpi);
2295 
2296         /*
2297          * Before we can calculate the IDs that we should assign to this
2298          * processor, we need to understand how many cores and threads it has.
2299          */
2300         switch (cpi->cpi_vendor) {
2301         case X86_VENDOR_Intel:
2302                 cpuid_intel_ncores(cpi, &cpi->cpi_ncpu_per_chip,
2303                     &cpi->cpi_ncore_per_chip);
2304                 break;
2305         case X86_VENDOR_AMD:
2306                 cpuid_amd_ncores(cpi, &cpi->cpi_ncpu_per_chip,
2307                     &cpi->cpi_ncore_per_chip);
2308                 break;
2309         default:
2310                 /*
                 * If we have some other x86 compatible chip, it's not clear how
                 * it would behave. The most common case is virtualization
2313                  * today, though there are also 64-bit VIA chips. Assume that
2314                  * all we can get is the basic Leaf 1 HTT information.
2315                  */
2316                 if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) {
2317                         cpi->cpi_ncore_per_chip = 1;
2318                         cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi);
2319                 }
2320                 break;
2321         }
2322 
2323         /*
2324          * Based on the calculated number of threads and cores, potentially
2325          * assign the HTT and CMT features.
2326          */
2327         if (cpi->cpi_ncore_per_chip > 1) {
2328                 add_x86_feature(featureset, X86FSET_CMP);
2329         }
2330 
2331         if (cpi->cpi_ncpu_per_chip > 1 &&
2332             cpi->cpi_ncpu_per_chip != cpi->cpi_ncore_per_chip) {
2333                 add_x86_feature(featureset, X86FSET_HTT);
2334         }
2335 
2336         /*
         * Now that the counts have been set up, we need to go through and
         * calculate all of the rest of the parameters that exist. If we think
         * the CPU doesn't
2339          * have either SMT (HTT) or CMP, then we basically go through and fake
2340          * up information in some way. The most likely case for this is
2341          * virtualization where we have a lot of partial topology information.
2342          */
2343         if (!is_x86_feature(featureset, X86FSET_HTT) &&
2344             !is_x86_feature(featureset, X86FSET_CMP)) {
2345                 /*
                 * This is a single-core, single-threaded processor.
2347                  */
2348                 cpi->cpi_procnodes_per_pkg = 1;
2349                 cpi->cpi_cores_per_compunit = 1;
2350                 cpi->cpi_compunitid = 0;
2351                 cpi->cpi_chipid = -1;
2352                 cpi->cpi_clogid = 0;
2353                 cpi->cpi_coreid = cpu->cpu_id;
2354                 cpi->cpi_pkgcoreid = 0;
2355                 if (cpi->cpi_vendor == X86_VENDOR_AMD) {
2356                         cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 3, 0);
2357                 } else {
2358                         cpi->cpi_procnodeid = cpi->cpi_chipid;
2359                 }
2360         } else {
2361                 switch (cpi->cpi_vendor) {
2362                 case X86_VENDOR_Intel:
2363                         cpuid_intel_getids(cpu, featureset);
2364                         break;
2365                 case X86_VENDOR_AMD:
2366                         cpuid_amd_getids(cpu, featureset);
2367                         break;
2368                 default:
2369                         /*
2370                          * In this case, it's hard to say what we should do.
2371                          * We model them to the OS as single-core, single-
2372                          * threaded processors. We don't have a good
2373                          * identifier for them, so we just use the cpu id,
2374                          * placing them all on a single chip.
2375                          *
2376                          * This case has historically been different from the
2377                          * case above where we don't have HTT or CMP. While they
2378                          * could be combined, we've opted to keep it separate to
2379                          * minimize the risk of topology changes in weird cases.
2380                          */
2381                         cpi->cpi_procnodes_per_pkg = 1;
2382                         cpi->cpi_cores_per_compunit = 1;
2383                         cpi->cpi_chipid = 0;
2384                         cpi->cpi_coreid = cpu->cpu_id;
2385                         cpi->cpi_clogid = cpu->cpu_id;
2386                         cpi->cpi_pkgcoreid = cpu->cpu_id;
2387                         cpi->cpi_procnodeid = cpi->cpi_chipid;
2388                         cpi->cpi_compunitid = cpi->cpi_coreid;
2389                         break;
2390                 }
2391         }
2392 }
2393 
2394 void
2395 cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
2396 {
2397         uint32_t mask_ecx, mask_edx;
2398         struct cpuid_info *cpi;
2399         struct cpuid_regs *cp;
2400         int xcpuid;
2401 #if !defined(__xpv)
2402         extern int idle_cpu_prefer_mwait;
2403 #endif
2404 
2405         /*
2406          * Space is statically allocated for the BSP; ensure the pointer is set.
2407          */
2408         if (cpu->cpu_id == 0) {
2409                 if (cpu->cpu_m.mcpu_cpi == NULL)
2410                         cpu->cpu_m.mcpu_cpi = &cpuid_info0;
2411         }
2412 
2413         add_x86_feature(featureset, X86FSET_CPUID);
2414 
2415         cpi = cpu->cpu_m.mcpu_cpi;
2416         ASSERT(cpi != NULL);
2417         cp = &cpi->cpi_std[0];
2418         cp->cp_eax = 0;
2419         cpi->cpi_maxeax = __cpuid_insn(cp);
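        /*
         * Leaf 0 returns the maximum supported basic leaf in %eax and the
         * twelve-byte vendor string in %ebx, %edx, %ecx, in that order
         * (e.g. "GenuineIntel"), which is why the registers are copied out
         * in that order below.
         */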
2420         {
2421                 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr;
2422                 *iptr++ = cp->cp_ebx;
2423                 *iptr++ = cp->cp_edx;
2424                 *iptr++ = cp->cp_ecx;
2425                 *(char *)&cpi->cpi_vendorstr[12] = '\0';
2426         }
2427 
2428         cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr);
2429         x86_vendor = cpi->cpi_vendor; /* for compatibility */
2430 
2431         /*
2432          * Limit the range in case of weird hardware
2433          */
2434         if (cpi->cpi_maxeax > CPI_MAXEAX_MAX)
2435                 cpi->cpi_maxeax = CPI_MAXEAX_MAX;
2436         if (cpi->cpi_maxeax < 1)
2437                 goto pass1_done;
2438 
2439         cp = &cpi->cpi_std[1];
2440         cp->cp_eax = 1;
2441         (void) __cpuid_insn(cp);
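        /*
         * Leaf 1 %eax encodes the stepping in bits 3:0, the model in bits
         * 7:4, the family in bits 11:8, the extended model in bits 19:16,
         * and the extended family in bits 27:20.
         */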
2442 
2443         /*
2444          * Extract identifying constants for easy access.
2445          */
2446         cpi->cpi_model = CPI_MODEL(cpi);
2447         cpi->cpi_family = CPI_FAMILY(cpi);
2448 
2449         if (cpi->cpi_family == 0xf)
2450                 cpi->cpi_family += CPI_FAMILY_XTD(cpi);
2451 
2452         /*
2453          * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf.
2454          * Intel, and presumably everyone else, uses model == 0xf, as
2455          * one would expect (max value means possible overflow).  Sigh.
2456          */
2457 
2458         switch (cpi->cpi_vendor) {
2459         case X86_VENDOR_Intel:
2460                 if (IS_EXTENDED_MODEL_INTEL(cpi))
2461                         cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
2462                 break;
2463         case X86_VENDOR_AMD:
2464                 if (CPI_FAMILY(cpi) == 0xf)
2465                         cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
2466                 break;
2467         default:
2468                 if (cpi->cpi_model == 0xf)
2469                         cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
2470                 break;
2471         }
2472 
2473         cpi->cpi_step = CPI_STEP(cpi);
2474         cpi->cpi_brandid = CPI_BRANDID(cpi);
2475 
2476         /*
2477          * *default* assumptions:
2478          * - believe %edx feature word
2479          * - ignore %ecx feature word
2480          * - 32-bit virtual and physical addressing
2481          */
2482         mask_edx = 0xffffffff;
2483         mask_ecx = 0;
2484 
2485         cpi->cpi_pabits = cpi->cpi_vabits = 32;
2486 
2487         switch (cpi->cpi_vendor) {
2488         case X86_VENDOR_Intel:
2489                 if (cpi->cpi_family == 5)
2490                         x86_type = X86_TYPE_P5;
2491                 else if (IS_LEGACY_P6(cpi)) {
2492                         x86_type = X86_TYPE_P6;
2493                         pentiumpro_bug4046376 = 1;
2494                         /*
2495                          * Clear the SEP bit when it was set erroneously
2496                          */
2497                         if (cpi->cpi_model < 3 && cpi->cpi_step < 3)
2498                                 cp->cp_edx &= ~CPUID_INTC_EDX_SEP;
2499                 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) {
2500                         x86_type = X86_TYPE_P4;
2501                         /*
2502                          * We don't currently depend on any of the %ecx
2503                          * features until Prescott, so we'll only check
2504                          * this from P4 onwards.  We might want to revisit
2505                          * that idea later.
2506                          */
2507                         mask_ecx = 0xffffffff;
2508                 } else if (cpi->cpi_family > 0xf)
2509                         mask_ecx = 0xffffffff;
2510                 /*
2511                  * We don't support MONITOR/MWAIT if leaf 5 is not available
2512                  * to obtain the monitor linesize.
2513                  */
2514                 if (cpi->cpi_maxeax < 5)
2515                         mask_ecx &= ~CPUID_INTC_ECX_MON;
2516                 break;
2517         case X86_VENDOR_IntelClone:
2518         default:
2519                 break;
2520         case X86_VENDOR_AMD:
2521 #if defined(OPTERON_ERRATUM_108)
2522                 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) {
2523                         cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0;
2524                         cpi->cpi_model = 0xc;
2525                 } else
2526 #endif
2527                 if (cpi->cpi_family == 5) {
2528                         /*
2529                          * AMD K5 and K6
2530                          *
2531                          * These CPUs have an incomplete implementation
2532                          * of MCA/MCE which we mask away.
2533                          */
2534                         mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA);
2535 
2536                         /*
2537                          * Model 0 uses the wrong bit (bit 9, the APIC
2538                          * bit) to indicate PGE (bit 13).  Fix it here.
2539                          */
2540                         if (cpi->cpi_model == 0) {
2541                                 if (cp->cp_edx & 0x200) {
2542                                         cp->cp_edx &= ~0x200;
2543                                         cp->cp_edx |= CPUID_INTC_EDX_PGE;
2544                                 }
2545                         }
2546 
2547                         /*
2548                          * Early models had problems with MMX; disable it.
2549                          */
2550                         if (cpi->cpi_model < 6)
2551                                 mask_edx &= ~CPUID_INTC_EDX_MMX;
2552                 }
2553 
2554                 /*
2555                  * For newer families, SSE3 and CX16, at least, are valid;
2556                  * enable all.
2557                  */
2558                 if (cpi->cpi_family >= 0xf)
2559                         mask_ecx = 0xffffffff;
2560                 /*
2561                  * We don't support MONITOR/MWAIT if leaf 5 is not available
2562                  * to obtain the monitor linesize.
2563                  */
2564                 if (cpi->cpi_maxeax < 5)
2565                         mask_ecx &= ~CPUID_INTC_ECX_MON;
2566 
2567 #if !defined(__xpv)
2568                 /*
2569                  * AMD has not historically used MWAIT in the CPU's idle loop.
2570                  * Pre-family-10h Opterons do not have the MWAIT instruction. We
2571                  * know for certain that in at least family 17h, per AMD, mwait
2572                  * is preferred. Families in-between are less certain.
2573                  */
2574                 if (cpi->cpi_family < 0x17) {
2575                         idle_cpu_prefer_mwait = 0;
2576                 }
2577 #endif
2578 
2579                 break;
2580         case X86_VENDOR_TM:
2581                 /*
2582                  * work around the NT workaround in CMS 4.1
2583                  */
2584                 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 &&
2585                     (cpi->cpi_step == 2 || cpi->cpi_step == 3))
2586                         cp->cp_edx |= CPUID_INTC_EDX_CX8;
2587                 break;
2588         case X86_VENDOR_Centaur:
2589                 /*
2590                  * work around the NT workarounds again
2591                  */
2592                 if (cpi->cpi_family == 6)
2593                         cp->cp_edx |= CPUID_INTC_EDX_CX8;
2594                 break;
2595         case X86_VENDOR_Cyrix:
2596                 /*
2597                  * We rely heavily on the probing in locore
2598                  * to actually figure out what parts, if any,
2599                  * of the Cyrix cpuid instruction to believe.
2600                  */
2601                 switch (x86_type) {
2602                 case X86_TYPE_CYRIX_486:
2603                         mask_edx = 0;
2604                         break;
2605                 case X86_TYPE_CYRIX_6x86:
2606                         mask_edx = 0;
2607                         break;
2608                 case X86_TYPE_CYRIX_6x86L:
2609                         mask_edx =
2610                             CPUID_INTC_EDX_DE |
2611                             CPUID_INTC_EDX_CX8;
2612                         break;
2613                 case X86_TYPE_CYRIX_6x86MX:
2614                         mask_edx =
2615                             CPUID_INTC_EDX_DE |
2616                             CPUID_INTC_EDX_MSR |
2617                             CPUID_INTC_EDX_CX8 |
2618                             CPUID_INTC_EDX_PGE |
2619                             CPUID_INTC_EDX_CMOV |
2620                             CPUID_INTC_EDX_MMX;
2621                         break;
2622                 case X86_TYPE_CYRIX_GXm:
2623                         mask_edx =
2624                             CPUID_INTC_EDX_MSR |
2625                             CPUID_INTC_EDX_CX8 |
2626                             CPUID_INTC_EDX_CMOV |
2627                             CPUID_INTC_EDX_MMX;
2628                         break;
2629                 case X86_TYPE_CYRIX_MediaGX:
2630                         break;
2631                 case X86_TYPE_CYRIX_MII:
2632                 case X86_TYPE_VIA_CYRIX_III:
2633                         mask_edx =
2634                             CPUID_INTC_EDX_DE |
2635                             CPUID_INTC_EDX_TSC |
2636                             CPUID_INTC_EDX_MSR |
2637                             CPUID_INTC_EDX_CX8 |
2638                             CPUID_INTC_EDX_PGE |
2639                             CPUID_INTC_EDX_CMOV |
2640                             CPUID_INTC_EDX_MMX;
2641                         break;
2642                 default:
2643                         break;
2644                 }
2645                 break;
2646         }
2647 
2648 #if defined(__xpv)
2649         /*
2650          * Do not support MONITOR/MWAIT under a hypervisor
2651          */
2652         mask_ecx &= ~CPUID_INTC_ECX_MON;
2653         /*
2654          * Do not support XSAVE under a hypervisor for now
2655          */
2656         xsave_force_disable = B_TRUE;
2657 
2658 #endif  /* __xpv */
2659 
2660         if (xsave_force_disable) {
2661                 mask_ecx &= ~CPUID_INTC_ECX_XSAVE;
2662                 mask_ecx &= ~CPUID_INTC_ECX_AVX;
2663                 mask_ecx &= ~CPUID_INTC_ECX_F16C;
2664                 mask_ecx &= ~CPUID_INTC_ECX_FMA;
2665         }
2666 
2667         /*
2668          * Now we've figured out the masks that determine
2669          * which bits we choose to believe, apply the masks
2670          * to the feature words, then map the kernel's view
2671          * of these feature words into its feature word.
2672          */
2673         cp->cp_edx &= mask_edx;
2674         cp->cp_ecx &= mask_ecx;
2675 
2676         /*
2677          * apply any platform restrictions (we don't call this
2678          * immediately after __cpuid_insn here, because we need the
2679          * workarounds applied above first)
2680          */
2681         platform_cpuid_mangle(cpi->cpi_vendor, 1, cp);
2682 
2683         /*
2684          * In addition to ecx and edx, Intel and AMD are storing a bunch of
2685          * instruction set extensions in leaf 7's ebx, ecx, and edx.
2686          */
2687         if (cpi->cpi_maxeax >= 7) {
2688                 struct cpuid_regs *ecp;
2689                 ecp = &cpi->cpi_std[7];
2690                 ecp->cp_eax = 7;
2691                 ecp->cp_ecx = 0;
2692                 (void) __cpuid_insn(ecp);
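                /*
                 * Leaf 7 is sub-leafed: %ecx selects the sub-leaf, and
                 * sub-leaf 0 (queried here) reports the number of
                 * additional sub-leaves in %eax.
                 */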
2693 
2694                 /*
2695                  * If XSAVE has been disabled, just ignore all of the
2696                  * extended-save-area dependent flags here.
2697                  */
2698                 if (xsave_force_disable) {
2699                         ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_BMI1;
2700                         ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_BMI2;
2701                         ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_AVX2;
2702                         ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_MPX;
2703                         ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_ALL_AVX512;
2704                         ecp->cp_ecx &= ~CPUID_INTC_ECX_7_0_ALL_AVX512;
2705                         ecp->cp_edx &= ~CPUID_INTC_EDX_7_0_ALL_AVX512;
2706                 }
2707 
2708                 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_SMEP)
2709                         add_x86_feature(featureset, X86FSET_SMEP);
2710 
2711                 /*
2712                  * We check disable_smap here in addition to in startup_smap()
2713                  * to ensure CPUs that aren't the boot CPU don't accidentally
2714                  * include it in the feature set and thus generate a mismatched
2715                  * x86 feature set across CPUs.
2716                  */
2717                 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_SMAP &&
2718                     disable_smap == 0)
2719                         add_x86_feature(featureset, X86FSET_SMAP);
2720 
2721                 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_RDSEED)
2722                         add_x86_feature(featureset, X86FSET_RDSEED);
2723 
2724                 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_ADX)
2725                         add_x86_feature(featureset, X86FSET_ADX);
2726 
2727                 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_FSGSBASE)
2728                         add_x86_feature(featureset, X86FSET_FSGSBASE);
2729 
2730                 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_CLFLUSHOPT)
2731                         add_x86_feature(featureset, X86FSET_CLFLUSHOPT);
2732 
2733                 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
2734                         if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_INVPCID)
2735                                 add_x86_feature(featureset, X86FSET_INVPCID);
2736 
2737                         if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_MPX)
2738                                 add_x86_feature(featureset, X86FSET_MPX);
2739 
2740                         if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_CLWB)
2741                                 add_x86_feature(featureset, X86FSET_CLWB);
2742                 }
2743         }
2744 
2745         /*
2746          * fold in overrides from the "eeprom" mechanism
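         * (values presumably seeded from boot properties, hence the name),
         * which lets individual leaf 1 feature bits be forced on or off
         * without a kernel rebuild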
2747          */
2748         cp->cp_edx |= cpuid_feature_edx_include;
2749         cp->cp_edx &= ~cpuid_feature_edx_exclude;
2750 
2751         cp->cp_ecx |= cpuid_feature_ecx_include;
2752         cp->cp_ecx &= ~cpuid_feature_ecx_exclude;
2753 
2754         if (cp->cp_edx & CPUID_INTC_EDX_PSE) {
2755                 add_x86_feature(featureset, X86FSET_LARGEPAGE);
2756         }
2757         if (cp->cp_edx & CPUID_INTC_EDX_TSC) {
2758                 add_x86_feature(featureset, X86FSET_TSC);
2759         }
2760         if (cp->cp_edx & CPUID_INTC_EDX_MSR) {
2761                 add_x86_feature(featureset, X86FSET_MSR);
2762         }
2763         if (cp->cp_edx & CPUID_INTC_EDX_MTRR) {
2764                 add_x86_feature(featureset, X86FSET_MTRR);
2765         }
2766         if (cp->cp_edx & CPUID_INTC_EDX_PGE) {
2767                 add_x86_feature(featureset, X86FSET_PGE);
2768         }
2769         if (cp->cp_edx & CPUID_INTC_EDX_CMOV) {
2770                 add_x86_feature(featureset, X86FSET_CMOV);
2771         }
2772         if (cp->cp_edx & CPUID_INTC_EDX_MMX) {
2773                 add_x86_feature(featureset, X86FSET_MMX);
2774         }
2775         if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 &&
2776             (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) {
2777                 add_x86_feature(featureset, X86FSET_MCA);
2778         }
2779         if (cp->cp_edx & CPUID_INTC_EDX_PAE) {
2780                 add_x86_feature(featureset, X86FSET_PAE);
2781         }
2782         if (cp->cp_edx & CPUID_INTC_EDX_CX8) {
2783                 add_x86_feature(featureset, X86FSET_CX8);
2784         }
2785         if (cp->cp_ecx & CPUID_INTC_ECX_CX16) {
2786                 add_x86_feature(featureset, X86FSET_CX16);
2787         }
2788         if (cp->cp_edx & CPUID_INTC_EDX_PAT) {
2789                 add_x86_feature(featureset, X86FSET_PAT);
2790         }
2791         if (cp->cp_edx & CPUID_INTC_EDX_SEP) {
2792                 add_x86_feature(featureset, X86FSET_SEP);
2793         }
2794         if (cp->cp_edx & CPUID_INTC_EDX_FXSR) {
2795                 /*
2796                  * In our implementation, fxsave/fxrstor
2797                  * are prerequisites before we'll even
2798                  * try to do SSE things.
2799                  */
2800                 if (cp->cp_edx & CPUID_INTC_EDX_SSE) {
2801                         add_x86_feature(featureset, X86FSET_SSE);
2802                 }
2803                 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) {
2804                         add_x86_feature(featureset, X86FSET_SSE2);
2805                 }
2806                 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) {
2807                         add_x86_feature(featureset, X86FSET_SSE3);
2808                 }
2809                 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) {
2810                         add_x86_feature(featureset, X86FSET_SSSE3);
2811                 }
2812                 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) {
2813                         add_x86_feature(featureset, X86FSET_SSE4_1);
2814                 }
2815                 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) {
2816                         add_x86_feature(featureset, X86FSET_SSE4_2);
2817                 }
2818                 if (cp->cp_ecx & CPUID_INTC_ECX_AES) {
2819                         add_x86_feature(featureset, X86FSET_AES);
2820                 }
2821                 if (cp->cp_ecx & CPUID_INTC_ECX_PCLMULQDQ) {
2822                         add_x86_feature(featureset, X86FSET_PCLMULQDQ);
2823                 }
2824 
2825                 if (cpi->cpi_std[7].cp_ebx & CPUID_INTC_EBX_7_0_SHA)
2826                         add_x86_feature(featureset, X86FSET_SHA);
2827 
2828                 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_UMIP)
2829                         add_x86_feature(featureset, X86FSET_UMIP);
2830                 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_PKU)
2831                         add_x86_feature(featureset, X86FSET_PKU);
2832                 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_OSPKE)
2833                         add_x86_feature(featureset, X86FSET_OSPKE);
2834 
2835                 if (cp->cp_ecx & CPUID_INTC_ECX_XSAVE) {
2836                         add_x86_feature(featureset, X86FSET_XSAVE);
2837 
2838                         /* We only test AVX & AVX512 when there is XSAVE */
2839 
2840                         if (cp->cp_ecx & CPUID_INTC_ECX_AVX) {
2841                                 add_x86_feature(featureset,
2842                                     X86FSET_AVX);
2843 
2844                                 /*
2845                                  * Intel says we can't check these without also
2846                                  * checking AVX.
2847                                  */
2848                                 if (cp->cp_ecx & CPUID_INTC_ECX_F16C)
2849                                         add_x86_feature(featureset,
2850                                             X86FSET_F16C);
2851 
2852                                 if (cp->cp_ecx & CPUID_INTC_ECX_FMA)
2853                                         add_x86_feature(featureset,
2854                                             X86FSET_FMA);
2855 
2856                                 if (cpi->cpi_std[7].cp_ebx &
2857                                     CPUID_INTC_EBX_7_0_BMI1)
2858                                         add_x86_feature(featureset,
2859                                             X86FSET_BMI1);
2860 
2861                                 if (cpi->cpi_std[7].cp_ebx &
2862                                     CPUID_INTC_EBX_7_0_BMI2)
2863                                         add_x86_feature(featureset,
2864                                             X86FSET_BMI2);
2865 
2866                                 if (cpi->cpi_std[7].cp_ebx &
2867                                     CPUID_INTC_EBX_7_0_AVX2)
2868                                         add_x86_feature(featureset,
2869                                             X86FSET_AVX2);
2870                         }
2871 
2872                         if (cpi->cpi_vendor == X86_VENDOR_Intel &&
2873                             (cpi->cpi_std[7].cp_ebx &
2874                             CPUID_INTC_EBX_7_0_AVX512F) != 0) {
2875                                 add_x86_feature(featureset, X86FSET_AVX512F);
2876 
2877                                 if (cpi->cpi_std[7].cp_ebx &
2878                                     CPUID_INTC_EBX_7_0_AVX512DQ)
2879                                         add_x86_feature(featureset,
2880                                             X86FSET_AVX512DQ);
2881                                 if (cpi->cpi_std[7].cp_ebx &
2882                                     CPUID_INTC_EBX_7_0_AVX512IFMA)
2883                                         add_x86_feature(featureset,
2884                                             X86FSET_AVX512FMA);
2885                                 if (cpi->cpi_std[7].cp_ebx &
2886                                     CPUID_INTC_EBX_7_0_AVX512PF)
2887                                         add_x86_feature(featureset,
2888                                             X86FSET_AVX512PF);
2889                                 if (cpi->cpi_std[7].cp_ebx &
2890                                     CPUID_INTC_EBX_7_0_AVX512ER)
2891                                         add_x86_feature(featureset,
2892                                             X86FSET_AVX512ER);
2893                                 if (cpi->cpi_std[7].cp_ebx &
2894                                     CPUID_INTC_EBX_7_0_AVX512CD)
2895                                         add_x86_feature(featureset,
2896                                             X86FSET_AVX512CD);
2897                                 if (cpi->cpi_std[7].cp_ebx &
2898                                     CPUID_INTC_EBX_7_0_AVX512BW)
2899                                         add_x86_feature(featureset,
2900                                             X86FSET_AVX512BW);
2901                                 if (cpi->cpi_std[7].cp_ebx &
2902                                     CPUID_INTC_EBX_7_0_AVX512VL)
2903                                         add_x86_feature(featureset,
2904                                             X86FSET_AVX512VL);
2905 
2906                                 if (cpi->cpi_std[7].cp_ecx &
2907                                     CPUID_INTC_ECX_7_0_AVX512VBMI)
2908                                         add_x86_feature(featureset,
2909                                             X86FSET_AVX512VBMI);
2910                                 if (cpi->cpi_std[7].cp_ecx &
2911                                     CPUID_INTC_ECX_7_0_AVX512VNNI)
2912                                         add_x86_feature(featureset,
2913                                             X86FSET_AVX512VNNI);
2914                                 if (cpi->cpi_std[7].cp_ecx &
2915                                     CPUID_INTC_ECX_7_0_AVX512VPOPCDQ)
2916                                         add_x86_feature(featureset,
2917                                             X86FSET_AVX512VPOPCDQ);
2918 
2919                                 if (cpi->cpi_std[7].cp_edx &
2920                                     CPUID_INTC_EDX_7_0_AVX5124NNIW)
2921                                         add_x86_feature(featureset,
2922                                             X86FSET_AVX512NNIW);
2923                                 if (cpi->cpi_std[7].cp_edx &
2924                                     CPUID_INTC_EDX_7_0_AVX5124FMAPS)
2925                                         add_x86_feature(featureset,
2926                                             X86FSET_AVX512FMAPS);
2927                         }
2928                 }
2929         }
2930 
2931         if (cpi->cpi_vendor == X86_VENDOR_Intel) {
2932                 if (cp->cp_ecx & CPUID_INTC_ECX_PCID) {
2933                         add_x86_feature(featureset, X86FSET_PCID);
2934                 }
2935         }
2936 
2937         if (cp->cp_ecx & CPUID_INTC_ECX_X2APIC) {
2938                 add_x86_feature(featureset, X86FSET_X2APIC);
2939         }
2940         if (cp->cp_edx & CPUID_INTC_EDX_DE) {
2941                 add_x86_feature(featureset, X86FSET_DE);
2942         }
2943 #if !defined(__xpv)
2944         if (cp->cp_ecx & CPUID_INTC_ECX_MON) {
2945 
2946                 /*
2947                  * We require the CLFLUSH instruction for the erratum
2948                  * workaround needed when using MONITOR/MWAIT.
2949                  */
2950                 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
2951                         cpi->cpi_mwait.support |= MWAIT_SUPPORT;
2952                         add_x86_feature(featureset, X86FSET_MWAIT);
2953                 } else {
2954                         extern int idle_cpu_assert_cflush_monitor;
2955 
2956                         /*
2957                          * All processors we are aware of which have
2958                          * MONITOR/MWAIT also have CLFLUSH.
2959                          */
2960                         if (idle_cpu_assert_cflush_monitor) {
2961                                 ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) &&
2962                                     (cp->cp_edx & CPUID_INTC_EDX_CLFSH));
2963                         }
2964                 }
2965         }
2966 #endif  /* __xpv */
2967 
2968         if (cp->cp_ecx & CPUID_INTC_ECX_VMX) {
2969                 add_x86_feature(featureset, X86FSET_VMX);
2970         }
2971 
2972         if (cp->cp_ecx & CPUID_INTC_ECX_RDRAND)
2973                 add_x86_feature(featureset, X86FSET_RDRAND);
2974 
2975         /*
2976          * This is only needed the first time; the rest of the cpus are
2977          * expected to report the same value, so we capture it on the boot cpu.
2978          */
2979         if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
2980                 add_x86_feature(featureset, X86FSET_CLFSH);
2981                 x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8);
2982         }
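        /*
         * Leaf 1 %ebx bits 15:8 give the CLFLUSH line size in units of
         * eight bytes, hence the multiplication above.
         */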
2983         if (is_x86_feature(featureset, X86FSET_PAE))
2984                 cpi->cpi_pabits = 36;
2985 
2986         if (cpi->cpi_maxeax >= 0xD && !xsave_force_disable) {
2987                 struct cpuid_regs r, *ecp;
2988 
2989                 ecp = &r;
2990                 ecp->cp_eax = 0xD;
2991                 ecp->cp_ecx = 1;
2992                 ecp->cp_edx = ecp->cp_ebx = 0;
2993                 (void) __cpuid_insn(ecp);
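                /*
                 * Sub-leaf 1 of leaf 0xD enumerates the xsave management
                 * instructions: %eax bit 0 is XSAVEOPT, bit 1 is XSAVEC
                 * (and the compacted format), and bit 3 is XSAVES/XRSTORS.
                 */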
2994 
2995                 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVEOPT)
2996                         add_x86_feature(featureset, X86FSET_XSAVEOPT);
2997                 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVEC)
2998                         add_x86_feature(featureset, X86FSET_XSAVEC);
2999                 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVES)
3000                         add_x86_feature(featureset, X86FSET_XSAVES);
3001         }
3002 
3003         /*
3004          * Work on the "extended" feature information, doing
3005          * some basic initialization for cpuid_pass2()
3006          */
3007         xcpuid = 0;
3008         switch (cpi->cpi_vendor) {
3009         case X86_VENDOR_Intel:
3010                 /*
3011                  * On KVM we know we will have proper support for extended
3012                  * cpuid.
3013                  */
3014                 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf ||
3015                     (get_hwenv() == HW_KVM && cpi->cpi_family == 6 &&
3016                     (cpi->cpi_model == 6 || cpi->cpi_model == 2)))
3017                         xcpuid++;
3018                 break;
3019         case X86_VENDOR_AMD:
3020                 if (cpi->cpi_family > 5 ||
3021                     (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
3022                         xcpuid++;
3023                 break;
3024         case X86_VENDOR_Cyrix:
3025                 /*
3026                  * Only these Cyrix CPUs are -known- to support
3027                  * extended cpuid operations.
3028                  */
3029                 if (x86_type == X86_TYPE_VIA_CYRIX_III ||
3030                     x86_type == X86_TYPE_CYRIX_GXm)
3031                         xcpuid++;
3032                 break;
3033         case X86_VENDOR_Centaur:
3034         case X86_VENDOR_TM:
3035         default:
3036                 xcpuid++;
3037                 break;
3038         }
3039 
3040         if (xcpuid) {
3041                 cp = &cpi->cpi_extd[0];
3042                 cp->cp_eax = CPUID_LEAF_EXT_0;
3043                 cpi->cpi_xmaxeax = __cpuid_insn(cp);
3044         }
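        /*
         * Leaf 0x80000000 returns the maximum supported extended leaf in
         * %eax. Any plausible value has the high bit (CPUID_LEAF_EXT_0
         * itself) set, which is what the test below relies on.
         */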
3045 
3046         if (cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) {
3047 
3048                 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX)
3049                         cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX;
3050 
3051                 switch (cpi->cpi_vendor) {
3052                 case X86_VENDOR_Intel:
3053                 case X86_VENDOR_AMD:
3054                         if (cpi->cpi_xmaxeax < 0x80000001)
3055                                 break;
3056                         cp = &cpi->cpi_extd[1];
3057                         cp->cp_eax = 0x80000001;
3058                         (void) __cpuid_insn(cp);
3059 
3060                         if (cpi->cpi_vendor == X86_VENDOR_AMD &&
3061                             cpi->cpi_family == 5 &&
3062                             cpi->cpi_model == 6 &&
3063                             cpi->cpi_step == 6) {
3064                                 /*
3065                                  * K6 model 6 uses bit 10 to indicate SYSC.
3066                                  * Later models use bit 11. Fix it here.
3067                                  */
3068                                 if (cp->cp_edx & 0x400) {
3069                                         cp->cp_edx &= ~0x400;
3070                                         cp->cp_edx |= CPUID_AMD_EDX_SYSC;
3071                                 }
3072                         }
3073 
3074                         platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp);
3075 
3076                         /*
3077                          * Compute the additions to the kernel's feature word.
3078                          */
3079                         if (cp->cp_edx & CPUID_AMD_EDX_NX) {
3080                                 add_x86_feature(featureset, X86FSET_NX);
3081                         }
3082 
3083                         /*
3084                          * Regardless of whether or not we boot 64-bit,
3085                          * we should have a way to identify whether
3086                          * the CPU is capable of running 64-bit.
3087                          */
3088                         if (cp->cp_edx & CPUID_AMD_EDX_LM) {
3089                                 add_x86_feature(featureset, X86FSET_64);
3090                         }
3091 
3092                         /* 1 GB large page - enable only for the 64-bit kernel */
3093                         if (cp->cp_edx & CPUID_AMD_EDX_1GPG) {
3094                                 add_x86_feature(featureset, X86FSET_1GPG);
3095                         }
3096 
3097                         if ((cpi->cpi_vendor == X86_VENDOR_AMD) &&
3098                             (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) &&
3099                             (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) {
3100                                 add_x86_feature(featureset, X86FSET_SSE4A);
3101                         }
3102 
3103                         /*
3104                          * It's really tricky to support syscall/sysret in
3105                          * the i386 kernel; we rely on sysenter/sysexit
3106                          * instead.  In the amd64 kernel, things are -way-
3107                          * better.
3108                          */
3109                         if (cp->cp_edx & CPUID_AMD_EDX_SYSC) {
3110                                 add_x86_feature(featureset, X86FSET_ASYSC);
3111                         }
3112 
3113                         /*
3114                          * While we're thinking about system calls, note
3115                          * that AMD processors don't support sysenter
3116                          * in long mode at all, so don't try to program them.
3117                          */
3118                         if (x86_vendor == X86_VENDOR_AMD) {
3119                                 remove_x86_feature(featureset, X86FSET_SEP);
3120                         }
3121 
3122                         if (cp->cp_edx & CPUID_AMD_EDX_TSCP) {
3123                                 add_x86_feature(featureset, X86FSET_TSCP);
3124                         }
3125 
3126                         if (cp->cp_ecx & CPUID_AMD_ECX_SVM) {
3127                                 add_x86_feature(featureset, X86FSET_SVM);
3128                         }
3129 
3130                         if (cp->cp_ecx & CPUID_AMD_ECX_TOPOEXT) {
3131                                 add_x86_feature(featureset, X86FSET_TOPOEXT);
3132                         }
3133 
3134                         if (cp->cp_ecx & CPUID_AMD_ECX_XOP) {
3135                                 add_x86_feature(featureset, X86FSET_XOP);
3136                         }
3137 
3138                         if (cp->cp_ecx & CPUID_AMD_ECX_FMA4) {
3139                                 add_x86_feature(featureset, X86FSET_FMA4);
3140                         }
3141 
3142                         if (cp->cp_ecx & CPUID_AMD_ECX_TBM) {
3143                                 add_x86_feature(featureset, X86FSET_TBM);
3144                         }
3145 
3146                         if (cp->cp_ecx & CPUID_AMD_ECX_MONITORX) {
3147                                 add_x86_feature(featureset, X86FSET_MONITORX);
3148                         }
3149                         break;
3150                 default:
3151                         break;
3152                 }
3153 
3154                 /*
3155                  * Get CPUID data about processor cores and hyperthreads.
3156                  */
3157                 switch (cpi->cpi_vendor) {
3158                 case X86_VENDOR_Intel:
3159                         if (cpi->cpi_maxeax >= 4) {
3160                                 cp = &cpi->cpi_std[4];
3161                                 cp->cp_eax = 4;
3162                                 cp->cp_ecx = 0;
3163                                 (void) __cpuid_insn(cp);
3164                                 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp);
3165                         }
3166                         /*FALLTHROUGH*/
3167                 case X86_VENDOR_AMD:
3168                         if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8)
3169                                 break;
3170                         cp = &cpi->cpi_extd[8];
3171                         cp->cp_eax = CPUID_LEAF_EXT_8;
3172                         (void) __cpuid_insn(cp);
3173                         platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8,
3174                             cp);
3175 
3176                         /*
3177                          * AMD uses ebx for some extended functions.
3178                          */
3179                         if (cpi->cpi_vendor == X86_VENDOR_AMD) {
3180                                 /*
3181                                  * While we're here, check for the AMD "Error
3182                                  * Pointer Zero/Restore" feature. This can be
3183                                  * used to set up the FP save handlers
3184                                  * appropriately.
3185                                  */
3186                                 if (cp->cp_ebx & CPUID_AMD_EBX_ERR_PTR_ZERO) {
3187                                         cpi->cpi_fp_amd_save = 0;
3188                                 } else {
3189                                         cpi->cpi_fp_amd_save = 1;
3190                                 }
3191 
3192                                 if (cp->cp_ebx & CPUID_AMD_EBX_CLZERO) {
3193                                         add_x86_feature(featureset,
3194                                             X86FSET_CLZERO);
3195                                 }
3196                         }
3197 
3198                         /*
3199                          * Virtual and physical address limits from
3200                          * cpuid override previously guessed values.
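                         * Leaf 0x80000008 %eax bits 7:0 hold the physical
                         * address width and bits 15:8 the linear address
                         * width, both in bits.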
3201                          */
3202                         cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0);
3203                         cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8);
3204                         break;
3205                 default:
3206                         break;
3207                 }
3208 
3209                 /*
3210                  * Get CPUID data about TSC Invariance in Deep C-State.
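                 * On both vendors, the bit of interest is %edx bit 8 of
                 * leaf 0x80000007, the invariant (non-stop) TSC flag.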
3211                  */
3212                 switch (cpi->cpi_vendor) {
3213                 case X86_VENDOR_Intel:
3214                 case X86_VENDOR_AMD:
3215                         if (cpi->cpi_maxeax >= 7) {
3216                                 cp = &cpi->cpi_extd[7];
3217                                 cp->cp_eax = 0x80000007;
3218                                 cp->cp_ecx = 0;
3219                                 (void) __cpuid_insn(cp);
3220                         }
3221                         break;
3222                 default:
3223                         break;
3224                 }
3225         }
3226 
3227         cpuid_pass1_topology(cpu, featureset);
3228 
3229         /*
3230          * Synthesize chip "revision" and socket type
3231          */
3232         cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family,
3233             cpi->cpi_model, cpi->cpi_step);
3234         cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor,
3235             cpi->cpi_family, cpi->cpi_model, cpi->cpi_step);
3236         cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family,
3237             cpi->cpi_model, cpi->cpi_step);
3238 
3239         if (cpi->cpi_vendor == X86_VENDOR_AMD) {
3240                 if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8 &&
3241                     cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_ERR_PTR_ZERO) {
3242                         /* Special handling for AMD FP not necessary. */
3243                         cpi->cpi_fp_amd_save = 0;
3244                 } else {
3245                         cpi->cpi_fp_amd_save = 1;
3246                 }
3247         }
3248 
3249         /*
3250          * Check the processor leaves that are used for security features.
3251          */
3252         cpuid_scan_security(cpu, featureset);
3253 
3254 pass1_done:
3255         cpi->cpi_pass = 1;
3256 }
3257 
3258 /*
3259  * Make copies of the cpuid table entries we depend on, in
3260  * part for ease of parsing now, in part so that we have only
3261  * one place to correct any of it, in part for ease of
3262  * later export to userland, and in part so we can look at
3263  * this stuff in a crash dump.
3264  */
3265 
3266 /*ARGSUSED*/
3267 void
3268 cpuid_pass2(cpu_t *cpu)
3269 {
3270         uint_t n, nmax;
3271         int i;
3272         struct cpuid_regs *cp;
3273         uint8_t *dp;
3274         uint32_t *iptr;
3275         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
3276 
3277         ASSERT(cpi->cpi_pass == 1);
3278 
3279         if (cpi->cpi_maxeax < 1)
3280                 goto pass2_done;
3281 
3282         if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD)
3283                 nmax = NMAX_CPI_STD;
3284         /*
3285          * (We already handled n == 0 and n == 1 in pass 1)
3286          */
3287         for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) {
3288                 cp->cp_eax = n;
3289 
3290                 /*
3291                  * n == 7 was handled in pass 1
3292                  */
3293                 if (n == 7)
3294                         continue;
3295 
3296                 /*
3297                  * CPUID function 4 expects %ecx to be initialized
3298                  * with an index which indicates which cache to return
3299                  * information about. The OS is expected to call function 4
3300                  * with %ecx set to 0, 1, 2, ... until it returns with
3301                  * EAX[4:0] set to 0, which indicates there are no more
3302                  * caches.
3303                  *
3304                  * Here, populate cpi_std[4] with the information returned by
3305                  * function 4 when %ecx == 0, and do the rest in cpuid_pass3()
3306                  * when dynamic memory allocation becomes available.
3307                  *
3308                  * Note: we need to explicitly initialize %ecx here, since
3309                  * function 4 may have been previously invoked.
3310                  */
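                /*
                 * For reference, a minimal sketch of that enumeration as
                 * it might look in cpuid_pass3() (the local names here are
                 * hypothetical, not necessarily the ones used there):
                 *
                 *      struct cpuid_regs regs;
                 *      uint32_t idx;
                 *
                 *      for (idx = 0; ; idx++) {
                 *              regs.cp_eax = 4;
                 *              regs.cp_ecx = idx;
                 *              (void) __cpuid_insn(&regs);
                 *              if (BITX(regs.cp_eax, 4, 0) == 0)
                 *                      break;  (no more caches)
                 *      }
                 */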
3311                 if (n == 4)
3312                         cp->cp_ecx = 0;
3313 
3314                 (void) __cpuid_insn(cp);
3315                 platform_cpuid_mangle(cpi->cpi_vendor, n, cp);
3316                 switch (n) {
3317                 case 2:
3318                         /*
3319                          * "the lower 8 bits of the %eax register
3320                          * contain a value that identifies the number
3321                          * of times the cpuid [instruction] has to be
3322                          * executed to obtain a complete image of the
3323                          * processor's caching systems."
3324                          *
3325                          * How *do* they make this stuff up?
3326                          */
3327                         cpi->cpi_ncache = sizeof (*cp) *
3328                             BITX(cp->cp_eax, 7, 0);
3329                         if (cpi->cpi_ncache == 0)
3330                                 break;
3331                         cpi->cpi_ncache--;   /* skip count byte */
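                        /*
                         * Each remaining non-zero byte is a one-byte cache
                         * or TLB descriptor to be interpreted against
                         * Intel's descriptor table; a register whose bit 31
                         * is set contains no valid descriptors, which is
                         * why each register is checked below before its
                         * bytes are copied out.
                         */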
3332 
3333                         /*
3334                          * Well, for now, rather than attempt to implement
3335                          * this slightly dubious algorithm, we just look
3336                          * at the first 15 descriptor bytes ..
3337                          */
3338                         if (cpi->cpi_ncache > (sizeof (*cp) - 1))
3339                                 cpi->cpi_ncache = sizeof (*cp) - 1;
3340 
3341                         dp = cpi->cpi_cacheinfo;
3342                         if (BITX(cp->cp_eax, 31, 31) == 0) {
3343                                 uint8_t *p = (void *)&cp->cp_eax;
3344                                 for (i = 1; i < 4; i++)
3345                                         if (p[i] != 0)
3346                                                 *dp++ = p[i];
3347                         }
3348                         if (BITX(cp->cp_ebx, 31, 31) == 0) {
3349                                 uint8_t *p = (void *)&cp->cp_ebx;
3350                                 for (i = 0; i < 4; i++)
3351                                         if (p[i] != 0)
3352                                                 *dp++ = p[i];
3353                         }
3354                         if (BITX(cp->cp_ecx, 31, 31) == 0) {
3355                                 uint8_t *p = (void *)&cp->cp_ecx;
3356                                 for (i = 0; i < 4; i++)
3357                                         if (p[i] != 0)
3358                                                 *dp++ = p[i];
3359                         }
3360                         if (BITX(cp->cp_edx, 31, 31) == 0) {
3361                                 uint8_t *p = (void *)&cp->cp_edx;
3362                                 for (i = 0; i < 4; i++)
3363                                         if (p[i] != 0)
3364                                                 *dp++ = p[i];
3365                         }
3366                         break;
3367 
3368                 case 3: /* Processor serial number, if PSN supported */
3369                         break;
3370 
3371                 case 4: /* Deterministic cache parameters */
3372                         break;
3373 
3374                 case 5: /* Monitor/Mwait parameters */
3375                 {
3376                         size_t mwait_size;
3377 
3378                         /*
3379                          * check cpi_mwait.support which was set in cpuid_pass1
3380                          */
3381                         if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT))
3382                                 break;
3383 
3384                         /*
3385                          * Protect ourselves from an insane mwait line size.
3386                          * Workaround for incomplete hardware emulator(s).
3387                          */
3388                         mwait_size = (size_t)MWAIT_SIZE_MAX(cpi);
3389                         if (mwait_size < sizeof (uint32_t) ||
3390                             !ISP2(mwait_size)) {
3391 #if DEBUG
3392                                 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait "
3393                                     "size %ld", cpu->cpu_id, (long)mwait_size);
3394 #endif
3395                                 break;
3396                         }
3397 
3398                         cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi);
3399                         cpi->cpi_mwait.mon_max = mwait_size;
3400                         if (MWAIT_EXTENSION(cpi)) {
3401                                 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS;
3402                                 if (MWAIT_INT_ENABLE(cpi))
3403                                         cpi->cpi_mwait.support |=
3404                                             MWAIT_ECX_INT_ENABLE;
3405                         }
3406                         break;
3407                 }
3408                 default:
3409                         break;
3410                 }
3411         }
3412 
3413         /*
3414          * XSAVE enumeration
3415          */
3416         if (cpi->cpi_maxeax >= 0xD) {
3417                 struct cpuid_regs regs;
3418                 boolean_t cpuid_d_valid = B_TRUE;
3419 
3420                 cp = &regs;
3421                 cp->cp_eax = 0xD;
3422                 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
3423 
3424                 (void) __cpuid_insn(cp);
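                /*
                 * Sub-leaf 0 of leaf 0xD reports the feature bits that may
                 * be set in XCR0 in %edx:%eax and, in %ecx, the save-area
                 * size that would be required if every supported feature
                 * were enabled.
                 */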
3425 
3426                 /*
3427                  * Sanity checks for debug
3428                  */
3429                 if ((cp->cp_eax & XFEATURE_LEGACY_FP) == 0 ||
3430                     (cp->cp_eax & XFEATURE_SSE) == 0) {
3431                         cpuid_d_valid = B_FALSE;
3432                 }
3433 
3434                 cpi->cpi_xsave.xsav_hw_features_low = cp->cp_eax;
3435                 cpi->cpi_xsave.xsav_hw_features_high = cp->cp_edx;
3436                 cpi->cpi_xsave.xsav_max_size = cp->cp_ecx;
3437 
3438                 /*
3439                  * If the hw supports AVX, get the size and offset in the save
3440                  * area for the ymm state.
3441                  */
3442                 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX) {
3443                         cp->cp_eax = 0xD;
3444                         cp->cp_ecx = 2;
3445                         cp->cp_edx = cp->cp_ebx = 0;
3446 
3447                         (void) __cpuid_insn(cp);
3448 
3449                         if (cp->cp_ebx != CPUID_LEAFD_2_YMM_OFFSET ||
3450                             cp->cp_eax != CPUID_LEAFD_2_YMM_SIZE) {
3451                                 cpuid_d_valid = B_FALSE;
3452                         }
3453 
3454                         cpi->cpi_xsave.ymm_size = cp->cp_eax;
3455                         cpi->cpi_xsave.ymm_offset = cp->cp_ebx;
3456                 }
3457 
3458                 /*
3459                  * If the hw supports MPX, get the size and offset in the
3460                  * save area for BNDREGS and BNDCSR.
3461                  */
3462                 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_MPX) {
3463                         cp->cp_eax = 0xD;
3464                         cp->cp_ecx = 3;
3465                         cp->cp_edx = cp->cp_ebx = 0;
3466 
3467                         (void) __cpuid_insn(cp);
3468 
3469                         cpi->cpi_xsave.bndregs_size = cp->cp_eax;
3470                         cpi->cpi_xsave.bndregs_offset = cp->cp_ebx;
3471 
3472                         cp->cp_eax = 0xD;
3473                         cp->cp_ecx = 4;
3474                         cp->cp_edx = cp->cp_ebx = 0;
3475 
3476                         (void) __cpuid_insn(cp);
3477 
3478                         cpi->cpi_xsave.bndcsr_size = cp->cp_eax;
3479                         cpi->cpi_xsave.bndcsr_offset = cp->cp_ebx;
3480                 }
3481 
3482                 /*
3483                  * If the hw supports AVX512, get the size and offset in the
3484                  * save area for the opmask registers and zmm state.
3485                  */
3486                 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX512) {
3487                         cp->cp_eax = 0xD;
3488                         cp->cp_ecx = 5;
3489                         cp->cp_edx = cp->cp_ebx = 0;
3490 
3491                         (void) __cpuid_insn(cp);
3492 
3493                         cpi->cpi_xsave.opmask_size = cp->cp_eax;
3494                         cpi->cpi_xsave.opmask_offset = cp->cp_ebx;
3495 
3496                         cp->cp_eax = 0xD;
3497                         cp->cp_ecx = 6;
3498                         cp->cp_edx = cp->cp_ebx = 0;
3499 
3500                         (void) __cpuid_insn(cp);
3501 
3502                         cpi->cpi_xsave.zmmlo_size = cp->cp_eax;
3503                         cpi->cpi_xsave.zmmlo_offset = cp->cp_ebx;
3504 
3505                         cp->cp_eax = 0xD;
3506                         cp->cp_ecx = 7;
3507                         cp->cp_edx = cp->cp_ebx = 0;
3508 
3509                         (void) __cpuid_insn(cp);
3510 
3511                         cpi->cpi_xsave.zmmhi_size = cp->cp_eax;
3512                         cpi->cpi_xsave.zmmhi_offset = cp->cp_ebx;
3513                 }
3514 
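                /*
                 * Decide the xsave save-area size: none if XSAVE ended up
                 * unsupported (or was force-disabled), the hardware-reported
                 * maximum when leaf 0xD passed the sanity checks above, and
                 * the recovery path below otherwise.
                 */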
3515                 if (!is_x86_feature(x86_featureset, X86FSET_XSAVE)) {
3516                         xsave_state_size = 0;
3517                 } else if (cpuid_d_valid) {
3518                         xsave_state_size = cpi->cpi_xsave.xsav_max_size;
3519                 } else {
3520                         /* Broken CPUID 0xD, probably in HVM */
3521                         cmn_err(CE_WARN, "cpu%d: CPUID.0xD returns invalid "
3522                             "value: hw_low = %d, hw_high = %d, xsave_size = %d"
3523                             ", ymm_size = %d, ymm_offset = %d\n",
3524                             cpu->cpu_id, cpi->cpi_xsave.xsav_hw_features_low,
3525                             cpi->cpi_xsave.xsav_hw_features_high,
3526                             (int)cpi->cpi_xsave.xsav_max_size,
3527                             (int)cpi->cpi_xsave.ymm_size,
3528                             (int)cpi->cpi_xsave.ymm_offset);
3529 
3530                         if (xsave_state_size != 0) {
3531                                 /*
3532                                  * This must be a non-boot CPU. We cannot
3533                                  * continue, because boot cpu has already
3534                                  * enabled XSAVE.
3535                                  */
3536                                 ASSERT(cpu->cpu_id != 0);
3537                                 cmn_err(CE_PANIC, "cpu%d: we have already "
3538                                     "enabled XSAVE on boot cpu, cannot "
3539                                     "continue.", cpu->cpu_id);
3540                         } else {
3541                                 /*
3542                                  * If we reached here on the boot CPU, it's also
3543                                  * almost certain that we'll reach here on the
3544                                  * non-boot CPUs. When we're here on a boot CPU
3545                                  * we should disable the feature, on a non-boot
3546                                  * CPU we need to confirm that we have.
3547                                  */
3548                                 if (cpu->cpu_id == 0) {
3549                                         remove_x86_feature(x86_featureset,
3550                                             X86FSET_XSAVE);
3551                                         remove_x86_feature(x86_featureset,
3552                                             X86FSET_AVX);
3553                                         remove_x86_feature(x86_featureset,
3554                                             X86FSET_F16C);
3555                                         remove_x86_feature(x86_featureset,
3556                                             X86FSET_BMI1);
3557                                         remove_x86_feature(x86_featureset,
3558                                             X86FSET_BMI2);
3559                                         remove_x86_feature(x86_featureset,
3560                                             X86FSET_FMA);
3561                                         remove_x86_feature(x86_featureset,
3562                                             X86FSET_AVX2);
3563                                         remove_x86_feature(x86_featureset,
3564                                             X86FSET_MPX);
3565                                         remove_x86_feature(x86_featureset,
3566                                             X86FSET_AVX512F);
3567                                         remove_x86_feature(x86_featureset,
3568                                             X86FSET_AVX512DQ);
3569                                         remove_x86_feature(x86_featureset,
3570                                             X86FSET_AVX512PF);
3571                                         remove_x86_feature(x86_featureset,
3572                                             X86FSET_AVX512ER);
3573                                         remove_x86_feature(x86_featureset,
3574                                             X86FSET_AVX512CD);
3575                                         remove_x86_feature(x86_featureset,
3576                                             X86FSET_AVX512BW);
3577                                         remove_x86_feature(x86_featureset,
3578                                             X86FSET_AVX512VL);
3579                                         remove_x86_feature(x86_featureset,
3580                                             X86FSET_AVX512FMA);
3581                                         remove_x86_feature(x86_featureset,
3582                                             X86FSET_AVX512VBMI);
3583                                         remove_x86_feature(x86_featureset,
3584                                             X86FSET_AVX512VNNI);
3585                                         remove_x86_feature(x86_featureset,
3586                                             X86FSET_AVX512VPOPCDQ);
3587                                         remove_x86_feature(x86_featureset,
3588                                             X86FSET_AVX512NNIW);
3589                                         remove_x86_feature(x86_featureset,
3590                                             X86FSET_AVX512FMAPS);
3591 
3592                                         CPI_FEATURES_ECX(cpi) &=
3593                                             ~CPUID_INTC_ECX_XSAVE;
3594                                         CPI_FEATURES_ECX(cpi) &=
3595                                             ~CPUID_INTC_ECX_AVX;
3596                                         CPI_FEATURES_ECX(cpi) &=
3597                                             ~CPUID_INTC_ECX_F16C;
3598                                         CPI_FEATURES_ECX(cpi) &=
3599                                             ~CPUID_INTC_ECX_FMA;
3600                                         CPI_FEATURES_7_0_EBX(cpi) &=
3601                                             ~CPUID_INTC_EBX_7_0_BMI1;
3602                                         CPI_FEATURES_7_0_EBX(cpi) &=
3603                                             ~CPUID_INTC_EBX_7_0_BMI2;
3604                                         CPI_FEATURES_7_0_EBX(cpi) &=
3605                                             ~CPUID_INTC_EBX_7_0_AVX2;
3606                                         CPI_FEATURES_7_0_EBX(cpi) &=
3607                                             ~CPUID_INTC_EBX_7_0_MPX;
3608                                         CPI_FEATURES_7_0_EBX(cpi) &=
3609                                             ~CPUID_INTC_EBX_7_0_ALL_AVX512;
3610 
3611                                         CPI_FEATURES_7_0_ECX(cpi) &=
3612                                             ~CPUID_INTC_ECX_7_0_ALL_AVX512;
3613 
3614                                         CPI_FEATURES_7_0_EDX(cpi) &=
3615                                             ~CPUID_INTC_EDX_7_0_ALL_AVX512;
3616 
3617                                         xsave_force_disable = B_TRUE;
3618                                 } else {
3619                                         VERIFY(is_x86_feature(x86_featureset,
3620                                             X86FSET_XSAVE) == B_FALSE);
3621                                 }
3622                         }
3623                 }
3624         }
3626 
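	/*
	 * Note: CPUID_LEAF_EXT_0 is 0x80000000, so this AND simply tests
	 * the high bit of cpi_xmaxeax, i.e. whether the processor reported
	 * any extended leaves at all.
	 */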
3627         if ((cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) == 0)
3628                 goto pass2_done;
3629 
3630         if ((nmax = cpi->cpi_xmaxeax - CPUID_LEAF_EXT_0 + 1) > NMAX_CPI_EXTD)
3631                 nmax = NMAX_CPI_EXTD;
3632         /*
3633          * Copy the extended properties, fixing them as we go.
3634          * (We already handled n == 0 and n == 1 in pass 1)
3635          */
3636         iptr = (void *)cpi->cpi_brandstr;
3637         for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) {
3638                 cp->cp_eax = CPUID_LEAF_EXT_0 + n;
3639                 (void) __cpuid_insn(cp);
3640                 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_0 + n,
3641                     cp);
3642                 switch (n) {
3643                 case 2:
3644                 case 3:
3645                 case 4:
3646                         /*
3647                          * Extract the brand string
3648                          */
3649                         *iptr++ = cp->cp_eax;
3650                         *iptr++ = cp->cp_ebx;
3651                         *iptr++ = cp->cp_ecx;
3652                         *iptr++ = cp->cp_edx;
3653                         break;
3654                 case 5:
3655                         switch (cpi->cpi_vendor) {
3656                         case X86_VENDOR_AMD:
3657                                 /*
3658                                  * The Athlon and Duron were the first
3659                                  * parts to report the sizes of the
3660                                  * TLB for large pages. Before then,
3661                                  * we don't trust the data.
3662                                  */
3663                                 if (cpi->cpi_family < 6 ||
3664                                     (cpi->cpi_family == 6 &&
3665                                     cpi->cpi_model < 1))
3666                                         cp->cp_eax = 0;
3667                                 break;
3668                         default:
3669                                 break;
3670                         }
3671                         break;
3672                 case 6:
3673                         switch (cpi->cpi_vendor) {
3674                         case X86_VENDOR_AMD:
3675                                 /*
3676                                  * The Athlon and Duron were the first
3677                                  * AMD parts with L2 TLB's.
3678                                  * Before then, don't trust the data.
3679                                  */
				if (cpi->cpi_family < 6 ||
				    (cpi->cpi_family == 6 &&
				    cpi->cpi_model < 1))
3683                                         cp->cp_eax = cp->cp_ebx = 0;
3684                                 /*
3685                                  * AMD Duron rev A0 reports L2
3686                                  * cache size incorrectly as 1K
3687                                  * when it is really 64K
3688                                  */
3689                                 if (cpi->cpi_family == 6 &&
3690                                     cpi->cpi_model == 3 &&
3691                                     cpi->cpi_step == 0) {
3692                                         cp->cp_ecx &= 0xffff;
3693                                         cp->cp_ecx |= 0x400000;
3694                                 }
3695                                 break;
3696                         case X86_VENDOR_Cyrix:  /* VIA C3 */
3697                                 /*
3698                                  * VIA C3 processors are a bit messed
3699                                  * up w.r.t. encoding cache sizes in %ecx
3700                                  */
3701                                 if (cpi->cpi_family != 6)
3702                                         break;
3703                                 /*
3704                                  * model 7 and 8 were incorrectly encoded
3705                                  *
3706                                  * xxx is model 8 really broken?
3707                                  */
3708                                 if (cpi->cpi_model == 7 ||
3709                                     cpi->cpi_model == 8)
3710                                         cp->cp_ecx =
3711                                             BITX(cp->cp_ecx, 31, 24) << 16 |
3712                                             BITX(cp->cp_ecx, 23, 16) << 12 |
3713                                             BITX(cp->cp_ecx, 15, 8) << 8 |
3714                                             BITX(cp->cp_ecx, 7, 0);
3715                                 /*
3716                                  * model 9 stepping 1 has wrong associativity
3717                                  */
3718                                 if (cpi->cpi_model == 9 && cpi->cpi_step == 1)
3719                                         cp->cp_ecx |= 8 << 12;
3720                                 break;
3721                         case X86_VENDOR_Intel:
3722                                 /*
3723                                  * Extended L2 Cache features function.
3724                                  * First appeared on Prescott.
3725                                  */
3726                         default:
3727                                 break;
3728                         }
3729                         break;
3730                 default:
3731                         break;
3732                 }
3733         }
3734 
3735 pass2_done:
3736         cpi->cpi_pass = 2;
3737 }
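
/*
 * For reference, the brand string assembled in cpuid_pass2() above is
 * nothing more than the raw register images of extended leaves
 * 0x80000002-0x80000004 laid out in %eax, %ebx, %ecx, %edx order:
 * twelve 32-bit words forming up to 48 ASCII bytes, NUL-padded by the
 * processor. A hedged user-space sketch of the same extraction,
 * assuming the __get_cpuid() wrapper from GCC's <cpuid.h>:
 *
 *	#include <cpuid.h>
 *	#include <stdio.h>
 *	#include <string.h>
 *
 *	static void
 *	print_brandstr(void)
 *	{
 *		unsigned int r[12];
 *		char brand[49];
 *		int i;
 *
 *		for (i = 0; i < 3; i++) {
 *			(void) __get_cpuid(0x80000002 + i, &r[4 * i],
 *			    &r[4 * i + 1], &r[4 * i + 2], &r[4 * i + 3]);
 *		}
 *		(void) memcpy(brand, r, sizeof (r));
 *		brand[48] = '\0';
 *		(void) printf("%s\n", brand);
 *	}
 */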
3738 
3739 static const char *
3740 intel_cpubrand(const struct cpuid_info *cpi)
3741 {
3742         int i;
3743 
3744         if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
3745             cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
3746                 return ("i486");
3747 
3748         switch (cpi->cpi_family) {
3749         case 5:
3750                 return ("Intel Pentium(r)");
3751         case 6:
3752                 switch (cpi->cpi_model) {
3753                         uint_t celeron, xeon;
3754                         const struct cpuid_regs *cp;
3755                 case 0:
3756                 case 1:
3757                 case 2:
3758                         return ("Intel Pentium(r) Pro");
3759                 case 3:
3760                 case 4:
3761                         return ("Intel Pentium(r) II");
3762                 case 6:
3763                         return ("Intel Celeron(r)");
3764                 case 5:
3765                 case 7:
3766                         celeron = xeon = 0;
3767                         cp = &cpi->cpi_std[2];   /* cache info */
3768 
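			/*
			 * Scan the leaf 2 cache descriptors: descriptor
			 * 0x40 (no L2 cache) suggests a Celeron, while
			 * descriptors 0x44-0x45 (1MB and 2MB L2) suggest
			 * a Xeon. The low byte of %eax holds the leaf's
			 * iteration count rather than a descriptor, so
			 * the %eax scan starts at byte 1.
			 */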
3769                         for (i = 1; i < 4; i++) {
3770                                 uint_t tmp;
3771 
3772                                 tmp = (cp->cp_eax >> (8 * i)) & 0xff;
3773                                 if (tmp == 0x40)
3774                                         celeron++;
3775                                 if (tmp >= 0x44 && tmp <= 0x45)
3776                                         xeon++;
3777                         }
3778 
3779                         for (i = 0; i < 2; i++) {
3780                                 uint_t tmp;
3781 
3782                                 tmp = (cp->cp_ebx >> (8 * i)) & 0xff;
3783                                 if (tmp == 0x40)
3784                                         celeron++;
3785                                 else if (tmp >= 0x44 && tmp <= 0x45)
3786                                         xeon++;
3787                         }
3788 
3789                         for (i = 0; i < 4; i++) {
3790                                 uint_t tmp;
3791 
3792                                 tmp = (cp->cp_ecx >> (8 * i)) & 0xff;
3793                                 if (tmp == 0x40)
3794                                         celeron++;
3795                                 else if (tmp >= 0x44 && tmp <= 0x45)
3796                                         xeon++;
3797                         }
3798 
3799                         for (i = 0; i < 4; i++) {
3800                                 uint_t tmp;
3801 
3802                                 tmp = (cp->cp_edx >> (8 * i)) & 0xff;
3803                                 if (tmp == 0x40)
3804                                         celeron++;
3805                                 else if (tmp >= 0x44 && tmp <= 0x45)
3806                                         xeon++;
3807                         }
3808 
3809                         if (celeron)
3810                                 return ("Intel Celeron(r)");
3811                         if (xeon)
3812                                 return (cpi->cpi_model == 5 ?
3813                                     "Intel Pentium(r) II Xeon(tm)" :
3814                                     "Intel Pentium(r) III Xeon(tm)");
3815                         return (cpi->cpi_model == 5 ?
3816                             "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" :
3817                             "Intel Pentium(r) III or Pentium(r) III Xeon(tm)");
3818                 default:
3819                         break;
3820                 }
3821         default:
3822                 break;
3823         }
3824 
3825         /* BrandID is present if the field is nonzero */
3826         if (cpi->cpi_brandid != 0) {
3827                 static const struct {
3828                         uint_t bt_bid;
3829                         const char *bt_str;
3830                 } brand_tbl[] = {
3831                         { 0x1,  "Intel(r) Celeron(r)" },
3832                         { 0x2,  "Intel(r) Pentium(r) III" },
3833                         { 0x3,  "Intel(r) Pentium(r) III Xeon(tm)" },
3834                         { 0x4,  "Intel(r) Pentium(r) III" },
3835                         { 0x6,  "Mobile Intel(r) Pentium(r) III" },
3836                         { 0x7,  "Mobile Intel(r) Celeron(r)" },
3837                         { 0x8,  "Intel(r) Pentium(r) 4" },
3838                         { 0x9,  "Intel(r) Pentium(r) 4" },
3839                         { 0xa,  "Intel(r) Celeron(r)" },
3840                         { 0xb,  "Intel(r) Xeon(tm)" },
3841                         { 0xc,  "Intel(r) Xeon(tm) MP" },
3842                         { 0xe,  "Mobile Intel(r) Pentium(r) 4" },
3843                         { 0xf,  "Mobile Intel(r) Celeron(r)" },
3844                         { 0x11, "Mobile Genuine Intel(r)" },
3845                         { 0x12, "Intel(r) Celeron(r) M" },
3846                         { 0x13, "Mobile Intel(r) Celeron(r)" },
3847                         { 0x14, "Intel(r) Celeron(r)" },
3848                         { 0x15, "Mobile Genuine Intel(r)" },
3849                         { 0x16, "Intel(r) Pentium(r) M" },
3850                         { 0x17, "Mobile Intel(r) Celeron(r)" }
3851                 };
3852                 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]);
3853                 uint_t sgn;
3854 
3855                 sgn = (cpi->cpi_family << 8) |
3856                     (cpi->cpi_model << 4) | cpi->cpi_step;
3857 
3858                 for (i = 0; i < btblmax; i++)
3859                         if (brand_tbl[i].bt_bid == cpi->cpi_brandid)
3860                                 break;
3861                 if (i < btblmax) {
3862                         if (sgn == 0x6b1 && cpi->cpi_brandid == 3)
3863                                 return ("Intel(r) Celeron(r)");
3864                         if (sgn < 0xf13 && cpi->cpi_brandid == 0xb)
3865                                 return ("Intel(r) Xeon(tm) MP");
3866                         if (sgn < 0xf13 && cpi->cpi_brandid == 0xe)
3867                                 return ("Intel(r) Xeon(tm)");
3868                         return (brand_tbl[i].bt_str);
3869                 }
3870         }
3871 
3872         return (NULL);
3873 }
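
/*
 * A minimal sketch of the signature packing used in the brand table
 * lookup above: family, model, and stepping fold into one comparable
 * value as (family << 8) | (model << 4) | stepping, so family 6,
 * model 0xb, stepping 1 yields 0x6b1 (the Celeron special case).
 *
 *	#include <stdint.h>
 *
 *	static uint32_t
 *	cpu_sgn(uint32_t family, uint32_t model, uint32_t step)
 *	{
 *		return ((family << 8) | (model << 4) | step);
 *	}
 */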
3874 
3875 static const char *
3876 amd_cpubrand(const struct cpuid_info *cpi)
3877 {
3878         if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
3879             cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
3880                 return ("i486 compatible");
3881 
3882         switch (cpi->cpi_family) {
3883         case 5:
3884                 switch (cpi->cpi_model) {
3885                 case 0:
3886                 case 1:
3887                 case 2:
3888                 case 3:
3889                 case 4:
3890                 case 5:
3891                         return ("AMD-K5(r)");
3892                 case 6:
3893                 case 7:
3894                         return ("AMD-K6(r)");
3895                 case 8:
3896                         return ("AMD-K6(r)-2");
3897                 case 9:
3898                         return ("AMD-K6(r)-III");
3899                 default:
3900                         return ("AMD (family 5)");
3901                 }
3902         case 6:
3903                 switch (cpi->cpi_model) {
3904                 case 1:
3905                         return ("AMD-K7(tm)");
3906                 case 0:
3907                 case 2:
3908                 case 4:
3909                         return ("AMD Athlon(tm)");
3910                 case 3:
3911                 case 7:
3912                         return ("AMD Duron(tm)");
3913                 case 6:
3914                 case 8:
3915                 case 10:
3916                         /*
3917                          * Use the L2 cache size to distinguish
3918                          */
3919                         return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ?
3920                             "AMD Athlon(tm)" : "AMD Duron(tm)");
3921                 default:
3922                         return ("AMD (family 6)");
3923                 }
3924         default:
3925                 break;
3926         }
3927 
3928         if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 &&
3929             cpi->cpi_brandid != 0) {
3930                 switch (BITX(cpi->cpi_brandid, 7, 5)) {
3931                 case 3:
3932                         return ("AMD Opteron(tm) UP 1xx");
3933                 case 4:
3934                         return ("AMD Opteron(tm) DP 2xx");
3935                 case 5:
3936                         return ("AMD Opteron(tm) MP 8xx");
3937                 default:
3938                         return ("AMD Opteron(tm)");
3939                 }
3940         }
3941 
3942         return (NULL);
3943 }
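
/*
 * The Athlon/Duron distinction above relies on extended leaf 6
 * (0x80000006), where %ecx bits 31:16 report the L2 cache size in
 * kilobytes: 256K or more means Athlon, less means Duron. A minimal
 * sketch of that decode, assuming the raw %ecx value is in hand:
 *
 *	#include <stdint.h>
 *
 *	static uint32_t
 *	amd_l2_size_kb(uint32_t ecx)
 *	{
 *		return (ecx >> 16);
 *	}
 */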
3944 
3945 static const char *
3946 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type)
3947 {
3948         if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
3949             cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 ||
3950             type == X86_TYPE_CYRIX_486)
3951                 return ("i486 compatible");
3952 
3953         switch (type) {
3954         case X86_TYPE_CYRIX_6x86:
3955                 return ("Cyrix 6x86");
3956         case X86_TYPE_CYRIX_6x86L:
3957                 return ("Cyrix 6x86L");
3958         case X86_TYPE_CYRIX_6x86MX:
3959                 return ("Cyrix 6x86MX");
3960         case X86_TYPE_CYRIX_GXm:
3961                 return ("Cyrix GXm");
3962         case X86_TYPE_CYRIX_MediaGX:
3963                 return ("Cyrix MediaGX");
3964         case X86_TYPE_CYRIX_MII:
3965                 return ("Cyrix M2");
3966         case X86_TYPE_VIA_CYRIX_III:
3967                 return ("VIA Cyrix M3");
3968         default:
3969                 /*
3970                  * Have another wild guess ..
3971                  */
3972                 if (cpi->cpi_family == 4 && cpi->cpi_model == 9)
3973                         return ("Cyrix 5x86");
3974                 else if (cpi->cpi_family == 5) {
3975                         switch (cpi->cpi_model) {
3976                         case 2:
3977                                 return ("Cyrix 6x86");  /* Cyrix M1 */
3978                         case 4:
3979                                 return ("Cyrix MediaGX");
3980                         default:
3981                                 break;
3982                         }
3983                 } else if (cpi->cpi_family == 6) {
3984                         switch (cpi->cpi_model) {
3985                         case 0:
3986                                 return ("Cyrix 6x86MX"); /* Cyrix M2? */
3987                         case 5:
3988                         case 6:
3989                         case 7:
3990                         case 8:
3991                         case 9:
3992                                 return ("VIA C3");
3993                         default:
3994                                 break;
3995                         }
3996                 }
3997                 break;
3998         }
3999         return (NULL);
4000 }
4001 
/*
 * This only gets called in the case that the CPU extended
 * feature brand string (0x80000002, 0x80000003, 0x80000004)
 * isn't available, or contains null bytes for some reason.
 */
4007 static void
4008 fabricate_brandstr(struct cpuid_info *cpi)
4009 {
4010         const char *brand = NULL;
4011 
4012         switch (cpi->cpi_vendor) {
4013         case X86_VENDOR_Intel:
4014                 brand = intel_cpubrand(cpi);
4015                 break;
4016         case X86_VENDOR_AMD:
4017                 brand = amd_cpubrand(cpi);
4018                 break;
4019         case X86_VENDOR_Cyrix:
4020                 brand = cyrix_cpubrand(cpi, x86_type);
4021                 break;
4022         case X86_VENDOR_NexGen:
4023                 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
4024                         brand = "NexGen Nx586";
4025                 break;
4026         case X86_VENDOR_Centaur:
4027                 if (cpi->cpi_family == 5)
4028                         switch (cpi->cpi_model) {
4029                         case 4:
4030                                 brand = "Centaur C6";
4031                                 break;
4032                         case 8:
4033                                 brand = "Centaur C2";
4034                                 break;
4035                         case 9:
4036                                 brand = "Centaur C3";
4037                                 break;
4038                         default:
4039                                 break;
4040                         }
4041                 break;
4042         case X86_VENDOR_Rise:
4043                 if (cpi->cpi_family == 5 &&
4044                     (cpi->cpi_model == 0 || cpi->cpi_model == 2))
4045                         brand = "Rise mP6";
4046                 break;
4047         case X86_VENDOR_SiS:
4048                 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
4049                         brand = "SiS 55x";
4050                 break;
4051         case X86_VENDOR_TM:
4052                 if (cpi->cpi_family == 5 && cpi->cpi_model == 4)
4053                         brand = "Transmeta Crusoe TM3x00 or TM5x00";
4054                 break;
4055         case X86_VENDOR_NSC:
4056         case X86_VENDOR_UMC:
4057         default:
4058                 break;
4059         }
4060         if (brand) {
4061                 (void) strcpy((char *)cpi->cpi_brandstr, brand);
4062                 return;
4063         }
4064 
4065         /*
4066          * If all else fails ...
4067          */
4068         (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr),
4069             "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family,
4070             cpi->cpi_model, cpi->cpi_step);
4071 }
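
/*
 * For example (hypothetical values), a vendor string of "GenuineIntel"
 * with family 6, model 142, stepping 9 would fabricate the brand
 * string "GenuineIntel 6.142.9".
 */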
4072 
4073 /*
4074  * This routine is called just after kernel memory allocation
4075  * becomes available on cpu0, and as part of mp_startup() on
4076  * the other cpus.
4077  *
4078  * Fixup the brand string, and collect any information from cpuid
4079  * that requires dynamically allocated storage to represent.
4080  */
4081 /*ARGSUSED*/
4082 void
4083 cpuid_pass3(cpu_t *cpu)
4084 {
4085         int     i, max, shft, level, size;
4086         struct cpuid_regs regs;
4087         struct cpuid_regs *cp;
4088         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4089 
4090         ASSERT(cpi->cpi_pass == 2);
4091 
4092         /*
4093          * Deterministic cache parameters
4094          *
4095          * Intel uses leaf 0x4 for this, while AMD uses leaf 0x8000001d. The
4096          * values that are present are currently defined to be the same. This
4097          * means we can use the same logic to parse it as long as we use the
4098          * appropriate leaf to get the data. If you're updating this, make sure
4099          * you're careful about which vendor supports which aspect.
4100          *
4101          * Take this opportunity to detect the number of threads sharing the
4102          * last level cache, and construct a corresponding cache id. The
4103          * respective cpuid_info members are initialized to the default case of
4104          * "no last level cache sharing".
4105          */
4106         cpi->cpi_ncpu_shr_last_cache = 1;
4107         cpi->cpi_last_lvl_cacheid = cpu->cpu_id;
4108 
4109         if ((cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) ||
4110             (cpi->cpi_vendor == X86_VENDOR_AMD &&
4111             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1d &&
4112             is_x86_feature(x86_featureset, X86FSET_TOPOEXT))) {
4113                 uint32_t leaf;
4114 
4115                 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
4116                         leaf = 4;
4117                 } else {
4118                         leaf = CPUID_LEAF_EXT_1d;
4119                 }
4120 
4121                 /*
4122                  * Find the # of elements (size) returned by the leaf and along
4123                  * the way detect last level cache sharing details.
4124                  */
4125                 bzero(&regs, sizeof (regs));
4126                 cp = &regs;
4127                 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) {
4128                         cp->cp_eax = leaf;
4129                         cp->cp_ecx = i;
4130 
4131                         (void) __cpuid_insn(cp);
4132 
4133                         if (CPI_CACHE_TYPE(cp) == 0)
4134                                 break;
4135                         level = CPI_CACHE_LVL(cp);
4136                         if (level > max) {
4137                                 max = level;
4138                                 cpi->cpi_ncpu_shr_last_cache =
4139                                     CPI_NTHR_SHR_CACHE(cp) + 1;
4140                         }
4141                 }
4142                 cpi->cpi_cache_leaf_size = size = i;
4143 
4144                 /*
4145                  * Allocate the cpi_cache_leaves array. The first element
4146                  * references the regs for the corresponding leaf with %ecx set
4147                  * to 0. This was gathered in cpuid_pass2().
4148                  */
4149                 if (size > 0) {
4150                         cpi->cpi_cache_leaves =
4151                             kmem_alloc(size * sizeof (cp), KM_SLEEP);
4152                         if (cpi->cpi_vendor == X86_VENDOR_Intel) {
4153                                 cpi->cpi_cache_leaves[0] = &cpi->cpi_std[4];
4154                         } else {
4155                                 cpi->cpi_cache_leaves[0] = &cpi->cpi_extd[0x1d];
4156                         }
4157 
			/*
			 * Allocate storage to hold the additional regs
			 * for the leaf, %ecx == 1 .. cpi_cache_leaf_size - 1.
			 *
			 * The regs for %ecx == 0 were gathered in
			 * cpuid_pass2() and need no new storage; the first
			 * array element references them, as noted above.
			 */
4165                         for (i = 1; i < size; i++) {
4166                                 cp = cpi->cpi_cache_leaves[i] =
4167                                     kmem_zalloc(sizeof (regs), KM_SLEEP);
4168                                 cp->cp_eax = leaf;
4169                                 cp->cp_ecx = i;
4170 
4171                                 (void) __cpuid_insn(cp);
4172                         }
4173                 }
4174                 /*
4175                  * Determine the number of bits needed to represent
4176                  * the number of CPUs sharing the last level cache.
4177                  *
4178                  * Shift off that number of bits from the APIC id to
4179                  * derive the cache id.
4180                  */
4181                 shft = 0;
4182                 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1)
4183                         shft++;
4184                 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft;
4185         }
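
	/*
	 * Illustrative note: the loop above computes
	 * shft = ceiling(log2(n)) for n CPUs sharing the cache; e.g.
	 * n = 6 gives shft = 3, so CPUs whose APIC ids differ only in
	 * the low 3 bits share a cache id. A standalone sketch of the
	 * same computation:
	 *
	 *	static unsigned int
	 *	cacheid_shift(unsigned int ncpu_shr)
	 *	{
	 *		unsigned int i, shft = 0;
	 *
	 *		for (i = 1; i < ncpu_shr; i <<= 1)
	 *			shft++;
	 *		return (shft);
	 *	}
	 */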
4186 
4187         /*
4188          * Now fixup the brand string
4189          */
4190         if ((cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) == 0) {
4191                 fabricate_brandstr(cpi);
4192         } else {
4193 
4194                 /*
4195                  * If we successfully extracted a brand string from the cpuid
4196                  * instruction, clean it up by removing leading spaces and
4197                  * similar junk.
4198                  */
4199                 if (cpi->cpi_brandstr[0]) {
4200                         size_t maxlen = sizeof (cpi->cpi_brandstr);
4201                         char *src, *dst;
4202 
4203                         dst = src = (char *)cpi->cpi_brandstr;
4204                         src[maxlen - 1] = '\0';
4205                         /*
4206                          * strip leading spaces
4207                          */
4208                         while (*src == ' ')
4209                                 src++;
4210                         /*
			 * Remove any "Genuine" or "Authentic" prefixes
4212                          */
4213                         if (strncmp(src, "Genuine ", 8) == 0)
4214                                 src += 8;
4215                         if (strncmp(src, "Authentic ", 10) == 0)
4216                                 src += 10;
4217 
4218                         /*
4219                          * Now do an in-place copy.
4220                          * Map (R) to (r) and (TM) to (tm).
4221                          * The era of teletypes is long gone, and there's
4222                          * -really- no need to shout.
4223                          */
4224                         while (*src != '\0') {
4225                                 if (src[0] == '(') {
4226                                         if (strncmp(src + 1, "R)", 2) == 0) {
4227                                                 (void) strncpy(dst, "(r)", 3);
4228                                                 src += 3;
4229                                                 dst += 3;
4230                                                 continue;
4231                                         }
4232                                         if (strncmp(src + 1, "TM)", 3) == 0) {
4233                                                 (void) strncpy(dst, "(tm)", 4);
4234                                                 src += 4;
4235                                                 dst += 4;
4236                                                 continue;
4237                                         }
4238                                 }
4239                                 *dst++ = *src++;
4240                         }
4241                         *dst = '\0';
4242 
4243                         /*
4244                          * Finally, remove any trailing spaces
4245                          */
4246                         while (--dst > cpi->cpi_brandstr)
4247                                 if (*dst == ' ')
4248                                         *dst = '\0';
4249                                 else
4250                                         break;
4251                 } else
4252                         fabricate_brandstr(cpi);
4253         }
4254         cpi->cpi_pass = 3;
4255 }
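
/*
 * As an illustration of the cleanup above, a raw brand string such as
 * "  Genuine Intel(R) CPU @ 2.40GHz " (hypothetical) comes out as
 * "Intel(r) CPU @ 2.40GHz": leading spaces and the "Genuine " prefix
 * are stripped, (R) and (TM) are lowercased, and trailing spaces are
 * removed.
 */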
4256 
4257 /*
4258  * This routine is called out of bind_hwcap() much later in the life
4259  * of the kernel (post_startup()).  The job of this routine is to resolve
4260  * the hardware feature support and kernel support for those features into
4261  * what we're actually going to tell applications via the aux vector.
4262  */
4263 void
4264 cpuid_pass4(cpu_t *cpu, uint_t *hwcap_out)
4265 {
4266         struct cpuid_info *cpi;
4267         uint_t hwcap_flags = 0, hwcap_flags_2 = 0;
4268 
4269         if (cpu == NULL)
4270                 cpu = CPU;
4271         cpi = cpu->cpu_m.mcpu_cpi;
4272 
4273         ASSERT(cpi->cpi_pass == 3);
4274 
4275         if (cpi->cpi_maxeax >= 1) {
4276                 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES];
4277                 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES];
4278                 uint32_t *ebx = &cpi->cpi_support[STD_EBX_FEATURES];
4279 
4280                 *edx = CPI_FEATURES_EDX(cpi);
4281                 *ecx = CPI_FEATURES_ECX(cpi);
4282                 *ebx = CPI_FEATURES_7_0_EBX(cpi);
4283 
4284                 /*
4285                  * [these require explicit kernel support]
4286                  */
4287                 if (!is_x86_feature(x86_featureset, X86FSET_SEP))
4288                         *edx &= ~CPUID_INTC_EDX_SEP;
4289 
4290                 if (!is_x86_feature(x86_featureset, X86FSET_SSE))
4291                         *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE);
4292                 if (!is_x86_feature(x86_featureset, X86FSET_SSE2))
4293                         *edx &= ~CPUID_INTC_EDX_SSE2;
4294 
4295                 if (!is_x86_feature(x86_featureset, X86FSET_HTT))
4296                         *edx &= ~CPUID_INTC_EDX_HTT;
4297 
4298                 if (!is_x86_feature(x86_featureset, X86FSET_SSE3))
4299                         *ecx &= ~CPUID_INTC_ECX_SSE3;
4300 
4301                 if (!is_x86_feature(x86_featureset, X86FSET_SSSE3))
4302                         *ecx &= ~CPUID_INTC_ECX_SSSE3;
4303                 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_1))
4304                         *ecx &= ~CPUID_INTC_ECX_SSE4_1;
4305                 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_2))
4306                         *ecx &= ~CPUID_INTC_ECX_SSE4_2;
4307                 if (!is_x86_feature(x86_featureset, X86FSET_AES))
4308                         *ecx &= ~CPUID_INTC_ECX_AES;
4309                 if (!is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ))
4310                         *ecx &= ~CPUID_INTC_ECX_PCLMULQDQ;
4311                 if (!is_x86_feature(x86_featureset, X86FSET_XSAVE))
4312                         *ecx &= ~(CPUID_INTC_ECX_XSAVE |
4313                             CPUID_INTC_ECX_OSXSAVE);
4314                 if (!is_x86_feature(x86_featureset, X86FSET_AVX))
4315                         *ecx &= ~CPUID_INTC_ECX_AVX;
4316                 if (!is_x86_feature(x86_featureset, X86FSET_F16C))
4317                         *ecx &= ~CPUID_INTC_ECX_F16C;
4318                 if (!is_x86_feature(x86_featureset, X86FSET_FMA))
4319                         *ecx &= ~CPUID_INTC_ECX_FMA;
4320                 if (!is_x86_feature(x86_featureset, X86FSET_BMI1))
4321                         *ebx &= ~CPUID_INTC_EBX_7_0_BMI1;
4322                 if (!is_x86_feature(x86_featureset, X86FSET_BMI2))
4323                         *ebx &= ~CPUID_INTC_EBX_7_0_BMI2;
4324                 if (!is_x86_feature(x86_featureset, X86FSET_AVX2))
4325                         *ebx &= ~CPUID_INTC_EBX_7_0_AVX2;
4326                 if (!is_x86_feature(x86_featureset, X86FSET_RDSEED))
4327                         *ebx &= ~CPUID_INTC_EBX_7_0_RDSEED;
4328                 if (!is_x86_feature(x86_featureset, X86FSET_ADX))
4329                         *ebx &= ~CPUID_INTC_EBX_7_0_ADX;
4330 
4331                 /*
4332                  * [no explicit support required beyond x87 fp context]
4333                  */
4334                 if (!fpu_exists)
4335                         *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX);
4336 
4337                 /*
4338                  * Now map the supported feature vector to things that we
4339                  * think userland will care about.
4340                  */
4341                 if (*edx & CPUID_INTC_EDX_SEP)
4342                         hwcap_flags |= AV_386_SEP;
4343                 if (*edx & CPUID_INTC_EDX_SSE)
4344                         hwcap_flags |= AV_386_FXSR | AV_386_SSE;
4345                 if (*edx & CPUID_INTC_EDX_SSE2)
4346                         hwcap_flags |= AV_386_SSE2;
4347                 if (*ecx & CPUID_INTC_ECX_SSE3)
4348                         hwcap_flags |= AV_386_SSE3;
4349                 if (*ecx & CPUID_INTC_ECX_SSSE3)
4350                         hwcap_flags |= AV_386_SSSE3;
4351                 if (*ecx & CPUID_INTC_ECX_SSE4_1)
4352                         hwcap_flags |= AV_386_SSE4_1;
4353                 if (*ecx & CPUID_INTC_ECX_SSE4_2)
4354                         hwcap_flags |= AV_386_SSE4_2;
4355                 if (*ecx & CPUID_INTC_ECX_MOVBE)
4356                         hwcap_flags |= AV_386_MOVBE;
4357                 if (*ecx & CPUID_INTC_ECX_AES)
4358                         hwcap_flags |= AV_386_AES;
4359                 if (*ecx & CPUID_INTC_ECX_PCLMULQDQ)
4360                         hwcap_flags |= AV_386_PCLMULQDQ;
4361                 if ((*ecx & CPUID_INTC_ECX_XSAVE) &&
4362                     (*ecx & CPUID_INTC_ECX_OSXSAVE)) {
4363                         hwcap_flags |= AV_386_XSAVE;
4364 
4365                         if (*ecx & CPUID_INTC_ECX_AVX) {
4366                                 uint32_t *ecx_7 = &CPI_FEATURES_7_0_ECX(cpi);
4367                                 uint32_t *edx_7 = &CPI_FEATURES_7_0_EDX(cpi);
4368 
4369                                 hwcap_flags |= AV_386_AVX;
4370                                 if (*ecx & CPUID_INTC_ECX_F16C)
4371                                         hwcap_flags_2 |= AV_386_2_F16C;
4372                                 if (*ecx & CPUID_INTC_ECX_FMA)
4373                                         hwcap_flags_2 |= AV_386_2_FMA;
4374 
4375                                 if (*ebx & CPUID_INTC_EBX_7_0_BMI1)
4376                                         hwcap_flags_2 |= AV_386_2_BMI1;
4377                                 if (*ebx & CPUID_INTC_EBX_7_0_BMI2)
4378                                         hwcap_flags_2 |= AV_386_2_BMI2;
4379                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX2)
4380                                         hwcap_flags_2 |= AV_386_2_AVX2;
4381                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512F)
4382                                         hwcap_flags_2 |= AV_386_2_AVX512F;
4383                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512DQ)
4384                                         hwcap_flags_2 |= AV_386_2_AVX512DQ;
4385                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512IFMA)
4386                                         hwcap_flags_2 |= AV_386_2_AVX512IFMA;
4387                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512PF)
4388                                         hwcap_flags_2 |= AV_386_2_AVX512PF;
4389                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512ER)
4390                                         hwcap_flags_2 |= AV_386_2_AVX512ER;
4391                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512CD)
4392                                         hwcap_flags_2 |= AV_386_2_AVX512CD;
4393                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512BW)
4394                                         hwcap_flags_2 |= AV_386_2_AVX512BW;
4395                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512VL)
4396                                         hwcap_flags_2 |= AV_386_2_AVX512VL;
4397 
4398                                 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VBMI)
4399                                         hwcap_flags_2 |= AV_386_2_AVX512VBMI;
4400                                 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VNNI)
4401                                         hwcap_flags_2 |= AV_386_2_AVX512_VNNI;
4402                                 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VPOPCDQ)
4403                                         hwcap_flags_2 |= AV_386_2_AVX512VPOPCDQ;
4404 
4405                                 if (*edx_7 & CPUID_INTC_EDX_7_0_AVX5124NNIW)
4406                                         hwcap_flags_2 |= AV_386_2_AVX512_4NNIW;
4407                                 if (*edx_7 & CPUID_INTC_EDX_7_0_AVX5124FMAPS)
4408                                         hwcap_flags_2 |= AV_386_2_AVX512_4FMAPS;
4409                         }
4410                 }
4411                 if (*ecx & CPUID_INTC_ECX_VMX)
4412                         hwcap_flags |= AV_386_VMX;
4413                 if (*ecx & CPUID_INTC_ECX_POPCNT)
4414                         hwcap_flags |= AV_386_POPCNT;
4415                 if (*edx & CPUID_INTC_EDX_FPU)
4416                         hwcap_flags |= AV_386_FPU;
4417                 if (*edx & CPUID_INTC_EDX_MMX)
4418                         hwcap_flags |= AV_386_MMX;
4419 
4420                 if (*edx & CPUID_INTC_EDX_TSC)
4421                         hwcap_flags |= AV_386_TSC;
4422                 if (*edx & CPUID_INTC_EDX_CX8)
4423                         hwcap_flags |= AV_386_CX8;
4424                 if (*edx & CPUID_INTC_EDX_CMOV)
4425                         hwcap_flags |= AV_386_CMOV;
4426                 if (*ecx & CPUID_INTC_ECX_CX16)
4427                         hwcap_flags |= AV_386_CX16;
4428 
4429                 if (*ecx & CPUID_INTC_ECX_RDRAND)
4430                         hwcap_flags_2 |= AV_386_2_RDRAND;
4431                 if (*ebx & CPUID_INTC_EBX_7_0_ADX)
4432                         hwcap_flags_2 |= AV_386_2_ADX;
4433                 if (*ebx & CPUID_INTC_EBX_7_0_RDSEED)
4434                         hwcap_flags_2 |= AV_386_2_RDSEED;
4435                 if (*ebx & CPUID_INTC_EBX_7_0_SHA)
4436                         hwcap_flags_2 |= AV_386_2_SHA;
4437                 if (*ebx & CPUID_INTC_EBX_7_0_FSGSBASE)
4438                         hwcap_flags_2 |= AV_386_2_FSGSBASE;
4439                 if (*ebx & CPUID_INTC_EBX_7_0_CLWB)
4440                         hwcap_flags_2 |= AV_386_2_CLWB;
4441                 if (*ebx & CPUID_INTC_EBX_7_0_CLFLUSHOPT)
4442                         hwcap_flags_2 |= AV_386_2_CLFLUSHOPT;
4443 
4444         }
4445         /*
	 * Check a few miscellaneous features.
4447          */
4448         if (is_x86_feature(x86_featureset, X86FSET_CLZERO))
4449                 hwcap_flags_2 |= AV_386_2_CLZERO;
4450 
4451         if (cpi->cpi_xmaxeax < 0x80000001)
4452                 goto pass4_done;
4453 
4454         switch (cpi->cpi_vendor) {
4455                 struct cpuid_regs cp;
4456                 uint32_t *edx, *ecx;
4457 
4458         case X86_VENDOR_Intel:
4459                 /*
		 * Seems like Intel duplicated what was necessary
4461                  * here to make the initial crop of 64-bit OS's work.
4462                  * Hopefully, those are the only "extended" bits
4463                  * they'll add.
4464                  */
4465                 /*FALLTHROUGH*/
4466 
4467         case X86_VENDOR_AMD:
4468                 edx = &cpi->cpi_support[AMD_EDX_FEATURES];
4469                 ecx = &cpi->cpi_support[AMD_ECX_FEATURES];
4470 
4471                 *edx = CPI_FEATURES_XTD_EDX(cpi);
4472                 *ecx = CPI_FEATURES_XTD_ECX(cpi);
4473 
4474                 /*
4475                  * [these features require explicit kernel support]
4476                  */
4477                 switch (cpi->cpi_vendor) {
4478                 case X86_VENDOR_Intel:
4479                         if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
4480                                 *edx &= ~CPUID_AMD_EDX_TSCP;
4481                         break;
4482 
4483                 case X86_VENDOR_AMD:
4484                         if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
4485                                 *edx &= ~CPUID_AMD_EDX_TSCP;
4486                         if (!is_x86_feature(x86_featureset, X86FSET_SSE4A))
4487                                 *ecx &= ~CPUID_AMD_ECX_SSE4A;
4488                         break;
4489 
4490                 default:
4491                         break;
4492                 }
4493 
4494                 /*
4495                  * [no explicit support required beyond
4496                  * x87 fp context and exception handlers]
4497                  */
4498                 if (!fpu_exists)
4499                         *edx &= ~(CPUID_AMD_EDX_MMXamd |
4500                             CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx);
4501 
4502                 if (!is_x86_feature(x86_featureset, X86FSET_NX))
4503                         *edx &= ~CPUID_AMD_EDX_NX;
4504 #if !defined(__amd64)
4505                 *edx &= ~CPUID_AMD_EDX_LM;
4506 #endif
4507                 /*
4508                  * Now map the supported feature vector to
4509                  * things that we think userland will care about.
4510                  */
4511 #if defined(__amd64)
4512                 if (*edx & CPUID_AMD_EDX_SYSC)
4513                         hwcap_flags |= AV_386_AMD_SYSC;
4514 #endif
4515                 if (*edx & CPUID_AMD_EDX_MMXamd)
4516                         hwcap_flags |= AV_386_AMD_MMX;
4517                 if (*edx & CPUID_AMD_EDX_3DNow)
4518                         hwcap_flags |= AV_386_AMD_3DNow;
4519                 if (*edx & CPUID_AMD_EDX_3DNowx)
4520                         hwcap_flags |= AV_386_AMD_3DNowx;
4521                 if (*ecx & CPUID_AMD_ECX_SVM)
4522                         hwcap_flags |= AV_386_AMD_SVM;
4523 
4524                 switch (cpi->cpi_vendor) {
4525                 case X86_VENDOR_AMD:
4526                         if (*edx & CPUID_AMD_EDX_TSCP)
4527                                 hwcap_flags |= AV_386_TSCP;
4528                         if (*ecx & CPUID_AMD_ECX_AHF64)
4529                                 hwcap_flags |= AV_386_AHF;
4530                         if (*ecx & CPUID_AMD_ECX_SSE4A)
4531                                 hwcap_flags |= AV_386_AMD_SSE4A;
4532                         if (*ecx & CPUID_AMD_ECX_LZCNT)
4533                                 hwcap_flags |= AV_386_AMD_LZCNT;
4534                         if (*ecx & CPUID_AMD_ECX_MONITORX)
4535                                 hwcap_flags_2 |= AV_386_2_MONITORX;
4536                         break;
4537 
4538                 case X86_VENDOR_Intel:
4539                         if (*edx & CPUID_AMD_EDX_TSCP)
4540                                 hwcap_flags |= AV_386_TSCP;
4541                         /*
4542                          * Aarrgh.
4543                          * Intel uses a different bit in the same word.
4544                          */
4545                         if (*ecx & CPUID_INTC_ECX_AHF64)
4546                                 hwcap_flags |= AV_386_AHF;
4547                         break;
4548 
4549                 default:
4550                         break;
4551                 }
4552                 break;
4553 
4554         case X86_VENDOR_TM:
4555                 cp.cp_eax = 0x80860001;
4556                 (void) __cpuid_insn(&cp);
4557                 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx;
4558                 break;
4559 
4560         default:
4561                 break;
4562         }
4563 
4564 pass4_done:
4565         cpi->cpi_pass = 4;
4566         if (hwcap_out != NULL) {
4567                 hwcap_out[0] = hwcap_flags;
4568                 hwcap_out[1] = hwcap_flags_2;
4569         }
4570 }
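
/*
 * Userland consumes the hwcap words computed above through the aux
 * vector. A hedged user-space sketch using getisax(3C), assuming the
 * first two returned words correspond to hwcap_flags and
 * hwcap_flags_2:
 *
 *	#include <sys/types.h>
 *	#include <sys/auxv.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		uint32_t hw[2] = { 0, 0 };
 *
 *		(void) getisax(hw, 2);
 *		if (hw[0] & AV_386_SSE4_2)
 *			(void) printf("SSE4.2 supported\n");
 *		if (hw[1] & AV_386_2_RDSEED)
 *			(void) printf("RDSEED supported\n");
 *		return (0);
 *	}
 */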
4571 
4572 
4573 /*
4574  * Simulate the cpuid instruction using the data we previously
4575  * captured about this CPU.  We try our best to return the truth
4576  * about the hardware, independently of kernel support.
4577  */
4578 uint32_t
4579 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp)
4580 {
4581         struct cpuid_info *cpi;
4582         struct cpuid_regs *xcp;
4583 
4584         if (cpu == NULL)
4585                 cpu = CPU;
4586         cpi = cpu->cpu_m.mcpu_cpi;
4587 
4588         ASSERT(cpuid_checkpass(cpu, 3));
4589 
4590         /*
4591          * CPUID data is cached in two separate places: cpi_std for standard
	 * CPUID leaves, and cpi_extd for extended CPUID leaves.
4593          */
4594         if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD) {
4595                 xcp = &cpi->cpi_std[cp->cp_eax];
4596         } else if (cp->cp_eax >= CPUID_LEAF_EXT_0 &&
4597             cp->cp_eax <= cpi->cpi_xmaxeax &&
4598             cp->cp_eax < CPUID_LEAF_EXT_0 + NMAX_CPI_EXTD) {
4599                 xcp = &cpi->cpi_extd[cp->cp_eax - CPUID_LEAF_EXT_0];
4600         } else {
4601                 /*
4602                  * The caller is asking for data from an input parameter which
4603                  * the kernel has not cached.  In this case we go fetch from
4604                  * the hardware and return the data directly to the user.
4605                  */
4606                 return (__cpuid_insn(cp));
4607         }
4608 
4609         cp->cp_eax = xcp->cp_eax;
4610         cp->cp_ebx = xcp->cp_ebx;
4611         cp->cp_ecx = xcp->cp_ecx;
4612         cp->cp_edx = xcp->cp_edx;
4613         return (cp->cp_eax);
4614 }
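
/*
 * A brief usage sketch (hypothetical kernel caller): fetch the cached
 * leaf 1 data for the current CPU and extract the base family from
 * %eax bits 11:8.
 *
 *	struct cpuid_regs cp;
 *	uint_t family;
 *
 *	cp.cp_eax = 1;
 *	(void) cpuid_insn(NULL, &cp);
 *	family = BITX(cp.cp_eax, 11, 8);
 */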
4615 
4616 int
4617 cpuid_checkpass(cpu_t *cpu, int pass)
4618 {
4619         return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL &&
4620             cpu->cpu_m.mcpu_cpi->cpi_pass >= pass);
4621 }
4622 
4623 int
4624 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n)
4625 {
4626         ASSERT(cpuid_checkpass(cpu, 3));
4627 
4628         return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr));
4629 }
4630 
4631 int
4632 cpuid_is_cmt(cpu_t *cpu)
4633 {
4634         if (cpu == NULL)
4635                 cpu = CPU;
4636 
4637         ASSERT(cpuid_checkpass(cpu, 1));
4638 
4639         return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0);
4640 }
4641 
4642 /*
4643  * AMD and Intel both implement the 64-bit variant of the syscall
4644  * instruction (syscallq), so if there's -any- support for syscall,
4645  * cpuid currently says "yes, we support this".
4646  *
4647  * However, Intel decided to -not- implement the 32-bit variant of the
4648  * syscall instruction, so we provide a predicate to allow our caller
4649  * to test that subtlety here.
4650  *
4651  * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor,
4652  *      even in the case where the hardware would in fact support it.
4653  */
4654 /*ARGSUSED*/
4655 int
4656 cpuid_syscall32_insn(cpu_t *cpu)
4657 {
4658         ASSERT(cpuid_checkpass((cpu == NULL ? CPU : cpu), 1));
4659 
4660 #if !defined(__xpv)
4661         if (cpu == NULL)
4662                 cpu = CPU;
4663 
4664         /*CSTYLED*/
4665         {
4666                 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4667 
4668                 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
4669                     cpi->cpi_xmaxeax >= 0x80000001 &&
4670                     (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC))
4671                         return (1);
4672         }
4673 #endif
4674         return (0);
4675 }
4676 
4677 int
4678 cpuid_getidstr(cpu_t *cpu, char *s, size_t n)
4679 {
4680         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4681 
4682         static const char fmt[] =
4683             "x86 (%s %X family %d model %d step %d clock %d MHz)";
4684         static const char fmt_ht[] =
4685             "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)";
4686 
4687         ASSERT(cpuid_checkpass(cpu, 1));
4688 
4689         if (cpuid_is_cmt(cpu))
4690                 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid,
4691                     cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
4692                     cpi->cpi_family, cpi->cpi_model,
4693                     cpi->cpi_step, cpu->cpu_type_info.pi_clock));
4694         return (snprintf(s, n, fmt,
4695             cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
4696             cpi->cpi_family, cpi->cpi_model,
4697             cpi->cpi_step, cpu->cpu_type_info.pi_clock));
4698 }
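
/*
 * For example (hypothetical values), a non-CMT part with signature
 * 0x906EA would yield:
 * "x86 (GenuineIntel 906EA family 6 model 158 step 10 clock 3000 MHz)"
 */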
4699 
4700 const char *
4701 cpuid_getvendorstr(cpu_t *cpu)
4702 {
4703         ASSERT(cpuid_checkpass(cpu, 1));
4704         return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr);
4705 }
4706 
4707 uint_t
4708 cpuid_getvendor(cpu_t *cpu)
4709 {
4710         ASSERT(cpuid_checkpass(cpu, 1));
4711         return (cpu->cpu_m.mcpu_cpi->cpi_vendor);
4712 }
4713 
4714 uint_t
4715 cpuid_getfamily(cpu_t *cpu)
4716 {
4717         ASSERT(cpuid_checkpass(cpu, 1));
4718         return (cpu->cpu_m.mcpu_cpi->cpi_family);
4719 }
4720 
4721 uint_t
4722 cpuid_getmodel(cpu_t *cpu)
4723 {
4724         ASSERT(cpuid_checkpass(cpu, 1));
4725         return (cpu->cpu_m.mcpu_cpi->cpi_model);
4726 }
4727 
4728 uint_t
4729 cpuid_get_ncpu_per_chip(cpu_t *cpu)
4730 {
4731         ASSERT(cpuid_checkpass(cpu, 1));
4732         return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip);
4733 }
4734 
4735 uint_t
4736 cpuid_get_ncore_per_chip(cpu_t *cpu)
4737 {
4738         ASSERT(cpuid_checkpass(cpu, 1));
4739         return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip);
4740 }
4741 
4742 uint_t
4743 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu)
4744 {
4745         ASSERT(cpuid_checkpass(cpu, 2));
4746         return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache);
4747 }
4748 
4749 id_t
4750 cpuid_get_last_lvl_cacheid(cpu_t *cpu)
4751 {
4752         ASSERT(cpuid_checkpass(cpu, 2));
4753         return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
4754 }
4755 
4756 uint_t
4757 cpuid_getstep(cpu_t *cpu)
4758 {
4759         ASSERT(cpuid_checkpass(cpu, 1));
4760         return (cpu->cpu_m.mcpu_cpi->cpi_step);
4761 }
4762 
4763 uint_t
4764 cpuid_getsig(struct cpu *cpu)
4765 {
4766         ASSERT(cpuid_checkpass(cpu, 1));
4767         return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax);
4768 }
4769 
4770 uint32_t
4771 cpuid_getchiprev(struct cpu *cpu)
4772 {
4773         ASSERT(cpuid_checkpass(cpu, 1));
4774         return (cpu->cpu_m.mcpu_cpi->cpi_chiprev);
4775 }
4776 
4777 const char *
4778 cpuid_getchiprevstr(struct cpu *cpu)
4779 {
4780         ASSERT(cpuid_checkpass(cpu, 1));
4781         return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr);
4782 }
4783 
4784 uint32_t
4785 cpuid_getsockettype(struct cpu *cpu)
4786 {
4787         ASSERT(cpuid_checkpass(cpu, 1));
4788         return (cpu->cpu_m.mcpu_cpi->cpi_socket);
4789 }
4790 
4791 const char *
4792 cpuid_getsocketstr(cpu_t *cpu)
4793 {
4794         static const char *socketstr = NULL;
4795         struct cpuid_info *cpi;
4796 
4797         ASSERT(cpuid_checkpass(cpu, 1));
4798         cpi = cpu->cpu_m.mcpu_cpi;
4799 
4800         /* Assume that socket types are the same across the system */
4801         if (socketstr == NULL)
4802                 socketstr = _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family,
4803                     cpi->cpi_model, cpi->cpi_step);
4804 
4806         return (socketstr);
4807 }
4808 
4809 int
4810 cpuid_get_chipid(cpu_t *cpu)
4811 {
4812         ASSERT(cpuid_checkpass(cpu, 1));
4813 
4814         if (cpuid_is_cmt(cpu))
4815                 return (cpu->cpu_m.mcpu_cpi->cpi_chipid);
4816         return (cpu->cpu_id);
4817 }
4818 
4819 id_t
4820 cpuid_get_coreid(cpu_t *cpu)
4821 {
4822         ASSERT(cpuid_checkpass(cpu, 1));
4823         return (cpu->cpu_m.mcpu_cpi->cpi_coreid);
4824 }
4825 
4826 int
4827 cpuid_get_pkgcoreid(cpu_t *cpu)
4828 {
4829         ASSERT(cpuid_checkpass(cpu, 1));
4830         return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid);
4831 }
4832 
4833 int
4834 cpuid_get_clogid(cpu_t *cpu)
4835 {
4836         ASSERT(cpuid_checkpass(cpu, 1));
4837         return (cpu->cpu_m.mcpu_cpi->cpi_clogid);
4838 }
4839 
4840 int
4841 cpuid_get_cacheid(cpu_t *cpu)
4842 {
4843         ASSERT(cpuid_checkpass(cpu, 1));
4844         return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
4845 }
4846 
4847 uint_t
4848 cpuid_get_procnodeid(cpu_t *cpu)
4849 {
4850         ASSERT(cpuid_checkpass(cpu, 1));
4851         return (cpu->cpu_m.mcpu_cpi->cpi_procnodeid);
4852 }
4853 
4854 uint_t
4855 cpuid_get_procnodes_per_pkg(cpu_t *cpu)
4856 {
4857         ASSERT(cpuid_checkpass(cpu, 1));
4858         return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg);
4859 }
4860 
4861 uint_t
4862 cpuid_get_compunitid(cpu_t *cpu)
4863 {
4864         ASSERT(cpuid_checkpass(cpu, 1));
4865         return (cpu->cpu_m.mcpu_cpi->cpi_compunitid);
4866 }
4867 
4868 uint_t
4869 cpuid_get_cores_per_compunit(cpu_t *cpu)
4870 {
4871         ASSERT(cpuid_checkpass(cpu, 1));
4872         return (cpu->cpu_m.mcpu_cpi->cpi_cores_per_compunit);
4873 }
4874 
4875 /*ARGSUSED*/
4876 int
4877 cpuid_have_cr8access(cpu_t *cpu)
4878 {
4879 #if defined(__amd64)
4880         return (1);
4881 #else
4882         struct cpuid_info *cpi;
4883 
4884         ASSERT(cpu != NULL);
4885         cpi = cpu->cpu_m.mcpu_cpi;
4886         if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 &&
4887             (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0)
4888                 return (1);
4889         return (0);
4890 #endif
4891 }
4892 
4893 uint32_t
4894 cpuid_get_apicid(cpu_t *cpu)
4895 {
4896         ASSERT(cpuid_checkpass(cpu, 1));
4897         if (cpu->cpu_m.mcpu_cpi->cpi_maxeax < 1) {
4898                 return (UINT32_MAX);
4899         } else {
4900                 return (cpu->cpu_m.mcpu_cpi->cpi_apicid);
4901         }
4902 }
4903 
4904 void
4905 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits)
4906 {
4907         struct cpuid_info *cpi;
4908 
4909         if (cpu == NULL)
4910                 cpu = CPU;
4911         cpi = cpu->cpu_m.mcpu_cpi;
4912 
4913         ASSERT(cpuid_checkpass(cpu, 1));
4914 
4915         if (pabits)
4916                 *pabits = cpi->cpi_pabits;
4917         if (vabits)
4918                 *vabits = cpi->cpi_vabits;
4919 }
4920 
4921 size_t
4922 cpuid_get_xsave_size()
4923 {
4924         return (MAX(cpuid_info0.cpi_xsave.xsav_max_size,
4925             sizeof (struct xsave_state)));
4926 }
4927 
4928 /*
4929  * Return true if the CPUs on this system require 'pointer clearing' for the
4930  * floating point error pointer exception handling. In the past, this has been
4931  * true for all AMD K7 & K8 CPUs, although newer AMD CPUs have been changed to
4932  * behave the same as Intel. This is checked via the CPUID_AMD_EBX_ERR_PTR_ZERO
4933  * feature bit and is reflected in the cpi_fp_amd_save member.
4934  */
4935 boolean_t
4936 cpuid_need_fp_excp_handling()
4937 {
4938         return (cpuid_info0.cpi_vendor == X86_VENDOR_AMD &&
4939             cpuid_info0.cpi_fp_amd_save != 0);
4940 }
4941 
4942 /*
4943  * Returns the number of data TLB entries for a corresponding
4944  * pagesize.  If it can't be computed, or isn't known, the
4945  * routine returns zero.  If you ask about an architecturally
4946  * impossible pagesize, the routine will panic (so that the
4947  * hat implementor knows that things are inconsistent).
4948  */
4949 uint_t
4950 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize)
4951 {
4952         struct cpuid_info *cpi;
4953         uint_t dtlb_nent = 0;
4954 
4955         if (cpu == NULL)
4956                 cpu = CPU;
4957         cpi = cpu->cpu_m.mcpu_cpi;
4958 
4959         ASSERT(cpuid_checkpass(cpu, 1));
4960 
4961         /*
4962          * Check the L2 TLB info
4963          */
4964         if (cpi->cpi_xmaxeax >= 0x80000006) {
4965                 struct cpuid_regs *cp = &cpi->cpi_extd[6];
4966 
4967                 switch (pagesize) {
4968 
4969                 case 4 * 1024:
4970                         /*
4971                          * All zero in the top 16 bits of the register
4972                          * indicates a unified TLB. Size is in low 16 bits.
4973                          */
4974                         if ((cp->cp_ebx & 0xffff0000) == 0)
4975                                 dtlb_nent = cp->cp_ebx & 0x0000ffff;
4976                         else
4977                                 dtlb_nent = BITX(cp->cp_ebx, 27, 16);
4978                         break;
4979 
4980                 case 2 * 1024 * 1024:
4981                         if ((cp->cp_eax & 0xffff0000) == 0)
4982                                 dtlb_nent = cp->cp_eax & 0x0000ffff;
4983                         else
4984                                 dtlb_nent = BITX(cp->cp_eax, 27, 16);
4985                         break;
4986 
4987                 default:
4988                         panic("unknown L2 pagesize");
4989                         /*NOTREACHED*/
4990                 }
4991         }
4992 
4993         if (dtlb_nent != 0)
4994                 return (dtlb_nent);
4995 
4996         /*
4997          * No L2 TLB support for this size, try L1.
4998          */
4999         if (cpi->cpi_xmaxeax >= 0x80000005) {
5000                 struct cpuid_regs *cp = &cpi->cpi_extd[5];
5001 
5002                 switch (pagesize) {
5003                 case 4 * 1024:
5004                         dtlb_nent = BITX(cp->cp_ebx, 23, 16);
5005                         break;
5006                 case 2 * 1024 * 1024:
5007                         dtlb_nent = BITX(cp->cp_eax, 23, 16);
5008                         break;
5009                 default:
5010                         panic("unknown L1 d-TLB pagesize");
5011                         /*NOTREACHED*/
5012                 }
5013         }
5014 
5015         return (dtlb_nent);
5016 }
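
/*
 * Illustrative caller sketch (an editorial addition, not part of the
 * original source; the function name is hypothetical): an hat
 * implementation might use the entry count above to compute the d-TLB
 * "reach" for 4K pages.  Since cpuid_get_dtlb_nent() returns zero when
 * the count is unknown, the reach is then reported as zero as well.
 */
static size_t
example_dtlb_reach_4k(cpu_t *cpu)
{
        uint_t nent = cpuid_get_dtlb_nent(cpu, 4 * 1024);

        return ((size_t)nent * 4 * 1024);
}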
5017 
5018 /*
5019  * Return 0 if the erratum is not present or not applicable, positive
5020  * if it is, and negative if the status of the erratum is unknown.
5021  *
5022  * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm)
5023  * Processors" #25759, Rev 3.57, August 2005
5024  */
5025 int
5026 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum)
5027 {
5028         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
5029         uint_t eax;
5030 
5031         /*
5032          * Bail out if this CPU isn't an AMD CPU, or if it's
5033          * a legacy (32-bit) AMD CPU.
5034          */
5035         if (cpi->cpi_vendor != X86_VENDOR_AMD ||
5036             cpi->cpi_family == 4 || cpi->cpi_family == 5 ||
5037             cpi->cpi_family == 6) {
5038                 return (0);
5039         }
5040 
5041         eax = cpi->cpi_std[1].cp_eax;
5042 
5043 #define SH_B0(eax)      (eax == 0xf40 || eax == 0xf50)
5044 #define SH_B3(eax)      (eax == 0xf51)
5045 #define B(eax)          (SH_B0(eax) || SH_B3(eax))
5046 
5047 #define SH_C0(eax)      (eax == 0xf48 || eax == 0xf58)
5048 
5049 #define SH_CG(eax)      (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a)
5050 #define DH_CG(eax)      (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0)
5051 #define CH_CG(eax)      (eax == 0xf82 || eax == 0xfb2)
5052 #define CG(eax)         (SH_CG(eax) || DH_CG(eax) || CH_CG(eax))
5053 
5054 #define SH_D0(eax)      (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70)
5055 #define DH_D0(eax)      (eax == 0x10fc0 || eax == 0x10ff0)
5056 #define CH_D0(eax)      (eax == 0x10f80 || eax == 0x10fb0)
5057 #define D0(eax)         (SH_D0(eax) || DH_D0(eax) || CH_D0(eax))
5058 
5059 #define SH_E0(eax)      (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70)
5060 #define JH_E1(eax)      (eax == 0x20f10)        /* JH8_E0 had 0x20f30 */
5061 #define DH_E3(eax)      (eax == 0x20fc0 || eax == 0x20ff0)
5062 #define SH_E4(eax)      (eax == 0x20f51 || eax == 0x20f71)
5063 #define BH_E4(eax)      (eax == 0x20fb1)
5064 #define SH_E5(eax)      (eax == 0x20f42)
5065 #define DH_E6(eax)      (eax == 0x20ff2 || eax == 0x20fc2)
5066 #define JH_E6(eax)      (eax == 0x20f12 || eax == 0x20f32)
5067 #define EX(eax)         (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \
5068                             SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \
5069                             DH_E6(eax) || JH_E6(eax))
5070 
5071 #define DR_AX(eax)      (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02)
5072 #define DR_B0(eax)      (eax == 0x100f20)
5073 #define DR_B1(eax)      (eax == 0x100f21)
5074 #define DR_BA(eax)      (eax == 0x100f2a)
5075 #define DR_B2(eax)      (eax == 0x100f22)
5076 #define DR_B3(eax)      (eax == 0x100f23)
5077 #define RB_C0(eax)      (eax == 0x100f40)
5078 
5079         switch (erratum) {
5080         case 1:
5081                 return (cpi->cpi_family < 0x10);
5082         case 51:        /* what does the asterisk mean? */
5083                 return (B(eax) || SH_C0(eax) || CG(eax));
5084         case 52:
5085                 return (B(eax));
5086         case 57:
5087                 return (cpi->cpi_family <= 0x11);
5088         case 58:
5089                 return (B(eax));
5090         case 60:
5091                 return (cpi->cpi_family <= 0x11);
5092         case 61:
5093         case 62:
5094         case 63:
5095         case 64:
5096         case 65:
5097         case 66:
5098         case 68:
5099         case 69:
5100         case 70:
5101         case 71:
5102                 return (B(eax));
5103         case 72:
5104                 return (SH_B0(eax));
5105         case 74:
5106                 return (B(eax));
5107         case 75:
5108                 return (cpi->cpi_family < 0x10);
5109         case 76:
5110                 return (B(eax));
5111         case 77:
5112                 return (cpi->cpi_family <= 0x11);
5113         case 78:
5114                 return (B(eax) || SH_C0(eax));
5115         case 79:
5116                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
5117         case 80:
5118         case 81:
5119         case 82:
5120                 return (B(eax));
5121         case 83:
5122                 return (B(eax) || SH_C0(eax) || CG(eax));
5123         case 85:
5124                 return (cpi->cpi_family < 0x10);
5125         case 86:
5126                 return (SH_C0(eax) || CG(eax));
5127         case 88:
5128 #if !defined(__amd64)
5129                 return (0);
5130 #else
5131                 return (B(eax) || SH_C0(eax));
5132 #endif
5133         case 89:
5134                 return (cpi->cpi_family < 0x10);
5135         case 90:
5136                 return (B(eax) || SH_C0(eax) || CG(eax));
5137         case 91:
5138         case 92:
5139                 return (B(eax) || SH_C0(eax));
5140         case 93:
5141                 return (SH_C0(eax));
5142         case 94:
5143                 return (B(eax) || SH_C0(eax) || CG(eax));
5144         case 95:
5145 #if !defined(__amd64)
5146                 return (0);
5147 #else
5148                 return (B(eax) || SH_C0(eax));
5149 #endif
5150         case 96:
5151                 return (B(eax) || SH_C0(eax) || CG(eax));
5152         case 97:
5153         case 98:
5154                 return (SH_C0(eax) || CG(eax));
5155         case 99:
5156                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5157         case 100:
5158                 return (B(eax) || SH_C0(eax));
5159         case 101:
5160         case 103:
5161                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5162         case 104:
5163                 return (SH_C0(eax) || CG(eax) || D0(eax));
5164         case 105:
5165         case 106:
5166         case 107:
5167                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5168         case 108:
5169                 return (DH_CG(eax));
5170         case 109:
5171                 return (SH_C0(eax) || CG(eax) || D0(eax));
5172         case 110:
5173                 return (D0(eax) || EX(eax));
5174         case 111:
5175                 return (CG(eax));
5176         case 112:
5177                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
5178         case 113:
5179                 return (eax == 0x20fc0);
5180         case 114:
5181                 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
5182         case 115:
5183                 return (SH_E0(eax) || JH_E1(eax));
5184         case 116:
5185                 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
5186         case 117:
5187                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5188         case 118:
5189                 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) ||
5190                     JH_E6(eax));
5191         case 121:
5192                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
5193         case 122:
5194                 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11);
5195         case 123:
5196                 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax));
5197         case 131:
5198                 return (cpi->cpi_family < 0x10);
5199         case 6336786:
5201                 /*
5202                  * Test for AdvPowerMgmtInfo.TscPStateInvariant
5203                  * if this is a K8 family or newer processor. We're testing for
5204                  * this 'erratum' to determine whether or not we have a constant
5205                  * TSC.
5206                  *
5207                  * Our current fix for this is to disable the C1-Clock ramping.
5208                  * However, this doesn't work on newer processor families nor
5209                  * does it work when virtualized as those devices don't exist.
5210                  */
5211                 if (cpi->cpi_family >= 0x12 || get_hwenv() != HW_NATIVE) {
5212                         return (0);
5213                 }
5214 
5215                 if (CPI_FAMILY(cpi) == 0xf) {
5216                         struct cpuid_regs regs;
5217                         regs.cp_eax = 0x80000007;
5218                         (void) __cpuid_insn(&regs);
5219                         return (!(regs.cp_edx & 0x100));
5220                 }
5221                 return (0);
5222         case 6323525:
5223                 /*
5224                  * This erratum (K8 #147) is not present on family 10 and newer.
5225                  */
5226                 if (cpi->cpi_family >= 0x10) {
5227                         return (0);
5228                 }
5229                 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) |
5230                     (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40);
5231 
5232         case 6671130:
5233                 /*
5234                  * check for processors (pre-Shanghai) that do not provide
5235                  * optimal management of 1gb ptes in its tlb.
5236                  */
5237                 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4);
5238 
5239         case 298:
5240                 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) ||
5241                     DR_B2(eax) || RB_C0(eax));
5242 
5243         case 721:
5244 #if defined(__amd64)
5245                 return (cpi->cpi_family == 0x10 || cpi->cpi_family == 0x12);
5246 #else
5247                 return (0);
5248 #endif
5249 
5250         default:
5251                 return (-1);
5252 
5253         }
5254 }
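
/*
 * Hypothetical usage sketch (the function name is illustrative only):
 * because a negative return from cpuid_opteron_erratum() means the status
 * is unknown, a conservative caller treats any non-zero result as
 * requiring the workaround.
 */
static int
example_needs_workaround(cpu_t *cpu, uint_t erratum)
{
        /* Present (> 0) or indeterminate (< 0): apply the workaround. */
        return (cpuid_opteron_erratum(cpu, erratum) != 0);
}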
5255 
5256 /*
5257  * Determine if specified erratum is present via OSVW (OS Visible Workaround).
5258  * Return 1 if erratum is present, 0 if not present and -1 if indeterminate.
5259  */
5260 int
5261 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum)
5262 {
5263         struct cpuid_info       *cpi;
5264         uint_t                  osvwid;
5265         static int              osvwfeature = -1;
5266         uint64_t                osvwlength;
5267 
5269         cpi = cpu->cpu_m.mcpu_cpi;
5270 
5271         /* confirm OSVW supported */
5272         if (osvwfeature == -1) {
5273                 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW;
5274         } else {
5275                 /* assert that osvw feature setting is consistent on all cpus */
5276                 ASSERT(osvwfeature ==
5277                     (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW));
5278         }
5279         if (!osvwfeature)
5280                 return (-1);
5281 
5282         osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK;
5283 
5284         switch (erratum) {
5285         case 298:       /* osvwid is 0 */
5286                 osvwid = 0;
5287                 if (osvwlength <= (uint64_t)osvwid) {
5288                         /* osvwid 0 is unknown */
5289                         return (-1);
5290                 }
5291 
5292                 /*
5293                  * Check the OSVW STATUS MSR to determine the state
5294                  * of the erratum where:
5295                  *   0 - fixed by HW
5296                  *   1 - BIOS has applied the workaround when BIOS
5297                  *   workaround is available. (Or for other errata,
5298                  *   OS workaround is required.)
5299                  * For a value of 1, caller will confirm that the
5300                  * erratum 298 workaround has indeed been applied by BIOS.
5301                  *
5302                  * A 1 may also be set on cpus that have a HW fix when
5303                  * they are part of a mixed cpu system. Regarding erratum 298:
5304                  *   In a multiprocessor platform, the workaround above
5305                  *   should be applied to all processors regardless of
5306                  *   silicon revision when an affected processor is
5307                  *   present.
5308                  */
5309 
5310                 return (rdmsr(MSR_AMD_OSVW_STATUS +
5311                     (osvwid / OSVW_ID_CNT_PER_MSR)) &
5312                     (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR)));
5313 
5314         default:
5315                 return (-1);
5316         }
5317 }
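
/*
 * Worked example of the indexing above, assuming OSVW_ID_CNT_PER_MSR is
 * 64 (one status bit per OSVW id, 64 ids per 64-bit MSR): a hypothetical
 * osvwid of 70 would be read from MSR_AMD_OSVW_STATUS + 1, bit 6.
 */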
5318 
5319 static const char assoc_str[] = "associativity";
5320 static const char line_str[] = "line-size";
5321 static const char size_str[] = "size";
5322 
5323 static void
5324 add_cache_prop(dev_info_t *devi, const char *label, const char *type,
5325     uint32_t val)
5326 {
5327         char buf[128];
5328 
5329         /*
5330          * ndi_prop_update_int() is used because it is desirable for
5331          * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set.
5332          */
5333         if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf))
5334                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val);
5335 }
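
/*
 * Example with hypothetical values: add_cache_prop(devi, "l2-cache",
 * "size", 512 * 1024) creates an integer devinfo property named
 * "l2-cache-size" with the value 524288.
 */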
5336 
5337 /*
5338  * Intel-style cache/tlb description
5339  *
5340  * Standard cpuid level 2 gives a randomly ordered
5341  * selection of tags that index into a table that describes
5342  * cache and tlb properties.
5343  */
5344 
5345 static const char l1_icache_str[] = "l1-icache";
5346 static const char l1_dcache_str[] = "l1-dcache";
5347 static const char l2_cache_str[] = "l2-cache";
5348 static const char l3_cache_str[] = "l3-cache";
5349 static const char itlb4k_str[] = "itlb-4K";
5350 static const char dtlb4k_str[] = "dtlb-4K";
5351 static const char itlb2M_str[] = "itlb-2M";
5352 static const char itlb4M_str[] = "itlb-4M";
5353 static const char dtlb4M_str[] = "dtlb-4M";
5354 static const char dtlb24_str[] = "dtlb0-2M-4M";
5355 static const char itlb424_str[] = "itlb-4K-2M-4M";
5356 static const char itlb24_str[] = "itlb-2M-4M";
5357 static const char dtlb44_str[] = "dtlb-4K-4M";
5358 static const char sl1_dcache_str[] = "sectored-l1-dcache";
5359 static const char sl2_cache_str[] = "sectored-l2-cache";
5360 static const char itrace_str[] = "itrace-cache";
5361 static const char sl3_cache_str[] = "sectored-l3-cache";
5362 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k";
5363 
5364 static const struct cachetab {
5365         uint8_t         ct_code;
5366         uint8_t         ct_assoc;
5367         uint16_t        ct_line_size;
5368         size_t          ct_size;
5369         const char      *ct_label;
5370 } intel_ctab[] = {
5371         /*
5372          * maintain descending order!
5373          *
5374          * Codes ignored - Reason
5375          * ----------------------
5376          * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache
5377          * f0H/f1H - Currently we do not interpret prefetch size by design
5378          */
5379         { 0xe4, 16, 64, 8*1024*1024, l3_cache_str},
5380         { 0xe3, 16, 64, 4*1024*1024, l3_cache_str},
5381         { 0xe2, 16, 64, 2*1024*1024, l3_cache_str},
5382         { 0xde, 12, 64, 6*1024*1024, l3_cache_str},
5383         { 0xdd, 12, 64, 3*1024*1024, l3_cache_str},
5384         { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str},
5385         { 0xd8, 8, 64, 4*1024*1024, l3_cache_str},
5386         { 0xd7, 8, 64, 2*1024*1024, l3_cache_str},
5387         { 0xd6, 8, 64, 1*1024*1024, l3_cache_str},
5388         { 0xd2, 4, 64, 2*1024*1024, l3_cache_str},
5389         { 0xd1, 4, 64, 1*1024*1024, l3_cache_str},
5390         { 0xd0, 4, 64, 512*1024, l3_cache_str},
5391         { 0xca, 4, 0, 512, sh_l2_tlb4k_str},
5392         { 0xc0, 4, 0, 8, dtlb44_str },
5393         { 0xba, 4, 0, 64, dtlb4k_str },
5394         { 0xb4, 4, 0, 256, dtlb4k_str },
5395         { 0xb3, 4, 0, 128, dtlb4k_str },
5396         { 0xb2, 4, 0, 64, itlb4k_str },
5397         { 0xb0, 4, 0, 128, itlb4k_str },
5398         { 0x87, 8, 64, 1024*1024, l2_cache_str},
5399         { 0x86, 4, 64, 512*1024, l2_cache_str},
5400         { 0x85, 8, 32, 2*1024*1024, l2_cache_str},
5401         { 0x84, 8, 32, 1024*1024, l2_cache_str},
5402         { 0x83, 8, 32, 512*1024, l2_cache_str},
5403         { 0x82, 8, 32, 256*1024, l2_cache_str},
5404         { 0x80, 8, 64, 512*1024, l2_cache_str},
5405         { 0x7f, 2, 64, 512*1024, l2_cache_str},
5406         { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str},
5407         { 0x7c, 8, 64, 1024*1024, sl2_cache_str},
5408         { 0x7b, 8, 64, 512*1024, sl2_cache_str},
5409         { 0x7a, 8, 64, 256*1024, sl2_cache_str},
5410         { 0x79, 8, 64, 128*1024, sl2_cache_str},
5411         { 0x78, 8, 64, 1024*1024, l2_cache_str},
5412         { 0x73, 8, 0, 64*1024, itrace_str},
5413         { 0x72, 8, 0, 32*1024, itrace_str},
5414         { 0x71, 8, 0, 16*1024, itrace_str},
5415         { 0x70, 8, 0, 12*1024, itrace_str},
5416         { 0x68, 4, 64, 32*1024, sl1_dcache_str},
5417         { 0x67, 4, 64, 16*1024, sl1_dcache_str},
5418         { 0x66, 4, 64, 8*1024, sl1_dcache_str},
5419         { 0x60, 8, 64, 16*1024, sl1_dcache_str},
5420         { 0x5d, 0, 0, 256, dtlb44_str},
5421         { 0x5c, 0, 0, 128, dtlb44_str},
5422         { 0x5b, 0, 0, 64, dtlb44_str},
5423         { 0x5a, 4, 0, 32, dtlb24_str},
5424         { 0x59, 0, 0, 16, dtlb4k_str},
5425         { 0x57, 4, 0, 16, dtlb4k_str},
5426         { 0x56, 4, 0, 16, dtlb4M_str},
5427         { 0x55, 0, 0, 7, itlb24_str},
5428         { 0x52, 0, 0, 256, itlb424_str},
5429         { 0x51, 0, 0, 128, itlb424_str},
5430         { 0x50, 0, 0, 64, itlb424_str},
5431         { 0x4f, 0, 0, 32, itlb4k_str},
5432         { 0x4e, 24, 64, 6*1024*1024, l2_cache_str},
5433         { 0x4d, 16, 64, 16*1024*1024, l3_cache_str},
5434         { 0x4c, 12, 64, 12*1024*1024, l3_cache_str},
5435         { 0x4b, 16, 64, 8*1024*1024, l3_cache_str},
5436         { 0x4a, 12, 64, 6*1024*1024, l3_cache_str},
5437         { 0x49, 16, 64, 4*1024*1024, l3_cache_str},
5438         { 0x48, 12, 64, 3*1024*1024, l2_cache_str},
5439         { 0x47, 8, 64, 8*1024*1024, l3_cache_str},
5440         { 0x46, 4, 64, 4*1024*1024, l3_cache_str},
5441         { 0x45, 4, 32, 2*1024*1024, l2_cache_str},
5442         { 0x44, 4, 32, 1024*1024, l2_cache_str},
5443         { 0x43, 4, 32, 512*1024, l2_cache_str},
5444         { 0x42, 4, 32, 256*1024, l2_cache_str},
5445         { 0x41, 4, 32, 128*1024, l2_cache_str},
5446         { 0x3e, 4, 64, 512*1024, sl2_cache_str},
5447         { 0x3d, 6, 64, 384*1024, sl2_cache_str},
5448         { 0x3c, 4, 64, 256*1024, sl2_cache_str},
5449         { 0x3b, 2, 64, 128*1024, sl2_cache_str},
5450         { 0x3a, 6, 64, 192*1024, sl2_cache_str},
5451         { 0x39, 4, 64, 128*1024, sl2_cache_str},
5452         { 0x30, 8, 64, 32*1024, l1_icache_str},
5453         { 0x2c, 8, 64, 32*1024, l1_dcache_str},
5454         { 0x29, 8, 64, 4096*1024, sl3_cache_str},
5455         { 0x25, 8, 64, 2048*1024, sl3_cache_str},
5456         { 0x23, 8, 64, 1024*1024, sl3_cache_str},
5457         { 0x22, 4, 64, 512*1024, sl3_cache_str},
5458         { 0x0e, 6, 64, 24*1024, l1_dcache_str},
5459         { 0x0d, 4, 32, 16*1024, l1_dcache_str},
5460         { 0x0c, 4, 32, 16*1024, l1_dcache_str},
5461         { 0x0b, 4, 0, 4, itlb4M_str},
5462         { 0x0a, 2, 32, 8*1024, l1_dcache_str},
5463         { 0x08, 4, 32, 16*1024, l1_icache_str},
5464         { 0x06, 4, 32, 8*1024, l1_icache_str},
5465         { 0x05, 4, 0, 32, dtlb4M_str},
5466         { 0x04, 4, 0, 8, dtlb4M_str},
5467         { 0x03, 4, 0, 64, dtlb4k_str},
5468         { 0x02, 4, 0, 2, itlb4M_str},
5469         { 0x01, 4, 0, 32, itlb4k_str},
5470         { 0 }
5471 };
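
/*
 * Sketch (illustrative only; the function name is hypothetical) of how
 * the one-byte leaf 2 descriptors that index the table above are packed:
 * each of the four registers carries up to four descriptors and is valid
 * only when its bit 31 is clear, and the low byte of %eax is an iteration
 * count rather than a tag.  The real unpacking into cpi_cacheinfo happens
 * in an earlier cpuid pass; 'out' must have room for 15 descriptors.
 */
static int
example_unpack_leaf2(const struct cpuid_regs *cp, uint8_t *out)
{
        const uint32_t regs[4] = {
            cp->cp_eax, cp->cp_ebx, cp->cp_ecx, cp->cp_edx };
        int i, j, n = 0;

        for (i = 0; i < 4; i++) {
                if (regs[i] & 0x80000000)       /* bit 31: no descriptors */
                        continue;
                for (j = 0; j < 4; j++) {
                        uint8_t desc = (regs[i] >> (j * 8)) & 0xff;

                        if (i == 0 && j == 0)   /* %al: iteration count */
                                continue;
                        if (desc != 0)
                                out[n++] = desc;
                }
        }
        return (n);
}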
5472 
5473 static const struct cachetab cyrix_ctab[] = {
5474         { 0x70, 4, 0, 32, "tlb-4K" },
5475         { 0x80, 4, 16, 16*1024, "l1-cache" },
5476         { 0 }
5477 };
5478 
5479 /*
5480  * Search a cache table for a matching entry
5481  */
5482 static const struct cachetab *
5483 find_cacheent(const struct cachetab *ct, uint_t code)
5484 {
5485         if (code != 0) {
5486                 for (; ct->ct_code != 0; ct++)
5487                         if (ct->ct_code <= code)
5488                                 break;
5489                 if (ct->ct_code == code)
5490                         return (ct);
5491         }
5492         return (NULL);
5493 }
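
/*
 * Because the tables are kept in descending order, the scan above stops
 * at the first entry whose code is <= the one sought; equality is a hit.
 * For example, looking up 0x51 in intel_ctab stops at the 0x51 entry,
 * while looking up the unlisted 0x53 stops at 0x52 and returns NULL.
 */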
5494 
5495 /*
5496  * Populate cachetab entry with L2 or L3 cache information using
5497  * cpuid function 4. This function is called from intel_walk_cacheinfo()
5498  * when descriptor 0x49 is encountered. It returns 0 if no such cache
5499  * information is found.
5500  */
5501 static int
5502 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi)
5503 {
5504         uint32_t level, i;
5505         int ret = 0;
5506 
5507         for (i = 0; i < cpi->cpi_cache_leaf_size; i++) {
5508                 level = CPI_CACHE_LVL(cpi->cpi_cache_leaves[i]);
5509 
5510                 if (level == 2 || level == 3) {
5511                         ct->ct_assoc =
5512                             CPI_CACHE_WAYS(cpi->cpi_cache_leaves[i]) + 1;
5513                         ct->ct_line_size =
5514                             CPI_CACHE_COH_LN_SZ(cpi->cpi_cache_leaves[i]) + 1;
5515                         ct->ct_size = ct->ct_assoc *
5516                             (CPI_CACHE_PARTS(cpi->cpi_cache_leaves[i]) + 1) *
5517                             ct->ct_line_size *
5518                             (cpi->cpi_cache_leaves[i]->cp_ecx + 1);
5519 
5520                         if (level == 2) {
5521                                 ct->ct_label = l2_cache_str;
5522                         } else if (level == 3) {
5523                                 ct->ct_label = l3_cache_str;
5524                         }
5525                         ret = 1;
5526                 }
5527         }
5528 
5529         return (ret);
5530 }
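
/*
 * Worked example with hypothetical leaf 4 values: 16 ways (ways field of
 * 15), 1 partition (parts field of 0), 64-byte lines (line size field of
 * 63) and 8192 sets (%ecx of 8191) yield 16 * 1 * 64 * 8192 = 8MB,
 * labeled "l3-cache" if the level field reads 3.
 */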
5531 
5532 /*
5533  * Walk the cacheinfo descriptor, applying 'func' to every valid element
5534  * The walk is terminated if the walker returns non-zero.
5535  */
5536 static void
5537 intel_walk_cacheinfo(struct cpuid_info *cpi,
5538     void *arg, int (*func)(void *, const struct cachetab *))
5539 {
5540         const struct cachetab *ct;
5541         struct cachetab des_49_ct, des_b1_ct;
5542         uint8_t *dp;
5543         int i;
5544 
5545         if ((dp = cpi->cpi_cacheinfo) == NULL)
5546                 return;
5547         for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
5548                 /*
5549                  * For overloaded descriptor 0x49 we use cpuid function 4,
5550                  * if supported by the current processor, to create
5551                  * cache information.
5552                  * For overloaded descriptor 0xb1 we use the X86_PAE flag
5553                  * to disambiguate the cache information.
5554                  */
5555                 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 &&
5556                     intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) {
5557                         ct = &des_49_ct;
5558                 } else if (*dp == 0xb1) {
5559                         des_b1_ct.ct_code = 0xb1;
5560                         des_b1_ct.ct_assoc = 4;
5561                         des_b1_ct.ct_line_size = 0;
5562                         if (is_x86_feature(x86_featureset, X86FSET_PAE)) {
5563                                 des_b1_ct.ct_size = 8;
5564                                 des_b1_ct.ct_label = itlb2M_str;
5565                         } else {
5566                                 des_b1_ct.ct_size = 4;
5567                                 des_b1_ct.ct_label = itlb4M_str;
5568                         }
5569                         ct = &des_b1_ct;
5570                 } else {
5571                         if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) {
5572                                 continue;
5573                         }
5574                 }
5575 
5576                 if (func(arg, ct) != 0) {
5577                         break;
5578                 }
5579         }
5580 }
5581 
5582 /*
5583  * (Like the Intel one, except for Cyrix CPUs)
5584  */
5585 static void
5586 cyrix_walk_cacheinfo(struct cpuid_info *cpi,
5587     void *arg, int (*func)(void *, const struct cachetab *))
5588 {
5589         const struct cachetab *ct;
5590         uint8_t *dp;
5591         int i;
5592 
5593         if ((dp = cpi->cpi_cacheinfo) == NULL)
5594                 return;
5595         for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
5596                 /*
5597                  * Search Cyrix-specific descriptor table first ..
5598                  */
5599                 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) {
5600                         if (func(arg, ct) != 0)
5601                                 break;
5602                         continue;
5603                 }
5604                 /*
5605                  * .. else fall back to the Intel one
5606                  */
5607                 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) {
5608                         if (func(arg, ct) != 0)
5609                                 break;
5610                         continue;
5611                 }
5612         }
5613 }
5614 
5615 /*
5616  * A cacheinfo walker that adds associativity, line-size, and size properties
5617  * to the devinfo node it is passed as an argument.
5618  */
5619 static int
5620 add_cacheent_props(void *arg, const struct cachetab *ct)
5621 {
5622         dev_info_t *devi = arg;
5623 
5624         add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc);
5625         if (ct->ct_line_size != 0)
5626                 add_cache_prop(devi, ct->ct_label, line_str,
5627                     ct->ct_line_size);
5628         add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size);
5629         return (0);
5630 }
5631 
5632 
5633 static const char fully_assoc[] = "fully-associative?";
5634 
5635 /*
5636  * AMD style cache/tlb description
5637  *
5638  * Extended functions 5 and 6 directly describe properties of
5639  * tlbs and various cache levels.
5640  */
5641 static void
5642 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc)
5643 {
5644         switch (assoc) {
5645         case 0: /* reserved; ignore */
5646                 break;
5647         default:
5648                 add_cache_prop(devi, label, assoc_str, assoc);
5649                 break;
5650         case 0xff:
5651                 add_cache_prop(devi, label, fully_assoc, 1);
5652                 break;
5653         }
5654 }
5655 
5656 static void
5657 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
5658 {
5659         if (size == 0)
5660                 return;
5661         add_cache_prop(devi, label, size_str, size);
5662         add_amd_assoc(devi, label, assoc);
5663 }
5664 
5665 static void
5666 add_amd_cache(dev_info_t *devi, const char *label,
5667     uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
5668 {
5669         if (size == 0 || line_size == 0)
5670                 return;
5671         add_amd_assoc(devi, label, assoc);
5672         /*
5673          * Most AMD parts have a sectored cache. Multiple cache lines are
5674          * associated with each tag. A sector consists of all cache lines
5675          * associated with a tag. For example, the AMD K6-III has a sector
5676          * size of 2 cache lines per tag.
5677          */
5678         if (lines_per_tag != 0)
5679                 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
5680         add_cache_prop(devi, label, line_str, line_size);
5681         add_cache_prop(devi, label, size_str, size * 1024);
5682 }
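
/*
 * Worked example with a hypothetical register value: if %ecx of extended
 * function 5 read 0x40020140, the L1 d-cache (as decoded in
 * amd_cache_info() below) would be 0x40 (64K) in size and 2-way set
 * associative, with 1 line per tag and a 0x40 (64) byte line size.
 */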
5683 
5684 static void
5685 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc)
5686 {
5687         switch (assoc) {
5688         case 0: /* off */
5689                 break;
5690         case 1:
5691         case 2:
5692         case 4:
5693                 add_cache_prop(devi, label, assoc_str, assoc);
5694                 break;
5695         case 6:
5696                 add_cache_prop(devi, label, assoc_str, 8);
5697                 break;
5698         case 8:
5699                 add_cache_prop(devi, label, assoc_str, 16);
5700                 break;
5701         case 0xf:
5702                 add_cache_prop(devi, label, fully_assoc, 1);
5703                 break;
5704         default: /* reserved; ignore */
5705                 break;
5706         }
5707 }
5708 
5709 static void
5710 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
5711 {
5712         if (size == 0 || assoc == 0)
5713                 return;
5714         add_amd_l2_assoc(devi, label, assoc);
5715         add_cache_prop(devi, label, size_str, size);
5716 }
5717 
5718 static void
5719 add_amd_l2_cache(dev_info_t *devi, const char *label,
5720     uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
5721 {
5722         if (size == 0 || assoc == 0 || line_size == 0)
5723                 return;
5724         add_amd_l2_assoc(devi, label, assoc);
5725         if (lines_per_tag != 0)
5726                 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
5727         add_cache_prop(devi, label, line_str, line_size);
5728         add_cache_prop(devi, label, size_str, size * 1024);
5729 }
5730 
5731 static void
5732 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi)
5733 {
5734         struct cpuid_regs *cp;
5735 
5736         if (cpi->cpi_xmaxeax < 0x80000005)
5737                 return;
5738         cp = &cpi->cpi_extd[5];
5739 
5740         /*
5741          * 4M/2M L1 TLB configuration
5742          *
5743          * We report the size for 2M pages because AMD uses two
5744          * TLB entries for one 4M page.
5745          */
5746         add_amd_tlb(devi, "dtlb-2M",
5747             BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16));
5748         add_amd_tlb(devi, "itlb-2M",
5749             BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0));
5750 
5751         /*
5752          * 4K L1 TLB configuration
5753          */
5754 
5755         switch (cpi->cpi_vendor) {
5756                 uint_t nentries;
5757         case X86_VENDOR_TM:
5758                 if (cpi->cpi_family >= 5) {
5759                         /*
5760                          * Crusoe processors have 256 TLB entries, but
5761                          * cpuid data format constrains them to only
5762                          * reporting 255 of them.
5763                          */
5764                         if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255)
5765                                 nentries = 256;
5766                         /*
5767                          * Crusoe processors also have a unified TLB
5768                          */
5769                         add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24),
5770                             nentries);
5771                         break;
5772                 }
5773                 /*FALLTHROUGH*/
5774         default:
5775                 add_amd_tlb(devi, itlb4k_str,
5776                     BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16));
5777                 add_amd_tlb(devi, dtlb4k_str,
5778                     BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0));
5779                 break;
5780         }
5781 
5782         /*
5783          * data L1 cache configuration
5784          */
5785 
5786         add_amd_cache(devi, l1_dcache_str,
5787             BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16),
5788             BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0));
5789 
5790         /*
5791          * code L1 cache configuration
5792          */
5793 
5794         add_amd_cache(devi, l1_icache_str,
5795             BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16),
5796             BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0));
5797 
5798         if (cpi->cpi_xmaxeax < 0x80000006)
5799                 return;
5800         cp = &cpi->cpi_extd[6];
5801 
5802         /* Check for a unified L2 TLB for large pages */
5803 
5804         if (BITX(cp->cp_eax, 31, 16) == 0)
5805                 add_amd_l2_tlb(devi, "l2-tlb-2M",
5806                     BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
5807         else {
5808                 add_amd_l2_tlb(devi, "l2-dtlb-2M",
5809                     BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
5810                 add_amd_l2_tlb(devi, "l2-itlb-2M",
5811                     BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
5812         }
5813 
5814         /* Check for a unified L2 TLB for 4K pages */
5815 
5816         if (BITX(cp->cp_ebx, 31, 16) == 0) {
5817                 add_amd_l2_tlb(devi, "l2-tlb-4K",
5818                     BITX(cp->cp_ebx, 15, 12), BITX(cp->cp_ebx, 11, 0));
5819         } else {
5820                 add_amd_l2_tlb(devi, "l2-dtlb-4K",
5821                     BITX(cp->cp_ebx, 31, 28), BITX(cp->cp_ebx, 27, 16));
5822                 add_amd_l2_tlb(devi, "l2-itlb-4K",
5823                     BITX(cp->cp_ebx, 15, 12), BITX(cp->cp_ebx, 11, 0));
5824         }
5825 
5826         add_amd_l2_cache(devi, l2_cache_str,
5827             BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12),
5828             BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0));
5829 }
5830 
5831 /*
5832  * There are two basic ways that the x86 world describes its cache
5833  * and tlb architecture - Intel's way and AMD's way.
5834  *
5835  * Return which flavor of cache architecture we should use
5836  */
5837 static int
5838 x86_which_cacheinfo(struct cpuid_info *cpi)
5839 {
5840         switch (cpi->cpi_vendor) {
5841         case X86_VENDOR_Intel:
5842                 if (cpi->cpi_maxeax >= 2)
5843                         return (X86_VENDOR_Intel);
5844                 break;
5845         case X86_VENDOR_AMD:
5846                 /*
5847                  * The K5 model 1 was the first part from AMD that reported
5848                  * cache sizes via extended cpuid functions.
5849                  */
5850                 if (cpi->cpi_family > 5 ||
5851                     (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
5852                         return (X86_VENDOR_AMD);
5853                 break;
5854         case X86_VENDOR_TM:
5855                 if (cpi->cpi_family >= 5)
5856                         return (X86_VENDOR_AMD);
5857                 /*FALLTHROUGH*/
5858         default:
5859                 /*
5860                  * If they have extended CPU data for 0x80000005
5861                  * then we assume they have AMD-format cache
5862                  * information.
5863                  *
5864                  * If not, and the vendor happens to be Cyrix,
5865                  * then try our Cyrix-specific handler.
5866                  *
5867                  * If we're not Cyrix, then assume we're using Intel's
5868                  * table-driven format instead.
5869                  */
5870                 if (cpi->cpi_xmaxeax >= 0x80000005)
5871                         return (X86_VENDOR_AMD);
5872                 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix)
5873                         return (X86_VENDOR_Cyrix);
5874                 else if (cpi->cpi_maxeax >= 2)
5875                         return (X86_VENDOR_Intel);
5876                 break;
5877         }
5878         return (-1);
5879 }
5880 
5881 void
5882 cpuid_set_cpu_properties(void *dip, processorid_t cpu_id,
5883     struct cpuid_info *cpi)
5884 {
5885         dev_info_t *cpu_devi;
5886         int create;
5887 
5888         cpu_devi = (dev_info_t *)dip;
5889 
5890         /* device_type */
5891         (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
5892             "device_type", "cpu");
5893 
5894         /* reg */
5895         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5896             "reg", cpu_id);
5897 
5898         /* cpu-mhz, and clock-frequency */
5899         if (cpu_freq > 0) {
5900                 long long mul;
5901 
5902                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5903                     "cpu-mhz", cpu_freq);
5904                 if ((mul = cpu_freq * 1000000LL) <= INT_MAX)
5905                         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5906                             "clock-frequency", (int)mul);
5907         }
5908 
5909         if (!is_x86_feature(x86_featureset, X86FSET_CPUID)) {
5910                 return;
5911         }
5912 
5913         /* vendor-id */
5914         (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
5915             "vendor-id", cpi->cpi_vendorstr);
5916 
5917         if (cpi->cpi_maxeax == 0) {
5918                 return;
5919         }
5920 
5921         /*
5922          * family, model, and step
5923          */
5924         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5925             "family", CPI_FAMILY(cpi));
5926         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5927             "cpu-model", CPI_MODEL(cpi));
5928         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5929             "stepping-id", CPI_STEP(cpi));
5930 
5931         /* type */
5932         switch (cpi->cpi_vendor) {
5933         case X86_VENDOR_Intel:
5934                 create = 1;
5935                 break;
5936         default:
5937                 create = 0;
5938                 break;
5939         }
5940         if (create)
5941                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5942                     "type", CPI_TYPE(cpi));
5943 
5944         /* ext-family */
5945         switch (cpi->cpi_vendor) {
5946         case X86_VENDOR_Intel:
5947         case X86_VENDOR_AMD:
5948                 create = cpi->cpi_family >= 0xf;
5949                 break;
5950         default:
5951                 create = 0;
5952                 break;
5953         }
5954         if (create)
5955                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5956                     "ext-family", CPI_FAMILY_XTD(cpi));
5957 
5958         /* ext-model */
5959         switch (cpi->cpi_vendor) {
5960         case X86_VENDOR_Intel:
5961                 create = IS_EXTENDED_MODEL_INTEL(cpi);
5962                 break;
5963         case X86_VENDOR_AMD:
5964                 create = CPI_FAMILY(cpi) == 0xf;
5965                 break;
5966         default:
5967                 create = 0;
5968                 break;
5969         }
5970         if (create)
5971                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5972                     "ext-model", CPI_MODEL_XTD(cpi));
5973 
5974         /* generation */
5975         switch (cpi->cpi_vendor) {
5976         case X86_VENDOR_AMD:
5977                 /*
5978                  * AMD K5 model 1 was the first part to support this
5979                  */
5980                 create = cpi->cpi_xmaxeax >= 0x80000001;
5981                 break;
5982         default:
5983                 create = 0;
5984                 break;
5985         }
5986         if (create)
5987                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5988                     "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8));
5989 
5990         /* brand-id */
5991         switch (cpi->cpi_vendor) {
5992         case X86_VENDOR_Intel:
5993                 /*
5994                  * brand id first appeared on Pentium III Xeon model 8 and
5995                  * Celeron model 8 processors; on AMD it arrived with Opteron
5996                  */
5997                 create = cpi->cpi_family > 6 ||
5998                     (cpi->cpi_family == 6 && cpi->cpi_model >= 8);
5999                 break;
6000         case X86_VENDOR_AMD:
6001                 create = cpi->cpi_family >= 0xf;
6002                 break;
6003         default:
6004                 create = 0;
6005                 break;
6006         }
6007         if (create && cpi->cpi_brandid != 0) {
6008                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6009                     "brand-id", cpi->cpi_brandid);
6010         }
6011 
6012         /* chunks, and apic-id */
6013         switch (cpi->cpi_vendor) {
6014                 /*
6015                  * first available on Pentium IV and Opteron (K8)
6016                  */
6017         case X86_VENDOR_Intel:
6018                 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
6019                 break;
6020         case X86_VENDOR_AMD:
6021                 create = cpi->cpi_family >= 0xf;
6022                 break;
6023         default:
6024                 create = 0;
6025                 break;
6026         }
6027         if (create) {
6028                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6029                     "chunks", CPI_CHUNKS(cpi));
6030                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6031                     "apic-id", cpi->cpi_apicid);
6032                 if (cpi->cpi_chipid >= 0) {
6033                         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6034                             "chip#", cpi->cpi_chipid);
6035                         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6036                             "clog#", cpi->cpi_clogid);
6037                 }
6038         }
6039 
6040         /* cpuid-features */
6041         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6042             "cpuid-features", CPI_FEATURES_EDX(cpi));
6043 
6045         /* cpuid-features-ecx */
6046         switch (cpi->cpi_vendor) {
6047         case X86_VENDOR_Intel:
6048                 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
6049                 break;
6050         case X86_VENDOR_AMD:
6051                 create = cpi->cpi_family >= 0xf;
6052                 break;
6053         default:
6054                 create = 0;
6055                 break;
6056         }
6057         if (create)
6058                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6059                     "cpuid-features-ecx", CPI_FEATURES_ECX(cpi));
6060 
6061         /* ext-cpuid-features */
6062         switch (cpi->cpi_vendor) {
6063         case X86_VENDOR_Intel:
6064         case X86_VENDOR_AMD:
6065         case X86_VENDOR_Cyrix:
6066         case X86_VENDOR_TM:
6067         case X86_VENDOR_Centaur:
6068                 create = cpi->cpi_xmaxeax >= 0x80000001;
6069                 break;
6070         default:
6071                 create = 0;
6072                 break;
6073         }
6074         if (create) {
6075                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6076                     "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi));
6077                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6078                     "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi));
6079         }
6080 
6081         /*
6082          * Brand String first appeared in Intel Pentium IV, AMD K5
6083  * model 1, and Cyrix GXm.  On earlier models we try to
6084  * simulate something similar .. so this string should always
6085  * say -something- about the processor, however lame.
6086          */
6087         (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
6088             "brand-string", cpi->cpi_brandstr);
6089 
6090         /*
6091          * Finally, cache and tlb information
6092          */
6093         switch (x86_which_cacheinfo(cpi)) {
6094         case X86_VENDOR_Intel:
6095                 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
6096                 break;
6097         case X86_VENDOR_Cyrix:
6098                 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
6099                 break;
6100         case X86_VENDOR_AMD:
6101                 amd_cache_info(cpi, cpu_devi);
6102                 break;
6103         default:
6104                 break;
6105         }
6106 }
6107 
6108 struct l2info {
6109         int *l2i_csz;
6110         int *l2i_lsz;
6111         int *l2i_assoc;
6112         int l2i_ret;
6113 };
6114 
6115 /*
6116  * A cacheinfo walker that fetches the size, line-size and associativity
6117  * of the L2 cache
6118  */
6119 static int
6120 intel_l2cinfo(void *arg, const struct cachetab *ct)
6121 {
6122         struct l2info *l2i = arg;
6123         int *ip;
6124 
6125         if (ct->ct_label != l2_cache_str &&
6126             ct->ct_label != sl2_cache_str)
6127                 return (0);     /* not an L2 -- keep walking */
6128 
6129         if ((ip = l2i->l2i_csz) != NULL)
6130                 *ip = ct->ct_size;
6131         if ((ip = l2i->l2i_lsz) != NULL)
6132                 *ip = ct->ct_line_size;
6133         if ((ip = l2i->l2i_assoc) != NULL)
6134                 *ip = ct->ct_assoc;
6135         l2i->l2i_ret = ct->ct_size;
6136         return (1);             /* was an L2 -- terminate walk */
6137 }
6138 
6139 /*
6140  * AMD L2/L3 Cache and TLB Associativity Field Definition:
6141  *
6142  *      Unlike the associativity for the L1 cache and tlb where the 8 bit
6143  *      value is the associativity, the associativity for the L2 cache and
6144  *      tlb is encoded in the following table. The 4 bit L2 value serves as
6145  *      an index into the amd_afd[] array to determine the associativity.
6146  *      -1 is undefined. 0 is fully associative.
6147  */
6148 
6149 static int amd_afd[] =
6150         {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0};
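
/*
 * Example: an encoded associativity field of 0x6 indexes amd_afd[6] and
 * decodes to 8-way set associative; 0xf decodes to 0, meaning fully
 * associative, and the reserved encodings decode to -1.
 */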
6151 
6152 static void
6153 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i)
6154 {
6155         struct cpuid_regs *cp;
6156         uint_t size, assoc;
6157         int i;
6158         int *ip;
6159 
6160         if (cpi->cpi_xmaxeax < 0x80000006)
6161                 return;
6162         cp = &cpi->cpi_extd[6];
6163 
6164         if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 &&
6165             (size = BITX(cp->cp_ecx, 31, 16)) != 0) {
6166                 uint_t cachesz = size * 1024;
6167                 assoc = amd_afd[i];
6168 
6169                 ASSERT(assoc != -1);
6170 
6171                 if ((ip = l2i->l2i_csz) != NULL)
6172                         *ip = cachesz;
6173                 if ((ip = l2i->l2i_lsz) != NULL)
6174                         *ip = BITX(cp->cp_ecx, 7, 0);
6175                 if ((ip = l2i->l2i_assoc) != NULL)
6176                         *ip = assoc;
6177                 l2i->l2i_ret = cachesz;
6178         }
6179 }
6180 
6181 int
6182 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc)
6183 {
6184         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
6185         struct l2info __l2info, *l2i = &__l2info;
6186 
6187         l2i->l2i_csz = csz;
6188         l2i->l2i_lsz = lsz;
6189         l2i->l2i_assoc = assoc;
6190         l2i->l2i_ret = -1;
6191 
6192         switch (x86_which_cacheinfo(cpi)) {
6193         case X86_VENDOR_Intel:
6194                 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
6195                 break;
6196         case X86_VENDOR_Cyrix:
6197                 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
6198                 break;
6199         case X86_VENDOR_AMD:
6200                 amd_l2cacheinfo(cpi, l2i);
6201                 break;
6202         default:
6203                 break;
6204         }
6205         return (l2i->l2i_ret);
6206 }
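
/*
 * Hypothetical caller sketch (the function name is illustrative only):
 * fetch just the L2 line size for the current CPU, passing NULL for the
 * fields the caller does not need.  A negative return from
 * getl2cacheinfo() means no L2 information was available.
 */
static int
example_l2_line_size(void)
{
        int lsz = 0;

        if (getl2cacheinfo(CPU, NULL, &lsz, NULL) < 0)
                return (-1);
        return (lsz);
}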
6207 
6208 #if !defined(__xpv)
6209 
6210 uint32_t *
6211 cpuid_mwait_alloc(cpu_t *cpu)
6212 {
6213         uint32_t        *ret;
6214         size_t          mwait_size;
6215 
6216         ASSERT(cpuid_checkpass(CPU, 2));
6217 
6218         mwait_size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max;
6219         if (mwait_size == 0)
6220                 return (NULL);
6221 
6222         /*
6223          * kmem_alloc() returns cache line size aligned data for mwait_size
6224          * allocations.  mwait_size is currently cache line sized.  Neither
6225          * of these implementation details is guaranteed to be true in the
6226          * future.
6227          *
6228          * First try allocating mwait_size as kmem_alloc() currently returns
6229          * correctly aligned memory.  If kmem_alloc() does not return
6230          * mwait_size aligned memory, then round up in a double-sized buffer.
6231          *
6232          * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we
6233          * decide to free this memory.
6234          */
6235         ret = kmem_zalloc(mwait_size, KM_SLEEP);
6236         if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) {
6237                 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
6238                 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size;
6239                 *ret = MWAIT_RUNNING;
6240                 return (ret);
6241         } else {
6242                 kmem_free(ret, mwait_size);
6243                 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP);
6244                 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
6245                 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2;
6246                 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size);
6247                 *ret = MWAIT_RUNNING;
6248                 return (ret);
6249         }
6250 }
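
/*
 * Illustrative arithmetic for the fallback path above, assuming a
 * hypothetical mwait_size of 64: a buffer kmem_zalloc() happens to place
 * at an address ending in 0x40 is already 64-byte aligned and is used
 * as-is, while one ending in 0x50 fails the P2ROUNDUP identity check, so
 * a 128-byte buffer is allocated and the returned pointer is rounded up
 * to the next 64-byte boundary inside it.
 */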
6251 
6252 void
6253 cpuid_mwait_free(cpu_t *cpu)
6254 {
6255         if (cpu->cpu_m.mcpu_cpi == NULL) {
6256                 return;
6257         }
6258 
6259         if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL &&
6260             cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) {
6261                 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual,
6262                     cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual);
6263         }
6264 
6265         cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL;
6266         cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0;
6267 }
6268 
6269 void
6270 patch_tsc_read(int flag)
6271 {
6272         size_t cnt;
6273 
6274         switch (flag) {
6275         case TSC_NONE:
6276                 cnt = &_no_rdtsc_end - &_no_rdtsc_start;
6277                 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt);
6278                 break;
6279         case TSC_RDTSC_MFENCE:
6280                 cnt = &_tsc_mfence_end - &_tsc_mfence_start;
6281                 (void) memcpy((void *)tsc_read,
6282                     (void *)&_tsc_mfence_start, cnt);
6283                 break;
6284         case TSC_RDTSC_LFENCE:
6285                 cnt = &_tsc_lfence_end - &_tsc_lfence_start;
6286                 (void) memcpy((void *)tsc_read,
6287                     (void *)&_tsc_lfence_start, cnt);
6288                 break;
6289         case TSC_TSCP:
6290                 cnt = &_tscp_end - &_tscp_start;
6291                 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt);
6292                 break;
6293         default:
6294                 /* Bail for unexpected TSC types. (TSC_NONE covers 0) */
6295                 cmn_err(CE_PANIC, "Unrecognized TSC type: %d", flag);
6296                 break;
6297         }
6298         tsc_type = flag;
6299 }
6300 
6301 int
6302 cpuid_deep_cstates_supported(void)
6303 {
6304         struct cpuid_info *cpi;
6305         struct cpuid_regs regs;
6306 
6307         ASSERT(cpuid_checkpass(CPU, 1));
6308 
6309         cpi = CPU->cpu_m.mcpu_cpi;
6310 
6311         if (!is_x86_feature(x86_featureset, X86FSET_CPUID))
6312                 return (0);
6313 
6314         switch (cpi->cpi_vendor) {
6315         case X86_VENDOR_Intel:
6316                 if (cpi->cpi_xmaxeax < 0x80000007)
6317                         return (0);
6318 
6319                 /*
6320                  * Does the TSC run at a constant rate in all ACPI C-states?
6321                  */
6322                 regs.cp_eax = 0x80000007;
6323                 (void) __cpuid_insn(&regs);
6324                 return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE);
6325 
6326         default:
6327                 return (0);
6328         }
6329 }
6330 
6331 #endif  /* !__xpv */
6332 
6333 void
6334 post_startup_cpu_fixups(void)
6335 {
6336 #ifndef __xpv
6337         /*
6338          * Some AMD processors support C1E state. Entering this state will
6339          * cause the local APIC timer to stop, which we can't deal with at
6340          * this time.
6341          */
6342         if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) {
6343                 on_trap_data_t otd;
6344                 uint64_t reg;
6345 
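                     /*
                      * Do the MSR read under on_trap() protection so that a
                      * CPU which doesn't implement this MSR causes us to skip
                      * the fixup rather than take an unhandled #GP.
                      */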
6346                 if (!on_trap(&otd, OT_DATA_ACCESS)) {
6347                         reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT);
6348                         /* Disable C1E state if it is enabled by BIOS */
6349                         if ((reg >> AMD_ACTONCMPHALT_SHIFT) &
6350                             AMD_ACTONCMPHALT_MASK) {
6351                                 reg &= ~(AMD_ACTONCMPHALT_MASK <<
6352                                     AMD_ACTONCMPHALT_SHIFT);
6353                                 wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg);
6354                         }
6355                 }
6356                 no_trap();
6357         }
6358 #endif  /* !__xpv */
6359 }
6360 
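     /*
      * x86_use_pcid and x86_use_invpcid use -1 as an "unset" sentinel: if the
      * administrator has not overridden them (e.g. via /etc/system), derive
      * them from the detected hardware features before turning on CR4.PCIDE.
      */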
6361 void
6362 enable_pcid(void)
6363 {
6364         if (x86_use_pcid == -1)
6365                 x86_use_pcid = is_x86_feature(x86_featureset, X86FSET_PCID);
6366 
6367         if (x86_use_invpcid == -1) {
6368                 x86_use_invpcid = is_x86_feature(x86_featureset,
6369                     X86FSET_INVPCID);
6370         }
6371 
6372         if (!x86_use_pcid)
6373                 return;
6374 
6375         /*
6376          * Intel says that on setting PCIDE, the processor immediately starts
6377          * using the PCID bits; better make sure there's nothing there.
6378          */
6379         ASSERT((getcr3() & MMU_PAGEOFFSET) == PCID_NONE);
6380 
6381         setcr4(getcr4() | CR4_PCIDE);
6382 }
6383 
6384 /*
6385  * Set up the registers necessary to enable the XSAVE feature on this
6386  * processor. This function must be called early enough that no xsave/xrstor
6387  * ops will execute on the processor before the MSRs are properly set up.
6388  *
6389  * The current implementation makes the following assumptions:
6390  * - cpuid_pass1() is done, so that X86 features are known.
6391  * - fpu_probe() is done, so that fp_save_mech is chosen.
6392  */
6393 void
6394 xsave_setup_msr(cpu_t *cpu)
6395 {
6396         ASSERT(fp_save_mech == FP_XSAVE);
6397         ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
6398 
6399         /* Enable OSXSAVE in CR4. */
6400         setcr4(getcr4() | CR4_OSXSAVE);
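             /*
              * With CR4.OSXSAVE set, the xgetbv/xsetbv instructions become
              * usable; setup_xfem() below depends on that to program XCR0
              * (the XFEM) with the enabled state components.
              */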
6401         /*
6402          * Update SW copy of ECX, so that /dev/cpu/self/cpuid will report
6403          * correct value.
6404          */
6405         cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_ecx |= CPUID_INTC_ECX_OSXSAVE;
6406         setup_xfem();
6407 }
6408 
6409 /*
6410  * Starting with the Westmere processor, the local APIC timer will continue
6411  * running in all C-states, including the deepest C-states. This is
6412  * reported via the ARAT ("always running APIC timer") CPUID feature bit.
6413  */
6414 int
6415 cpuid_arat_supported(void)
6416 {
6417         struct cpuid_info *cpi;
6418         struct cpuid_regs regs;
6419 
6420         ASSERT(cpuid_checkpass(CPU, 1));
6421         ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
6422 
6423         cpi = CPU->cpu_m.mcpu_cpi;
6424 
6425         switch (cpi->cpi_vendor) {
6426         case X86_VENDOR_Intel:
6427                 /*
6428                  * Always-running Local APIC Timer is
6429                  * indicated by CPUID.6.EAX[2].
6430                  */
6431                 if (cpi->cpi_maxeax >= 6) {
6432                         regs.cp_eax = 6;
6433                         (void) cpuid_insn(NULL, &regs);
6434                         return (regs.cp_eax & CPUID_CSTATE_ARAT);
6435                 } else {
6436                         return (0);
6437                 }
6438         default:
6439                 return (0);
6440         }
6441 }
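
     /*
      * Hypothetical consumer sketch (illustrative only; the real policy lives
      * in the CPU power management code): deep C-states are only attractive
      * when the TSC stays invariant and the LAPIC timer keeps ticking, e.g.
      *
      *	use_deep_cstates = cpuid_deep_cstates_supported() &&
      *	    cpuid_arat_supported();
      */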
6442 
6443 /*
6444  * Check support for Intel ENERGY_PERF_BIAS feature
6445  */
6446 int
6447 cpuid_iepb_supported(struct cpu *cp)
6448 {
6449         struct cpuid_info *cpi = cp->cpu_m.mcpu_cpi;
6450         struct cpuid_regs regs;
6451 
6452         ASSERT(cpuid_checkpass(cp, 1));
6453 
6454         if (!(is_x86_feature(x86_featureset, X86FSET_CPUID)) ||
6455             !(is_x86_feature(x86_featureset, X86FSET_MSR))) {
6456                 return (0);
6457         }
6458 
6459         /*
6460          * The Intel ENERGY_PERF_BIAS MSR is indicated by
6461          * capability bit CPUID.6.ECX[3].
6462          */
6463         if ((cpi->cpi_vendor != X86_VENDOR_Intel) || (cpi->cpi_maxeax < 6))
6464                 return (0);
6465 
6466         regs.cp_eax = 0x6;
6467         (void) cpuid_insn(NULL, &regs);
6468         return (regs.cp_ecx & CPUID_EPB_SUPPORT);
6469 }
6470 
6471 /*
6472  * Check support for TSC deadline timer
6473  *
6474  * TSC deadline timer provides a superior software programming
6475  * model over local APIC timer that eliminates "time drifts".
6476  * Instead of specifying a relative time, software specifies an
6477  * absolute time as the target at which the processor should
6478  * generate a timer event.
6479  */
6480 int
6481 cpuid_deadline_tsc_supported(void)
6482 {
6483         struct cpuid_info *cpi = CPU->cpu_m.mcpu_cpi;
6484         struct cpuid_regs regs;
6485 
6486         ASSERT(cpuid_checkpass(CPU, 1));
6487         ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
6488 
6489         switch (cpi->cpi_vendor) {
6490         case X86_VENDOR_Intel:
6491                 if (cpi->cpi_maxeax >= 1) {
6492                         regs.cp_eax = 1;
6493                         (void) cpuid_insn(NULL, &regs);
6494                         return (regs.cp_ecx & CPUID_DEADLINE_TSC);
6495                 } else {
6496                         return (0);
6497                 }
6498         default:
6499                 return (0);
6500         }
6501 }
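
     /*
      * Illustrative sketch of the programming model (the MSR is
      * IA32_TSC_DEADLINE, index 0x6e0; the macro name here is hypothetical):
      *
      *	wrmsr(MSR_TSC_DEADLINE, tsc_read() + delta_ticks);
      *
      * The local APIC delivers the timer interrupt once the TSC reaches the
      * programmed absolute value.
      */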
6502 
6503 #if defined(__amd64) && !defined(__xpv)
6504 /*
6505  * Patch in optimized versions of bcopy for high-performance Intel Nehalem
6506  * (Nhm) processors and later.
6507  */
6508 void
6509 patch_memops(uint_t vendor)
6510 {
6511         size_t cnt, i;
6512         caddr_t to, from;
6513 
6514         if ((vendor == X86_VENDOR_Intel) &&
6515             is_x86_feature(x86_featureset, X86FSET_SSE4_2)) {
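                     /*
                      * Copy the optimized routine over the patch point
                      * (bcopy_ck_size) one byte at a time.
                      */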
6516                 cnt = &bcopy_patch_end - &bcopy_patch_start;
6517                 to = &bcopy_ck_size;
6518                 from = &bcopy_patch_start;
6519                 for (i = 0; i < cnt; i++) {
6520                         *to++ = *from++;
6521                 }
6522         }
6523 }
6524 #endif  /* __amd64 && !__xpv */
6525 
6526 /*
6527  * We're being asked to tell the system how many bits are required to represent
6528  * the various core and strand IDs. While it's tempting to derive this based
6529  * on the values in cpi_ncore_per_chip and cpi_ncpu_per_chip, that isn't quite
6530  * correct. Instead, this needs to be based on the number of bits that the APIC
6531  * allows for these different configurations. We only update these to a larger
6532  * value if we find one.
6533  */
6534 void
6535 cpuid_get_ext_topo(cpu_t *cpu, uint_t *core_nbits, uint_t *strand_nbits)
6536 {
6537         struct cpuid_info *cpi;
6538 
6539         VERIFY(cpuid_checkpass(cpu, 1));
6540         cpi = cpu->cpu_m.mcpu_cpi;
6541 
6542         if (cpi->cpi_ncore_bits > *core_nbits) {
6543                 *core_nbits = cpi->cpi_ncore_bits;
6544         }
6545 
6546         if (cpi->cpi_nthread_bits > *strand_nbits) {
6547                 *strand_nbits = cpi->cpi_nthread_bits;
6548         }
6549 }
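
     /*
      * For illustration (hypothetical values): with core_nbits = 3 and
      * strand_nbits = 1, a local APIC ID decomposes as
      *
      *	strand_id = apicid & ((1 << strand_nbits) - 1);
      *	core_id = (apicid >> strand_nbits) & ((1 << core_nbits) - 1);
      *	chip_id = apicid >> (core_nbits + strand_nbits);
      */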
6550 
6551 void
6552 cpuid_pass_ucode(cpu_t *cpu, uchar_t *fset)
6553 {
6554         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
6555         struct cpuid_regs cp;
6556 
6557         /*
6558          * Reread the CPUID portions that we need for various security
6559          * information.
6560          */
6561         if (cpi->cpi_vendor == X86_VENDOR_Intel) {
6562                 /*
6563                  * Check if we now have leaf 7 available to us.
6564                  */
6565                 if (cpi->cpi_maxeax < 7) {
6566                         bzero(&cp, sizeof (cp));
6567                         cp.cp_eax = 0;
6568                         cpi->cpi_maxeax = __cpuid_insn(&cp);
6569                         if (cpi->cpi_maxeax < 7)
6570                                 return;
6571                 }
6572 
6573                 bzero(&cp, sizeof (cp));
6574                 cp.cp_eax = 7;
6575                 cp.cp_ecx = 0;
6576                 (void) __cpuid_insn(&cp);
6577                 cpi->cpi_std[7] = cp;
6578         } else if (cpi->cpi_vendor == X86_VENDOR_AMD) {
6579                 /* No xcpuid support */
6580                 if (cpi->cpi_family < 5 ||
6581                     (cpi->cpi_family == 5 && cpi->cpi_model < 1))
6582                         return;
6583 
6584                 if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8) {
6585                         bzero(&cp, sizeof (cp));
6586                         cp.cp_eax = CPUID_LEAF_EXT_0;
6587                         cpi->cpi_xmaxeax = __cpuid_insn(&cp);
6588                         if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8) {
6589                                 return;
6590                         }
6591                 }
6592 
6593                 bzero(&cp, sizeof (cp));
6594                 cp.cp_eax = CPUID_LEAF_EXT_8;
6595                 (void) __cpuid_insn(&cp);
6596                 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8, &cp);
6597                 cpi->cpi_extd[8] = cp;
6598         } else {
6599                 /*
6600                  * Nothing to do here. Return an empty set which has already
6601                  * been zeroed for us.
6602                  */
6603                 return;
6604         }
6605         cpuid_scan_security(cpu, fset);
6606 }
6607 
6608 /* ARGSUSED */
6609 static int
6610 cpuid_post_ucodeadm_xc(xc_arg_t arg0, xc_arg_t arg1, xc_arg_t arg2)
6611 {
6612         uchar_t *fset;
6613 
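             /*
              * arg0 is the base of an NCPU-sized array of feature sets; each
              * CPU fills in its own slot, indexed by cpu_id.
              */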
6614         fset = (uchar_t *)(arg0 + sizeof (x86_featureset) * CPU->cpu_id);
6615         cpuid_pass_ucode(CPU, fset);
6616 
6617         return (0);
6618 }
6619 
6620 /*
6621  * After a microcode update where the version has changed, we need to rescan
6622  * CPUID. To do this we first check that every CPU has the same microcode
6623  * revision, and then perform a cross call to all such CPUs. It's the
6624  * caller's job to make sure that no one else can end up doing an update while
6625  * this is going on.
6626  *
6627  * We assume that the system is microcode capable if we're called.
6628  */
6629 void
6630 cpuid_post_ucodeadm(void)
6631 {
6632         uint32_t rev;
6633         int i;
6634         struct cpu *cpu;
6635         cpuset_t cpuset;
6636         void *argdata;
6637         uchar_t *f0;
6638 
6639         argdata = kmem_zalloc(sizeof (x86_featureset) * NCPU, KM_SLEEP);
6640 
6641         mutex_enter(&cpu_lock);
6642         cpu = cpu_get(0);
6643         rev = cpu->cpu_m.mcpu_ucode_info->cui_rev;
6644         CPUSET_ONLY(cpuset, 0);
6645         for (i = 1; i < max_ncpus; i++) {
6646                 if ((cpu = cpu_get(i)) == NULL)
6647                         continue;
6648 
6649                 if (cpu->cpu_m.mcpu_ucode_info->cui_rev != rev) {
6650                         panic("post microcode update CPU %d has differing "
6651                             "microcode revision (%u) from CPU 0 (%u)",
6652                             i, cpu->cpu_m.mcpu_ucode_info->cui_rev, rev);
6653                 }
6654                 CPUSET_ADD(cpuset, i);
6655         }
6656 
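             /*
              * Cross call every CPU in the set; each re-reads its security
              * relevant CPUID leaves and records its feature set in argdata.
              */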
6657         kpreempt_disable();
6658         xc_sync((xc_arg_t)argdata, 0, 0, CPUSET2BV(cpuset),
6659             cpuid_post_ucodeadm_xc);
6660         kpreempt_enable();
6661 
6662         /*
6663          * OK, now look at each CPU and see if their feature sets are equal.
6664          */
6665         f0 = argdata;
6666         for (i = 1; i < max_ncpus; i++) {
6667                 uchar_t *fset;
6668                 if (!CPU_IN_SET(cpuset, i))
6669                         continue;
6670 
6671                 fset = (uchar_t *)((uintptr_t)argdata +
6672                     sizeof (x86_featureset) * i);
6673 
6674                 if (!compare_x86_featureset(f0, fset)) {
6675                         panic("Post microcode update CPU %d has "
6676                             "differing security feature set (%p) from "
6677                             "CPU 0 (%p), not appending to feature set", i,
6678                             (void *)fset, (void *)f0);
6679                 }
6680         }
6681 
6682         mutex_exit(&cpu_lock);
6683 
6684         for (i = 0; i < NUM_X86_FEATURES; i++) {
6685                 if (is_x86_feature(f0, i)) {
6686                         cmn_err(CE_CONT, "?post-ucode x86_feature: %s\n",
6687                             x86_feature_names[i]);
6688                         add_x86_feature(x86_featureset, i);
6689                 }
6690         }
6691         kmem_free(argdata, sizeof (x86_featureset) * NCPU);
6692 }