/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
 * Copyright 2014 Josef "Jeff" Sipek <jeffpc@josefsipek.net>
 */
/*
 * Copyright (c) 2010, Intel Corporation.
 * All rights reserved.
 */
/*
 * Portions Copyright 2009 Advanced Micro Devices, Inc.
 */
/*
 * Copyright 2019, Joyent, Inc.
 */

/*
 * CPU Identification logic
 *
 * The purpose of this file and its companion, cpuid_subr.c, is to help deal
 * with the identification of CPUs, their features, and their topologies. More
 * specifically, this file helps drive the following:
 *
 * 1. Enumeration of features of the processor which are used by the kernel to
 *    determine what features to enable or disable. These may be instruction set
 *    enhancements or features that we use.
 *
 * 2. Enumeration of instruction set architecture (ISA) additions that userland
 *    will be told about through the auxiliary vector.
 *
 * 3. Understanding the physical topology of the CPU such as the number of
 *    caches, how many cores it has, whether or not it supports simultaneous
 *    multi-threading (SMT), etc.
 *
 * ------------------------
 * CPUID History and Basics
 * ------------------------
 *
 * The cpuid instruction was added by Intel roughly around the time that the
 * original Pentium was introduced. The purpose of cpuid was to provide, in a
 * programmatic fashion, information about the CPU that previously had to be
 * guessed at. For example, an important part of cpuid is that we can know what
 * extensions to the ISA exist. If you use an invalid opcode you would get a
 * #UD, so this method allows a program (whether a user program or the kernel)
 * to determine what exists without crashing or getting a SIGILL. Of course,
 * this was also during the era of the clones and the AMD Am5x86. The vendor
 * name shows up first in cpuid for a reason.
 *
 * cpuid information is broken down into ranges of values called 'leaves'. Each
 * leaf puts unique values into the registers %eax, %ebx, %ecx, and %edx, and
 * each leaf has its own meaning. The leaves are broken down into regions:
 *
 *      [ 0, 7fffffff ]                 This region is called the 'basic'
 *                                      region. This region is generally defined
 *                                      by Intel, though some of the original
 *                                      portions have different meanings based
 *                                      on the manufacturer. These days, Intel
 *                                      adds most new features to this region.
 *                                      AMD adds non-Intel compatible
 *                                      information in the third, extended
 *                                      region. Intel uses this for everything
 *                                      including ISA extensions, CPU
 *                                      features, cache information, topology,
 *                                      and more.
 *
 *                                      There is a hole carved out of this
 *                                      region which is reserved for
 *                                      hypervisors.
 *
 *      [ 40000000, 4fffffff ]          This region, which is found in the
 *                                      middle of the previous region, is
 *                                      explicitly promised to never be used by
 *                                      CPUs. Instead, it is used by hypervisors
 *                                      to communicate information about
 *                                      themselves to the operating system. The
 *                                      values and details are unique for each
 *                                      hypervisor.
 *
 *      [ 80000000, ffffffff ]          This region is called the 'extended'
 *                                      region. Some of the low leaves mirror
 *                                      parts of the basic leaves. This region
 *                                      has generally been used by AMD for
 *                                      various extensions. For example, AMD-
 *                                      specific information about caches,
 *                                      features, and topology are found in this
 *                                      region.
 *
 * To read a leaf, you place the desired leaf number into %eax, zero %ebx,
 * %ecx, and %edx, and then issue the cpuid instruction. At the first leaf in
 * each of the ranges, one of the primary things returned is the maximum valid
 * leaf in that range. This allows for discovery of what range of CPUID is
 * valid.
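 *
 * As a minimal sketch (not the exact code this file uses), reading a leaf
 * with the kernel's __cpuid_insn() interface and discovering the maximum
 * valid basic leaf looks roughly like this; struct cpuid_regs and
 * __cpuid_insn() are declared in the headers this file includes below:
 *
 *      struct cpuid_regs regs;
 *      uint32_t max_basic;
 *
 *      regs.cp_eax = 0;
 *      regs.cp_ebx = regs.cp_ecx = regs.cp_edx = 0;
 *      (void) __cpuid_insn(&regs);
 *      max_basic = regs.cp_eax;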
 *
 * The CPUs have potentially surprising behavior when using an invalid leaf or
 * unimplemented leaf. If the requested leaf is within the valid basic or
 * extended range, but is unimplemented, then %eax, %ebx, %ecx, and %edx will be
 * set to zero. However, if you specify a leaf that is outside of a valid range,
 * then instead it will be filled with the last valid _basic_ leaf. For example,
 * if the maximum basic value is on leaf 0x3, then issuing a cpuid for leaf 4 or
 * an invalid extended leaf will return the information for leaf 3.
 *
 * Some leaves are broken down into sub-leaves. This means that the value
 * depends on both the leaf asked for in %eax and a secondary register. For
 * example, Intel uses the value in %ecx on leaf 7 to indicate a sub-leaf to get
 * additional information. Or when getting topology information in leaf 0xb, the
 * initial value in %ecx changes which level of the topology you are
 * getting information about.
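 *
 * For example, a sketch of reading sub-leaf 0 of leaf 7 (the structured
 * extended feature flags leaf) simply places the sub-leaf in %ecx:
 *
 *      regs.cp_eax = 7;
 *      regs.cp_ecx = 0;
 *      regs.cp_ebx = regs.cp_edx = 0;
 *      (void) __cpuid_insn(&regs);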
 *
 * cpuid values are always kept to 32 bits regardless of whether or not the
 * program is in 64-bit mode. When executing in 64-bit mode, the upper
 * 32 bits of the register are always set to zero so that the values are the
 * same regardless of execution mode.
 *
 * ----------------------
 * Identifying Processors
 * ----------------------
 *
 * We can identify a processor in two steps. The first step looks at cpuid leaf
 * 0. Leaf 0 contains the processor's vendor information. This is done by
 * putting a 12 character string in %ebx, %edx, and %ecx (in that order). On
 * AMD, it is 'AuthenticAMD' and on Intel it is 'GenuineIntel'.
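 *
 * As a sketch, the vendor string can be assembled in the %ebx, %edx, %ecx
 * register order (this mirrors how this file fills in cpi_vendorstr):
 *
 *      char vendor[13];
 *
 *      regs.cp_eax = 0;
 *      regs.cp_ebx = regs.cp_ecx = regs.cp_edx = 0;
 *      (void) __cpuid_insn(&regs);
 *      bcopy(&regs.cp_ebx, &vendor[0], 4);
 *      bcopy(&regs.cp_edx, &vendor[4], 4);
 *      bcopy(&regs.cp_ecx, &vendor[8], 4);
 *      vendor[12] = '\0';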
 *
 * From there, a processor is identified by a combination of three different
 * values:
 *
 *  1. Family
 *  2. Model
 *  3. Stepping
 *
 * Each vendor uses the family and model to uniquely identify a processor. The
 * way that family and model are changed depends on the vendor. For example,
 * Intel has been using family 0x6 for almost all of their processors since the
 * Pentium Pro/Pentium II era, often called the P6. The model is used to
 * identify the exact processor. Different models are often used for the client
 * (consumer) and server parts. Even though each processor often has major
 * architectural differences, they still are considered the same family by
 * Intel.
 *
 * On the other hand, each major AMD architecture generally has its own family.
 * For example, the K8 is family 0xf, Bulldozer 0x15, and Zen 0x17. Within a
 * family, the model number is used to help identify specific processors.
 *
 * The stepping is used to refer to a revision of a specific microprocessor. The
 * term comes from equipment used to produce masks that are used to create
 * integrated circuits.
 *
 * The information is present in leaf 1, %eax. In technical documentation you
 * will see the terms extended model and extended family. The original family,
 * model, and stepping fields were each 4 bits wide. If the base family value is
 * 0xf, then one must also consult the extended fields, which take previously
 * reserved bits: the extended family is added to the base family, and the
 * extended model is prepended as the high bits of the model.
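 *
 * As a sketch, computing the full values from the leaf 1 %eax value (held
 * here in a variable eax) looks roughly like this; the exact conditions vary
 * slightly by vendor, e.g. Intel also applies the extended model when the
 * base family is 0x6:
 *
 *      uint_t family = (eax >> 8) & 0xf;
 *      uint_t model = (eax >> 4) & 0xf;
 *      uint_t stepping = eax & 0xf;
 *
 *      if (family == 0xf) {
 *              family += (eax >> 20) & 0xff;
 *              model += ((eax >> 16) & 0xf) << 4;
 *      }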
 *
 * When we process this information, we store the full family, model, and
 * stepping in the struct cpuid_info members cpi_family, cpi_model, and
 * cpi_step, respectively. Whenever you are performing comparisons with the
 * family, model, and stepping, you should use these members and not the raw
 * values from cpuid. If you must use the raw values from cpuid directly, you
 * must make sure that you add the extended model and family to the base model
 * and family.
 *
 * In general, we do not use information about the family, model, and stepping
 * to determine whether or not a feature is present; that is generally driven by
 * specific leaves. However, when something we care about on the processor is
 * not considered 'architectural', meaning that it is specific to a set of
 * processors and not promised in the architecture model to be consistent from
 * generation to generation, then we will fall back on this information. The
 * most common cases where this comes up are when we have to work around errata
 * in the processor, are dealing with processor-specific features such as CPU
 * performance counters, or want to provide additional information for things
 * such as fault management.
 *
 * While processors also have a brand string, which is the name that people
 * are familiar with when buying the processor, it is not meant for
 * programmatic consumption. That is what the family, model, and stepping are
 * for.
 *
 * ------------
 * CPUID Passes
 * ------------
 *
 * As part of performing feature detection, we break this into several different
 * passes. The passes are as follows:
 *
 *      Pass 0          This is a primordial pass done in locore.s to deal with
 *                      Cyrix CPUs that don't support cpuid. The reality is that
 *                      we likely don't run on them any more, but there is still
 *                      logic for handling them.
 *
 *      Pass 1          This is the primary pass and is responsible for doing a
 *                      large number of different things:
 *
 *                      1. Determining which vendor manufactured the CPU and the
 *                      family, model, and stepping information.
 *
 *                      2. Gathering a large number of feature flags to
 *                      determine which features the CPU supports and which
 *                      require additional work in the OS to enable. Features
 *                      detected this way are added to the x86_featureset,
 *                      which can be queried to determine what we should do.
 *                      This includes processing all of the basic and extended
 *                      CPU features that we care about.
 *
 *                      3. Determining the CPU's topology. This includes
 *                      information about how many cores and threads are present
 *                      in the package. It also is responsible for figuring out
 *                      which logical CPUs are potentially part of the same core
 *                      and what other resources they might share. For more
 *                      information see the 'Topology' section.
 *
 *                      4. Determining the set of CPU security-specific features
 *                      that we need to worry about and determining the
 *                      appropriate set of workarounds.
 *
 *                      Pass 1 on the boot CPU occurs before KMDB is started.
 *
 *      Pass 2          The second pass is done after startup(). Here, we check
 *                      other miscellaneous features. Most of this is gathering
 *                      additional basic and extended features that we'll use in
 *                      later passes or for debugging support.
 *
 *      Pass 3          The third pass occurs after the kernel memory allocator
 *                      has been fully initialized. This gathers information
 *                      where we might need dynamic memory available for our
 *                      uses. This includes several varying width leaves that
 *                      have cache information and the processor's brand string.
 *
 *      Pass 4          The fourth and final normal pass is performed after the
 *                      kernel has brought most everything online. This is
 *                      invoked from post_startup(). In this pass, we go through
 *                      the set of features that we have enabled and turn that
 *                      into the hardware auxiliary vector features that
 *                      userland receives. This is used by userland, primarily
 *                      by the run-time link-editor (RTLD), though userland
 *                      software could also refer to it directly.
 *
 *      Microcode       After a microcode update, we do a selective rescan of
 *                      the cpuid leaves to determine what features have
 *                      changed. Microcode updates can provide more details
 *                      about security related features to deal with issues like
 *                      Spectre and L1TF. On occasion, vendors have violated
 *                      their contract and removed bits. However, we don't try
 *                      to detect that because that puts us in a situation that
 *                      we really can't deal with. As such, the only things we
 *                      rescan today are security related features. See
 *                      cpuid_pass_ucode().
 *
 * All of the passes (except pass 0) are run on all CPUs. However, for the most
 * part we only care about what the boot CPU says about this information and use
 * the other CPUs as a rough guide to sanity check that we have the same feature
 * set.
 *
 * We do not support running multiple logical CPUs with disjoint, let alone
 * different, feature sets.
 *
 * ------------------
 * Processor Topology
 * ------------------
 *
 * One of the important things that we need to do is to understand the topology
 * of the underlying processor. When we say topology in this case, we're trying
 * to understand the relationship between the logical CPUs that the operating
 * system sees and the underlying physical layout. Different logical CPUs may
 * share different resources which can have important consequences for the
 * performance of the system. For example, they may share caches, execution
 * units, and more.
 *
 * The topology of the processor changes from generation to generation and
 * vendor to vendor.  Along with that, different vendors use different
 * terminology, and the operating system itself uses occasionally overlapping
 * terminology. It's important to understand what this topology looks like so
 * one can understand the different things that we try to calculate and
 * determine.
 *
 * To get started, let's talk about a bit of terminology that we've used so
 * far, that is used throughout this file, and that is fairly generic across
 * multiple vendors:
 *
 * CPU
 *      A central processing unit (CPU) refers to a logical and/or virtual
 *      entity that the operating system can execute instructions on. The
 *      underlying resources for this CPU may be shared between multiple
 *      entities; however, to the operating system it is a discrete unit.
 *
 * PROCESSOR and PACKAGE
 *
 *      Generally, when we use the term 'processor' on its own, we are referring
 *      to the physical entity that one buys and plugs into a board. However,
 *      because processor has been overloaded and one might see it used to mean
 *      multiple different levels, we will instead use the term 'package' for
 *      the rest of this file. The term package comes from the electrical
 *      engineering side and refers to the physical entity that encloses the
 *      electronics inside. Strictly speaking the package can contain more than
 *      just the CPU, for example, on many processors it may also have what's
 *      called an 'integrated graphical processing unit (GPU)'. Because the
 *      package can encapsulate multiple units, it is the largest physical unit
 *      that we refer to.
 *
 * SOCKET
 *
 *      A socket refers to a unit on a system board (generally the motherboard)
 *      that can receive a package. A single package, or processor, is plugged
 *      into a single socket. A system may have multiple sockets. Often, the
 *      term socket is used interchangeably with package and refers to the
 *      electrical component that is plugged in, and not the receptacle itself.
 *
 * CORE
 *
 *      A core refers to the physical instantiation of a CPU, generally, with a
 *      full set of hardware resources available to it. A package may contain
 *      multiple cores inside of it or it may just have a single one. A
 *      processor with more than one core is often referred to as 'multi-core'.
 *      In illumos, we will use the feature X86FSET_CMP to refer to a system
 *      that has 'multi-core' processors.
 *
 *      A core may expose a single logical CPU to the operating system, or it
 *      may expose multiple CPUs, which we call threads, defined below.
 *
 *      Some resources may still be shared by cores in the same package. For
 *      example, many processors will share the level 3 cache between cores.
 *      Some AMD generations share hardware resources between cores. For more
 *      information on that see the section 'AMD Topology'.
 *
 * THREAD and STRAND
 *
 *      In this file, a thread generally refers to a hardware resource and not
 *      the operating system's logical abstraction. A thread is always exposed
 *      as an independent logical CPU to the operating system. A thread belongs
 *      to a specific core. A core may have more than one thread. When that is
 *      the case, the threads that are part of the same core are often referred
 *      to as 'siblings'.
 *
 *      When multiple threads exist, this is generally referred to as
 *      simultaneous multi-threading (SMT). When Intel introduced this in their
 *      processors they called it hyper-threading (HT). When multiple threads
 *      are active in a core, they split the resources of the core. For example,
 *      two threads may share the same set of hardware execution units.
 *
 *      The operating system often uses the term 'strand' to refer to a thread.
 *      This helps disambiguate it from the software concept.
 *
 * CHIP
 *
 *      Unfortunately, the term 'chip' is dramatically overloaded. At its most
 *      base meaning, it is used to refer to a single integrated circuit, which
 *      may or may not be the only thing in the package. In illumos, when you
 *      see the term 'chip' it is almost always referring to the same thing as
 *      the 'package'. However, many vendors may use chip to refer to one of
 *      many integrated circuits that have been placed in the package. As an
 *      example, see the subsequent definition.
 *
 *      To try to keep things consistent, we will only use chip when referring
 *      to the entire integrated circuit package, with the exception of the
 *      definition of multi-chip module (because it is in the name) and use the
 *      term 'die' when we want the more general, potential sub-component
 *      definition.
 *
 * DIE
 *
 *      A die refers to an integrated circuit. Inside of the package there may
 *      be a single die or multiple dies. This is sometimes called a 'chip' in
 *      vendor's parlance, but in this file, we use the term die to refer to a
 *      subcomponent.
 *
 * MULTI-CHIP MODULE
 *
 *      A multi-chip module (MCM) refers to putting multiple distinct chips that
 *      are connected together in the same package. When a multi-chip design is
 *      used, generally each chip is manufactured independently and then joined
 *      together in the package. For example, on AMD's Zen microarchitecture
 *      (family 0x17), the package contains several dies (the second meaning of
 *      chip from above) that are connected together.
 *
 * CACHE
 *
 *      A cache is a part of the processor that maintains copies of recently
 *      accessed memory. Caches are split into levels and then into types.
 *      Commonly there are one to three levels, called level one, two, and
 *      three. The lower the level, the smaller it is, the closer it is to the
 *      execution units of the CPU, and the faster it is to access. The layout
 *      and design of the cache come in many different flavors, consult other
 *      resources for a discussion of those.
 *
 *      Caches are generally split into two types, the instruction and data
 *      cache. The caches contain what their names suggest, the instruction
 *      cache has executable program text, while the data cache has all other
 *      memory that the processor accesses. As of this writing, data is kept
 *      coherent between all of the caches on x86, so if one modifies program
 *      text before it is executed, that will be in the data cache, and the
 *      instruction cache will be synchronized with that change when the
 *      processor actually executes those instructions. This coherency also
 *      covers the fact that data could show up in multiple caches.
 *
 *      Generally, the lowest level caches are specific to a core. However, the
 *      last level cache is shared between some number of cores. The number of
 *      CPUs sharing this last level cache is important. This has implications
 *      for the choices that the scheduler makes, as accessing memory that might
 *      be in a remote cache after thread migration can be quite expensive.
 *
 *      Sometimes, the word cache is abbreviated with a '$', because in US
 *      English the word cache is pronounced the same as cash. So L1D$ refers to
 *      the L1 data cache, and L2$ would be the L2 cache. This will not be used
 *      in the rest of this theory statement for clarity.
 *
 * MEMORY CONTROLLER
 *
 *      The memory controller is a component that provides access to DRAM. Each
 *      memory controller can access a set number of DRAM channels. Each channel
 *      can have a number of DIMMs (sticks of memory) associated with it. A
 *      given package may have more than one memory controller. The association
 *      of the memory controller to a group of cores is important as it is
 *      cheaper to access memory on the controller that you are associated with.
 *
 * NUMA
 *
 *      NUMA, or non-uniform memory access, describes a way that systems are
 *      built. On x86, any processor core can address all of the memory in the
 *      system. However, when using multiple sockets or possibly within a
 *      multi-chip module, some of that memory is physically closer and some of
 *      it is further. Memory that is further away is more expensive to access.
 *      Consider the following image of multiple sockets with memory:
 *
 *      +--------+                                                +--------+
 *      | DIMM A |         +----------+      +----------+         | DIMM D |
 *      +--------+-+       |          |      |          |       +-+------+-+
 *        | DIMM B |=======| Socket 0 |======| Socket 1 |=======| DIMM E |
 *        +--------+-+     |          |      |          |     +-+------+-+
 *          | DIMM C |     +----------+      +----------+     | DIMM F |
 *          +--------+                                        +--------+
 *
 *      In this example, Socket 0 is closer to DIMMs A-C while Socket 1 is
 *      closer to DIMMs D-F. This means that it is cheaper for socket 0 to
 *      access DIMMs A-C and more expensive to access D-F as it has to go
 *      through Socket 1 to get there. The inverse is true for Socket 1. DIMMs
 *      D-F are cheaper than A-C. While the socket form is the most common, when
 *      using multi-chip modules, this can also sometimes occur. For another
 *      example of this that's more involved, see the AMD topology section.
 *
 *
 * Intel Topology
 * --------------
 *
 * Most Intel processors since Nehalem (as of this writing the current
 * generation is Skylake / Cannon Lake) follow a fairly similar pattern. The CPU
 * portion of the package is a single monolithic die. MCMs currently aren't
 * used. Most parts have three levels of caches, with the L3 cache being shared
 * between all of the cores on the package. The L1/L2 cache is generally
 * specific to an individual core. The following image shows at a simplified
 * level what this looks like. The memory controller is commonly part of
 * something called the 'Uncore', which used to be separate physical chips that
 * were not a part of the package, but are now part of the same chip.
 *
 *  +-----------------------------------------------------------------------+
 *  | Package                                                               |
 *  |  +-------------------+  +-------------------+  +-------------------+  |
 *  |  | Core              |  | Core              |  | Core              |  |
 *  |  |  +--------+ +---+ |  |  +--------+ +---+ |  |  +--------+ +---+ |  |
 *  |  |  | Thread | | L | |  |  | Thread | | L | |  |  | Thread | | L | |  |
 *  |  |  +--------+ | 1 | |  |  +--------+ | 1 | |  |  +--------+ | 1 | |  |
 *  |  |  +--------+ |   | |  |  +--------+ |   | |  |  +--------+ |   | |  |
 *  |  |  | Thread | |   | |  |  | Thread | |   | |  |  | Thread | |   | |  |
 *  |  |  +--------+ +---+ |  |  +--------+ +---+ |  |  +--------+ +---+ |  |
 *  |  |  +--------------+ |  |  +--------------+ |  |  +--------------+ |  |
 *  |  |  | L2 Cache     | |  |  | L2 Cache     | |  |  | L2 Cache     | |  |
 *  |  |  +--------------+ |  |  +--------------+ |  |  +--------------+ |  |
 *  |  +-------------------+  +-------------------+  +-------------------+  |
 *  | +-------------------------------------------------------------------+ |
 *  | |                         Shared L3 Cache                           | |
 *  | +-------------------------------------------------------------------+ |
 *  | +-------------------------------------------------------------------+ |
 *  | |                        Memory Controller                          | |
 *  | +-------------------------------------------------------------------+ |
 *  +-----------------------------------------------------------------------+
 *
 * A side effect of this current architecture is that what we care about from a
 * scheduling and topology perspective is simplified. In general we care about
 * understanding which logical CPUs are part of the same core and socket.
 *
 * To determine the relationship between threads and cores, Intel initially used
 * the identifier in the advanced programmable interrupt controller (APIC). They
 * also added cpuid leaf 4 to give additional information about the number of
 * threads and CPUs in the processor. With the addition of x2apic (which
 * increased the number of addressable logical CPUs from 8 bits to 32 bits), an
 * additional cpuid topology leaf 0xB was added.
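 *
 * As a sketch, walking the sub-leaves of leaf 0xB looks roughly like this. At
 * each level, %ecx bits 15:8 give the level type (1 is SMT, 2 is core, and 0
 * ends the walk), %eax bits 4:0 give the number of bits to shift the x2APIC
 * ID right to obtain the ID of the next level up, and %edx holds the full
 * x2APIC ID:
 *
 *      uint_t subleaf, type, shift;
 *
 *      for (subleaf = 0; ; subleaf++) {
 *              regs.cp_eax = 0xb;
 *              regs.cp_ecx = subleaf;
 *              regs.cp_ebx = regs.cp_edx = 0;
 *              (void) __cpuid_insn(&regs);
 *              type = (regs.cp_ecx >> 8) & 0xff;
 *              if (type == 0)
 *                      break;
 *              shift = regs.cp_eax & 0x1f;
 *      }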
 *
 * AMD Topology
 * ------------
 *
 * When discussing AMD topology, we want to break this into three distinct
 * generations of topology. There's the basic topology that has been used in
 * family 0xf+ (Opteron, Athlon64), there's the topology that was introduced
 * with family 0x15 (Bulldozer), and there's the topology that was introduced
 * with family 0x17 (Zen). AMD also has some additional terminology that's worth
 * talking about.
 *
 * Until the introduction of family 0x17 (Zen), AMD did not implement something
 * that they considered SMT. Whether or not the AMD processors have SMT
 * influences many things including scheduling and reliability, availability,
 * and serviceability (RAS) features.
 *
 * NODE
 *
 *      AMD uses the term node to refer to a die that contains a number of cores
 *      and I/O resources. Depending on the processor family and model, more
 *      than one node can be present in the package. When there is more than one
 *      node this indicates a multi-chip module. Usually each node has its own
 *      access to memory and I/O devices. This is important and generally
 *      different from the corresponding Intel Nehalem-Skylake+ processors. As a
 *      result, we track this relationship in the operating system.
 *
 *      In processors with an L3 cache, the L3 cache is generally shared across
 *      the entire node, though the way this is carved up varies from generation
 *      to generation.
 *
 * BULLDOZER
 *
 *      Starting with the Bulldozer family (0x15) and continuing until the
 *      introduction of the Zen microarchitecture, AMD introduced the idea of a
 *      compute unit. In a compute unit, two traditional cores share a number of
 *      hardware resources. Critically, they share the FPU, L1 instruction
 *      cache, and the L2 cache. Several compute units were then combined inside
 *      of a single node.  Because the integer execution units, L1 data cache,
 *      and some other resources were not shared between the cores, AMD never
 *      considered this to be SMT.
 *
 * ZEN
 *
 *      The Zen family (0x17) uses a multi-chip module (MCM) design; the module
 *      is called Zeppelin. These modules are similar to the idea of nodes used
 *      previously. Each of these nodes has two DRAM channels which all of the
 *      cores in the node can access uniformly. These nodes are linked together
 *      in the package, creating a NUMA environment.
 *
 *      The Zeppelin die itself contains two different 'core complexes'. Each
 *      core complex consists of four cores which each have two threads, for a
 *      total of 8 logical CPUs per complex. Unlike other generations,
 *      where all the logical CPUs in a given node share the L3 cache, here each
 *      core complex has its own shared L3 cache.
 *
 *      A further thing that we need to consider is that in some configurations,
 *      particularly with the Threadripper line of processors, not every die
 *      actually has its memory controllers wired up to actual memory channels.
 *      This means that some cores have memory attached to them and others
 *      don't.
 *
 *      To put Zen in perspective, consider the following images:
 *
 *      +--------------------------------------------------------+
 *      | Core Complex                                           |
 *      | +-------------------+    +-------------------+  +---+  |
 *      | | Core       +----+ |    | Core       +----+ |  |   |  |
 *      | | +--------+ | L2 | |    | +--------+ | L2 | |  |   |  |
 *      | | | Thread | +----+ |    | | Thread | +----+ |  |   |  |
 *      | | +--------+-+ +--+ |    | +--------+-+ +--+ |  | L |  |
 *      | |   | Thread | |L1| |    |   | Thread | |L1| |  | 3 |  |
 *      | |   +--------+ +--+ |    |   +--------+ +--+ |  |   |  |
 *      | +-------------------+    +-------------------+  | C |  |
 *      | +-------------------+    +-------------------+  | a |  |
 *      | | Core       +----+ |    | Core       +----+ |  | c |  |
 *      | | +--------+ | L2 | |    | +--------+ | L2 | |  | h |  |
 *      | | | Thread | +----+ |    | | Thread | +----+ |  | e |  |
 *      | | +--------+-+ +--+ |    | +--------+-+ +--+ |  |   |  |
 *      | |   | Thread | |L1| |    |   | Thread | |L1| |  |   |  |
 *      | |   +--------+ +--+ |    |   +--------+ +--+ |  |   |  |
 *      | +-------------------+    +-------------------+  +---+  |
 *      |                                                        |
 *      +--------------------------------------------------------+
 *
 *  This first image represents a single Zen core complex that consists of four
 *  cores.
 *
 *
 *      +--------------------------------------------------------+
 *      | Zeppelin Die                                           |
 *      |  +--------------------------------------------------+  |
 *      |  |         I/O Units (PCIe, SATA, USB, etc.)        |  |
 *      |  +--------------------------------------------------+  |
 *      |                           HH                           |
 *      |          +-----------+    HH    +-----------+          |
 *      |          |           |    HH    |           |          |
 *      |          |    Core   |==========|    Core   |          |
 *      |          |  Complex  |==========|  Complex  |          |
 *      |          |           |    HH    |           |          |
 *      |          +-----------+    HH    +-----------+          |
 *      |                           HH                           |
 *      |  +--------------------------------------------------+  |
 *      |  |                Memory Controller                 |  |
 *      |  +--------------------------------------------------+  |
 *      |                                                        |
 *      +--------------------------------------------------------+
 *
 *  This image represents a single Zeppelin Die. Note how both cores are
 *  connected to the same memory controller and I/O units. While each core
 *  complex has its own L3 cache as seen in the first image, they both have
 *  uniform access to memory.
 *
 *
 *                      PP                     PP
 *                      PP                     PP
 *           +----------PP---------------------PP---------+
 *           |          PP                     PP         |
 *           |    +-----------+          +-----------+    |
 *           |    |           |          |           |    |
 *       MMMMMMMMM|  Zeppelin |==========|  Zeppelin |MMMMMMMMM
 *       MMMMMMMMM|    Die    |==========|    Die    |MMMMMMMMM
 *           |    |           |          |           |    |
 *           |    +-----------+ooo    ...+-----------+    |
 *           |          HH      ooo  ...       HH         |
 *           |          HH        oo..         HH         |
 *           |          HH        ..oo         HH         |
 *           |          HH      ...  ooo       HH         |
 *           |    +-----------+...    ooo+-----------+    |
 *           |    |           |          |           |    |
 *       MMMMMMMMM|  Zeppelin |==========|  Zeppelin |MMMMMMMMM
 *       MMMMMMMMM|    Die    |==========|    Die    |MMMMMMMMM
 *           |    |           |          |           |    |
 *           |    +-----------+          +-----------+    |
 *           |          PP                     PP         |
 *           +----------PP---------------------PP---------+
 *                      PP                     PP
 *                      PP                     PP
 *
 *  This image represents a single Zen package. In this example, it has four
 *  Zeppelin dies, though some configurations only have a single one. In this
 *  example, each die is directly connected to the next. Also, each die is
 *  represented as being connected to memory by the 'M' character and connected
 *  to PCIe devices and other I/O by the 'P' character. Because each Zeppelin
 *  die is made up of two core complexes, we have multiple different NUMA
 *  domains that we care about for these systems.
 *
 * CPUID LEAVES
 *
 * There are a few different CPUID leaves that we can use to try and understand
 * the actual state of the world. As part of the introduction of family 0xf, AMD
 * added CPUID leaf 0x80000008. This leaf tells us the number of logical
 * processors that are in the system. Because families before Zen didn't have
 * SMT, this was always the number of cores that were in the system. However, it
 * should always be thought of as the number of logical threads to be consistent
 * between generations. In addition we also get the size of the APIC ID that is
 * used to represent the number of logical processors. This is important for
 * deriving topology information.
 *
 * In the Bulldozer family, AMD added leaf 0x8000001E. The information varies a
 * bit between Bulldozer and later families, but it is quite useful in
 * determining the topology information. Because this information has changed
 * across family generations, it's worth calling out what these mean
 * explicitly. The registers have the following meanings:
 *
 *      %eax    The APIC ID. The entire register is defined to have a 32-bit
 *              APIC ID, even though on systems without x2apic support, it will
 *              be limited to 8 bits.
 *
 *      %ebx    On Bulldozer-era systems this contains information about the
 *              number of cores that are in a compute unit (cores that share
 *              resources). It also contains a per-package compute unit ID that
 *              identifies which compute unit the logical CPU is a part of.
 *
 *              On Zen-era systems this instead contains the number of threads
 *              per core and the ID of the core that the logical CPU is a part
 *              of. Note, this ID is unique only to the package, it is not
 *              globally unique across the entire system.
 *
 *      %ecx    This contains the number of nodes that exist in the package. It
 *              also contains an ID that identifies which node the logical CPU
 *              is a part of.
 *
 * Finally, we also use cpuid leaf 0x8000001D to gather information about the
 * cache layout and determine which logical CPUs are sharing which caches.
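 *
 * As a sketch of decoding leaf 0x8000001E on a Zen-era part (bit positions
 * per AMD's documentation; Bulldozer-era parts lay out %ebx differently, as
 * described above):
 *
 *      regs.cp_eax = 0x8000001e;
 *      regs.cp_ebx = regs.cp_ecx = regs.cp_edx = 0;
 *      (void) __cpuid_insn(&regs);
 *      apicid = regs.cp_eax;
 *      coreid = regs.cp_ebx & 0xff;
 *      threads_per_core = ((regs.cp_ebx >> 8) & 0xff) + 1;
 *      nodeid = regs.cp_ecx & 0xff;
 *      nodes_per_pkg = ((regs.cp_ecx >> 8) & 0x7) + 1;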
 *
 * illumos Topology
 * ----------------
 *
 * Based on the above we synthesize the information into several different
 * variables that we store in the 'struct cpuid_info'. We'll go into the details
 * of what each member is supposed to represent and their uniqueness. In
 * general, there are two levels of uniqueness that we care about. We care about
 * an ID that is globally unique. That means that it will be unique across all
 * entities in the system. For example, the default logical CPU ID is globally
 * unique. On the other hand, there is some information that we only care about
 * being unique within the context of a single package / socket. Here are the
 * variables that we keep track of and their meaning.
 *
 * Several of the values that are asking for an identifier, with the exception
 * of cpi_apicid, are allowed to be synthetic.
 *
 *
 * cpi_apicid
 *
 *      This is the value of the CPU's APIC ID. This should be the full 32-bit
 *      ID if the CPU is using the x2apic. Otherwise, it should be the 8-bit
 *      APIC ID. This value is globally unique between all logical CPUs across
 *      all packages. This is usually required by the APIC.
 *
 * cpi_chipid
 *
 *      This value indicates the ID of the package that the logical CPU is a
 *      part of. This value is allowed to be synthetic. It is usually derived by
 *      taking the CPU's APIC ID and determining how many bits are used to
 *      represent CPU cores in the package. All logical CPUs that are part of
 *      the same package must have the same value.
 *
 * cpi_coreid
 *
 *      This represents the ID of a CPU core. Two logical CPUs should only have
 *      the same cpi_coreid value if they are part of the same core. These
 *      values may be synthetic. On systems that support SMT, this value is
 *      usually derived from the APIC ID, otherwise it is often synthetic and
 *      just set to the value of the cpu_id in the cpu_t.
 *
 * cpi_pkgcoreid
 *
 *      This is similar to the cpi_coreid in that logical CPUs that are part of
 *      the same core should have the same ID. The main difference is that these
 *      values are only required to be unique to a given socket.
 *
 * cpi_clogid
 *
 *      This represents the logical ID of a logical CPU. This value should be
 *      unique within a given socket for each logical CPU. This is allowed to be
 *      synthetic, though it is usually based off of the CPU's APIC ID. The
 *      broader system expects that logical CPUs that are part of the same
 *      core have contiguous numbers. For example, if there were two threads per
 *      core, then the IDs of two siblings divided by two should be the same,
 *      and the first modulo two should be zero while the second should be one.
 *      So IDs 4 and 5 indicate two logical CPUs that are part of the same core,
 *      but IDs 5 and 6 represent two logical CPUs that are part of different
 *      cores.
 *
 *      While it is common for the cpi_coreid and the cpi_clogid to be derived
 *      from the same source, strictly speaking, they don't have to be and the
 *      two values should be considered logically independent. One should not
 *      try to compare a logical CPU's cpi_coreid and cpi_clogid to determine
 *      some kind of relationship. While this is tempting, we've seen cases on
 *      AMD family 0xf where the system's cpu id is not related to its APIC ID.
 *
 * cpi_ncpu_per_chip
 *
 *      This value indicates the total number of logical CPUs that exist in the
 *      physical package. Critically, this is not the number of logical CPUs
 *      that exist for just the single core.
 *
 *      This value should be the same for all logical CPUs in the same package.
 *
 * cpi_ncore_per_chip
 *
 *      This value indicates the total number of physical CPU cores that exist
 *      in the package. The system compares this value with cpi_ncpu_per_chip to
 *      determine if simultaneous multi-threading (SMT) is enabled. When
 *      cpi_ncpu_per_chip equals cpi_ncore_per_chip, then there is no SMT and
 *      the X86FSET_HTT feature is not set. If this value is greater than one,
 *      then we consider the processor to have the feature X86FSET_CMP, to
 *      indicate that there is support for more than one core.
 *
 *      This value should be the same for all logical CPUs in the same package.
 *
 * cpi_procnodes_per_pkg
 *
 *      This value indicates the number of 'nodes' that exist in the package.
 *      When processors are actually a multi-chip module, this represents the
 *      number of such modules that exist in the package. Currently, on Intel
 *      based systems this member is always set to 1.
 *
 *      This value should be the same for all logical CPUs in the same package.
 *
 * cpi_procnodeid
 *
 *      This value indicates the ID of the node that the logical CPU is a part
 *      of. All logical CPUs that are in the same node must have the same value
 *      here. This value must be unique across all of the packages in the
 *      system.  On Intel based systems, this is currently set to the value in
 *      cpi_chipid because there is only one node.
 *
 * cpi_cores_per_compunit
 *
 *      This value indicates the number of cores that are part of a compute
 *      unit. See the AMD topology section for this. This member only has real
 *      meaning currently for AMD Bulldozer family processors. For all other
 *      processors, this should currently be set to 1.
 *
 * cpi_compunitid
 *
 *      This indicates the compute unit that the logical CPU belongs to. For
 *      processors without AMD Bulldozer-style compute units this should be set
 *      to the value of cpi_coreid.
 *
 * cpi_ncpu_shr_last_cache
 *
 *      This indicates the number of logical CPUs that are sharing the same last
 *      level cache. This value should be the same for all CPUs that are sharing
 *      that cache. The last cache refers to the cache that is closest to memory
 *      and furthest away from the CPU.
 *
 * cpi_last_lvl_cacheid
 *
 *      This indicates the ID of the last cache that the logical CPU uses. This
 *      cache is often shared between multiple logical CPUs and is the cache
 *      that is closest to memory and furthest away from the CPU. This value
 *      should be the same for a group of logical CPUs only if they actually
 *      share the same last level cache. IDs should not overlap between
 *      packages.
 *
 * cpi_ncore_bits
 *
 *      This indicates the number of bits that are required to represent all of
 *      the cores in the system. As cores are derived based on their APIC IDs,
 *      we aren't guaranteed a run of APIC IDs starting from zero. It's OK for
 *      this value to be larger than the actual number of IDs that are present
 *      in the system. This is used to size tables by the CMI framework. It is
 *      only filled in for Intel and AMD CPUs.
 *
 * cpi_nthread_bits
 *
 *      This indicates the number of bits required to represent all of the IDs
 *      that cover the logical CPUs that exist on a given core. It's OK for this
 *      value to be larger than the actual number of IDs that are present in the
 *      system.  This is used to size tables by the CMI framework. It is
 *      only filled in for Intel and AMD CPUs.
 *
 * -----------
 * Hypervisors
 * -----------
 *
 * If trying to manage the differences between vendors wasn't bad enough, it can
 * get worse thanks to our friend hardware virtualization. Hypervisors are given
 * the ability to interpose on all cpuid instructions and change them to suit
 * their purposes. In general, this is necessary as the hypervisor wants to be
 * able to present a more uniform set of features or not necessarily give the
 * guest operating system kernel knowledge of all features so it can be
 * more easily migrated between systems.
 *
 * When it comes to trying to determine topology information, this can be a
 * double edged sword. When a hypervisor doesn't actually implement a cpuid
 * leaf, it'll often return all zeros. Because of that, you'll often see various
 * checks scattered about that verify fields are non-zero before we assume we
 * can use them.
 *
 * When it comes to topology information, the hypervisor is often incentivized
 * to lie to you about topology. This is because it doesn't always actually
 * guarantee that topology at all. The topology path we take in the system
 * depends on how the CPU advertises itself. If it advertises itself as an Intel
 * or AMD CPU, then we basically do our normal path. However, when they don't
 * use an actual vendor, then that usually turns into multiple one-core CPUs
 * that we enumerate that are often on different sockets. The actual behavior
 * depends greatly on what the hypervisor actually exposes to us.
 *
 * --------------------
 * Exposing Information
 * --------------------
 *
 * We expose CPUID information in three different forms in the system.
 *
 * The first is through the x86_featureset variable. This is used in conjunction
 * with the is_x86_feature() function. This is queried by x86-specific functions
 * to determine which features are or aren't present in the system and to make
 * decisions based upon them. For example, users of this include everything from
 * parts of the system dedicated to reliability, availability, and
 * serviceability (RAS), to making decisions about how to handle security
 * mitigations, to various x86-specific drivers. General purpose or
 * architecture independent drivers should never be calling this function.
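 *
 * As an illustrative sketch (not a real consumer), an x86-specific code path
 * might be guarded like this:
 *
 *      if (is_x86_feature(x86_featureset, X86FSET_SMAP))
 *              setcr4(getcr4() | CR4_SMAP);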
 *
 * The second means is through the auxiliary vector. The auxiliary vector is a
 * series of tagged data that the kernel passes down to a user program when it
 * begins executing. This information is used to indicate to programs what
 * instruction set extensions are present. For example, information about the
 * CPU supporting the machine check architecture (MCA) wouldn't be passed down
 * since user programs cannot make use of it. However, things like the AVX
 * instruction sets are. Programs use this information to make run-time
 * decisions about what features they should use. As an example, the run-time
 * link-editor (rtld) can relocate different functions depending on the hardware
 * support available.
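 *
 * For example, a userland program can test these bits with getisax(3C),
 * using the AV_386_* values from sys/auxv_386.h; here use_aes_ni is simply
 * an illustrative stand-in:
 *
 *      uint32_t hw;
 *
 *      (void) getisax(&hw, 1);
 *      if (hw & AV_386_AES)
 *              use_aes_ni = B_TRUE;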
 *
 * The final form is through a series of accessor functions that all have the
 * form cpuid_get*. This is used by a number of different subsystems in the
 * kernel to determine more detailed information about what we're running on,
 * topology information, etc. Some of these subsystems include processor groups
 * (uts/common/os/pg.c), CPU Module Interface (uts/i86pc/os/cmi.c), ACPI,
 * microcode, and performance monitoring. These functions all ASSERT that the
 * CPU they're being called on has reached a certain cpuid pass. If the passes
 * are rearranged, then this needs to be adjusted.
 */

#include <sys/types.h>
#include <sys/archsystm.h>
#include <sys/x86_archext.h>
#include <sys/kmem.h>
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/cpuvar.h>
#include <sys/processor.h>
#include <sys/sysmacros.h>
#include <sys/pg.h>
#include <sys/fp.h>
#include <sys/controlregs.h>
#include <sys/bitmap.h>
#include <sys/auxv_386.h>
#include <sys/memnode.h>
#include <sys/pci_cfgspace.h>
#include <sys/comm_page.h>
#include <sys/mach_mmu.h>
#include <sys/ucode.h>
#include <sys/tsc.h>

#ifdef __xpv
#include <sys/hypervisor.h>
#else
#include <sys/ontrap.h>
#endif

uint_t x86_vendor = X86_VENDOR_IntelClone;
uint_t x86_type = X86_TYPE_OTHER;
uint_t x86_clflush_size = 0;

#if defined(__xpv)
int x86_use_pcid = 0;
int x86_use_invpcid = 0;
#else
int x86_use_pcid = -1;
int x86_use_invpcid = -1;
#endif

uint_t pentiumpro_bug4046376;

uchar_t x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)];

static char *x86_feature_names[NUM_X86_FEATURES] = {
	"lgpg",
	"tsc",
	"msr",
	"mtrr",
	"pge",
	"de",
	"cmov",
	"mmx",
	"mca",
	"pae",
	"cv8",
	"pat",
	"sep",
	"sse",
	"sse2",
	"htt",
	"asysc",
	"nx",
	"sse3",
	"cx16",
	"cmp",
	"tscp",
	"mwait",
	"sse4a",
	"cpuid",
	"ssse3",
	"sse4_1",
	"sse4_2",
	"1gpg",
	"clfsh",
	"64",
	"aes",
	"pclmulqdq",
	"xsave",
	"avx",
	"vmx",
	"svm",
	"topoext",
	"f16c",
	"rdrand",
	"x2apic",
	"avx2",
	"bmi1",
	"bmi2",
	"fma",
	"smep",
	"smap",
	"adx",
	"rdseed",
	"mpx",
	"avx512f",
	"avx512dq",
	"avx512pf",
	"avx512er",
	"avx512cd",
	"avx512bw",
	"avx512vl",
	"avx512fma",
	"avx512vbmi",
	"avx512_vpopcntdq",
	"avx512_4vnniw",
	"avx512_4fmaps",
	"xsaveopt",
	"xsavec",
	"xsaves",
	"sha",
	"umip",
	"pku",
	"ospke",
	"pcid",
	"invpcid",
	"ibrs",
	"ibpb",
	"stibp",
	"ssbd",
	"ssbd_virt",
	"rdcl_no",
	"ibrs_all",
	"rsba",
	"ssb_no",
	"stibp_all",
	"flush_cmd",
	"l1d_vmentry_no",
	"fsgsbase",
	"clflushopt",
	"clwb",
	"monitorx",
	"clzero",
	"xop",
	"fma4",
	"tbm",
	"avx512_vnni",
	"amd_pcec"
};

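/*
 * These functions are the kernel's interface for querying and manipulating an
 * x86 feature set bitmap, such as the x86_featureset map defined above.
 */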
1043 boolean_t
1044 is_x86_feature(void *featureset, uint_t feature)
1045 {
1046         ASSERT(feature < NUM_X86_FEATURES);
1047         return (BT_TEST((ulong_t *)featureset, feature));
1048 }
1049 
1050 void
1051 add_x86_feature(void *featureset, uint_t feature)
1052 {
1053         ASSERT(feature < NUM_X86_FEATURES);
1054         BT_SET((ulong_t *)featureset, feature);
1055 }
1056 
1057 void
1058 remove_x86_feature(void *featureset, uint_t feature)
1059 {
1060         ASSERT(feature < NUM_X86_FEATURES);
1061         BT_CLEAR((ulong_t *)featureset, feature);
1062 }
1063 
1064 boolean_t
1065 compare_x86_featureset(void *setA, void *setB)
1066 {
1067         /*
1068          * We assume that the unused bits of the bitmap are always zero.
1069          */
1070         if (memcmp(setA, setB, BT_SIZEOFMAP(NUM_X86_FEATURES)) == 0) {
1071                 return (B_TRUE);
1072         } else {
1073                 return (B_FALSE);
1074         }
1075 }
1076 
1077 void
1078 print_x86_featureset(void *featureset)
1079 {
1080         uint_t i;
1081 
1082         for (i = 0; i < NUM_X86_FEATURES; i++) {
1083                 if (is_x86_feature(featureset, i)) {
1084                         cmn_err(CE_CONT, "?x86_feature: %s\n",
1085                             x86_feature_names[i]);
1086                 }
1087         }
1088 }
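
/*
 * A minimal usage sketch for the feature set accessors above
 * (illustrative only; X86FSET_SSE2 is one of the real feature
 * constants from x86_archext.h):
 *
 *	uchar_t fset[BT_SIZEOFMAP(NUM_X86_FEATURES)] = { 0 };
 *
 *	add_x86_feature(fset, X86FSET_SSE2);
 *	if (is_x86_feature(fset, X86FSET_SSE2))
 *		cmn_err(CE_CONT, "?x86_feature: sse2\n");
 *	remove_x86_feature(fset, X86FSET_SSE2);
 */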
1089 
1090 /* Note: This is the maximum size for the CPU, not the size of the structure. */
1091 static size_t xsave_state_size = 0;
1092 uint64_t xsave_bv_all = (XFEATURE_LEGACY_FP | XFEATURE_SSE);
1093 boolean_t xsave_force_disable = B_FALSE;
1094 extern int disable_smap;
1095 
1096 /*
 * This is set to the platform type we are running on.
1098  */
1099 static int platform_type = -1;
1100 
1101 #if !defined(__xpv)
1102 /*
1103  * Variable to patch if hypervisor platform detection needs to be
1104  * disabled (e.g. platform_type will always be HW_NATIVE if this is 0).
1105  */
1106 int enable_platform_detection = 1;
1107 #endif
1108 
1109 /*
1110  * monitor/mwait info.
1111  *
1112  * size_actual and buf_actual are the real address and size allocated to get
 * proper mwait_buf alignment.  buf_actual and size_actual should be passed
 * to kmem_free().  Currently kmem_alloc() and mwait happen to both use
 * processor cache-line alignment, but this is not guaranteed in the future.
1116  */
1117 struct mwait_info {
1118         size_t          mon_min;        /* min size to avoid missed wakeups */
1119         size_t          mon_max;        /* size to avoid false wakeups */
1120         size_t          size_actual;    /* size actually allocated */
1121         void            *buf_actual;    /* memory actually allocated */
1122         uint32_t        support;        /* processor support of monitor/mwait */
1123 };
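
/*
 * A minimal sketch of the over-allocate-then-align pattern described above,
 * assuming mon_max is a power of two (illustrative only; the real allocation
 * is done by the kernel's mwait setup code):
 *
 *	mi->size_actual = mi->mon_max * 2;
 *	mi->buf_actual = kmem_zalloc(mi->size_actual, KM_SLEEP);
 *	void *mwait_buf = (void *)P2ROUNDUP((uintptr_t)mi->buf_actual,
 *	    mi->mon_max);
 */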
1124 
1125 /*
1126  * xsave/xrestor info.
1127  *
1128  * This structure contains HW feature bits and the size of the xsave save area.
1129  * Note: the kernel declares a fixed size (AVX_XSAVE_SIZE) structure
1130  * (xsave_state) to describe the xsave layout. However, at runtime the
1131  * per-lwp xsave area is dynamically allocated based on xsav_max_size. The
1132  * xsave_state structure simply represents the legacy layout of the beginning
1133  * of the xsave area.
1134  */
1135 struct xsave_info {
1136         uint32_t        xsav_hw_features_low;   /* Supported HW features */
1137         uint32_t        xsav_hw_features_high;  /* Supported HW features */
1138         size_t          xsav_max_size;  /* max size save area for HW features */
1139         size_t          ymm_size;       /* AVX: size of ymm save area */
1140         size_t          ymm_offset;     /* AVX: offset for ymm save area */
1141         size_t          bndregs_size;   /* MPX: size of bndregs save area */
1142         size_t          bndregs_offset; /* MPX: offset for bndregs save area */
1143         size_t          bndcsr_size;    /* MPX: size of bndcsr save area */
1144         size_t          bndcsr_offset;  /* MPX: offset for bndcsr save area */
1145         size_t          opmask_size;    /* AVX512: size of opmask save */
1146         size_t          opmask_offset;  /* AVX512: offset for opmask save */
1147         size_t          zmmlo_size;     /* AVX512: size of zmm 256 save */
1148         size_t          zmmlo_offset;   /* AVX512: offset for zmm 256 save */
1149         size_t          zmmhi_size;     /* AVX512: size of zmm hi reg save */
1150         size_t          zmmhi_offset;   /* AVX512: offset for zmm hi reg save */
1151 };
1152 
1153 
1154 /*
1155  * These constants determine how many of the elements of the
1156  * cpuid we cache in the cpuid_info data structure; the
1157  * remaining elements are accessible via the cpuid instruction.
1158  */
1159 
1160 #define NMAX_CPI_STD    8               /* eax = 0 .. 7 */
1161 #define NMAX_CPI_EXTD   0x1f            /* eax = 0x80000000 .. 0x8000001e */
1162 
1163 /*
1164  * See the big theory statement for a more detailed explanation of what some of
1165  * these members mean.
1166  */
1167 struct cpuid_info {
1168         uint_t cpi_pass;                /* last pass completed */
1169         /*
1170          * standard function information
1171          */
1172         uint_t cpi_maxeax;              /* fn 0: %eax */
1173         char cpi_vendorstr[13];         /* fn 0: %ebx:%ecx:%edx */
1174         uint_t cpi_vendor;              /* enum of cpi_vendorstr */
1175 
1176         uint_t cpi_family;              /* fn 1: extended family */
1177         uint_t cpi_model;               /* fn 1: extended model */
1178         uint_t cpi_step;                /* fn 1: stepping */
1179         chipid_t cpi_chipid;            /* fn 1: %ebx:  Intel: chip # */
1180                                         /*              AMD: package/socket # */
1181         uint_t cpi_brandid;             /* fn 1: %ebx: brand ID */
1182         int cpi_clogid;                 /* fn 1: %ebx: thread # */
1183         uint_t cpi_ncpu_per_chip;       /* fn 1: %ebx: logical cpu count */
1184         uint8_t cpi_cacheinfo[16];      /* fn 2: intel-style cache desc */
1185         uint_t cpi_ncache;              /* fn 2: number of elements */
1186         uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */
1187         id_t cpi_last_lvl_cacheid;      /* fn 4: %eax: derived cache id */
1188         uint_t cpi_cache_leaf_size;     /* Number of cache elements */
1189                                         /* Intel fn: 4, AMD fn: 8000001d */
	struct cpuid_regs **cpi_cache_leaves;	/* Actual leaves from above */
1191         struct cpuid_regs cpi_std[NMAX_CPI_STD];        /* 0 .. 7 */
1192         /*
1193          * extended function information
1194          */
1195         uint_t cpi_xmaxeax;             /* fn 0x80000000: %eax */
1196         char cpi_brandstr[49];          /* fn 0x8000000[234] */
1197         uint8_t cpi_pabits;             /* fn 0x80000006: %eax */
1198         uint8_t cpi_vabits;             /* fn 0x80000006: %eax */
1199         uint8_t cpi_fp_amd_save;        /* AMD: FP error pointer save rqd. */
1200         struct  cpuid_regs cpi_extd[NMAX_CPI_EXTD];     /* 0x800000XX */
1201 
1202         id_t cpi_coreid;                /* same coreid => strands share core */
1203         int cpi_pkgcoreid;              /* core number within single package */
1204         uint_t cpi_ncore_per_chip;      /* AMD: fn 0x80000008: %ecx[7-0] */
1205                                         /* Intel: fn 4: %eax[31-26] */
1206 
1207         /*
1208          * These values represent the number of bits that are required to store
1209          * information about the number of cores and threads.
1210          */
1211         uint_t cpi_ncore_bits;
1212         uint_t cpi_nthread_bits;
1213         /*
1214          * supported feature information
1215          */
1216         uint32_t cpi_support[6];
1217 #define STD_EDX_FEATURES        0
1218 #define AMD_EDX_FEATURES        1
1219 #define TM_EDX_FEATURES         2
1220 #define STD_ECX_FEATURES        3
1221 #define AMD_ECX_FEATURES        4
1222 #define STD_EBX_FEATURES        5
1223         /*
1224          * Synthesized information, where known.
1225          */
1226         uint32_t cpi_chiprev;           /* See X86_CHIPREV_* in x86_archext.h */
1227         const char *cpi_chiprevstr;     /* May be NULL if chiprev unknown */
1228         uint32_t cpi_socket;            /* Chip package/socket type */
1229 
1230         struct mwait_info cpi_mwait;    /* fn 5: monitor/mwait info */
1231         uint32_t cpi_apicid;
1232         uint_t cpi_procnodeid;          /* AMD: nodeID on HT, Intel: chipid */
1233         uint_t cpi_procnodes_per_pkg;   /* AMD: # of nodes in the package */
1234                                         /* Intel: 1 */
1235         uint_t cpi_compunitid;          /* AMD: ComputeUnit ID, Intel: coreid */
1236         uint_t cpi_cores_per_compunit;  /* AMD: # of cores in the ComputeUnit */
1237 
1238         struct xsave_info cpi_xsave;    /* fn D: xsave/xrestor info */
1239 };
1240 
1241 
1242 static struct cpuid_info cpuid_info0;
1243 
1244 /*
1245  * These bit fields are defined by the Intel Application Note AP-485
1246  * "Intel Processor Identification and the CPUID Instruction"
1247  */
1248 #define CPI_FAMILY_XTD(cpi)     BITX((cpi)->cpi_std[1].cp_eax, 27, 20)
1249 #define CPI_MODEL_XTD(cpi)      BITX((cpi)->cpi_std[1].cp_eax, 19, 16)
1250 #define CPI_TYPE(cpi)           BITX((cpi)->cpi_std[1].cp_eax, 13, 12)
1251 #define CPI_FAMILY(cpi)         BITX((cpi)->cpi_std[1].cp_eax, 11, 8)
1252 #define CPI_STEP(cpi)           BITX((cpi)->cpi_std[1].cp_eax, 3, 0)
1253 #define CPI_MODEL(cpi)          BITX((cpi)->cpi_std[1].cp_eax, 7, 4)
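
/*
 * For reference, the displayed family and model are conventionally composed
 * from these fields as follows (a sketch; the kernel's actual, vendor-aware
 * logic lives in cpuid_pass1()):
 *
 *	uint_t family = CPI_FAMILY(cpi);
 *	uint_t model = CPI_MODEL(cpi);
 *
 *	if (CPI_FAMILY(cpi) == 0xf)
 *		family += CPI_FAMILY_XTD(cpi);
 *	if (CPI_FAMILY(cpi) == 0x6 || CPI_FAMILY(cpi) == 0xf)
 *		model += CPI_MODEL_XTD(cpi) << 4;
 */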
1254 
1255 #define CPI_FEATURES_EDX(cpi)           ((cpi)->cpi_std[1].cp_edx)
1256 #define CPI_FEATURES_ECX(cpi)           ((cpi)->cpi_std[1].cp_ecx)
1257 #define CPI_FEATURES_XTD_EDX(cpi)       ((cpi)->cpi_extd[1].cp_edx)
1258 #define CPI_FEATURES_XTD_ECX(cpi)       ((cpi)->cpi_extd[1].cp_ecx)
1259 #define CPI_FEATURES_7_0_EBX(cpi)       ((cpi)->cpi_std[7].cp_ebx)
1260 #define CPI_FEATURES_7_0_ECX(cpi)       ((cpi)->cpi_std[7].cp_ecx)
1261 #define CPI_FEATURES_7_0_EDX(cpi)       ((cpi)->cpi_std[7].cp_edx)
1262 
1263 #define CPI_BRANDID(cpi)        BITX((cpi)->cpi_std[1].cp_ebx, 7, 0)
#define	CPI_CHUNKS(cpi)		BITX((cpi)->cpi_std[1].cp_ebx, 15, 8)
1265 #define CPI_CPU_COUNT(cpi)      BITX((cpi)->cpi_std[1].cp_ebx, 23, 16)
1266 #define CPI_APIC_ID(cpi)        BITX((cpi)->cpi_std[1].cp_ebx, 31, 24)
1267 
1268 #define CPI_MAXEAX_MAX          0x100           /* sanity control */
1269 #define CPI_XMAXEAX_MAX         0x80000100
1270 #define CPI_FN4_ECX_MAX         0x20            /* sanity: max fn 4 levels */
1271 #define CPI_FNB_ECX_MAX         0x20            /* sanity: max fn B levels */
1272 
1273 /*
1274  * Function 4 (Deterministic Cache Parameters) macros
1275  * Defined by Intel Application Note AP-485
1276  */
1277 #define CPI_NUM_CORES(regs)             BITX((regs)->cp_eax, 31, 26)
1278 #define CPI_NTHR_SHR_CACHE(regs)        BITX((regs)->cp_eax, 25, 14)
1279 #define CPI_FULL_ASSOC_CACHE(regs)      BITX((regs)->cp_eax, 9, 9)
1280 #define CPI_SELF_INIT_CACHE(regs)       BITX((regs)->cp_eax, 8, 8)
1281 #define CPI_CACHE_LVL(regs)             BITX((regs)->cp_eax, 7, 5)
1282 #define CPI_CACHE_TYPE(regs)            BITX((regs)->cp_eax, 4, 0)
1283 #define CPI_CPU_LEVEL_TYPE(regs)        BITX((regs)->cp_ecx, 15, 8)
1284 
1285 #define CPI_CACHE_WAYS(regs)            BITX((regs)->cp_ebx, 31, 22)
1286 #define CPI_CACHE_PARTS(regs)           BITX((regs)->cp_ebx, 21, 12)
1287 #define CPI_CACHE_COH_LN_SZ(regs)       BITX((regs)->cp_ebx, 11, 0)
1288 
1289 #define CPI_CACHE_SETS(regs)            BITX((regs)->cp_ecx, 31, 0)
1290 
1291 #define CPI_PREFCH_STRIDE(regs)         BITX((regs)->cp_edx, 9, 0)
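
/*
 * The ways/partitions/line-size/sets fields above are each encoded as
 * (value - 1), so a cache's total size in bytes can be reconstructed as
 * (a sketch):
 *
 *	size_t size = (CPI_CACHE_WAYS(regs) + 1) *
 *	    (CPI_CACHE_PARTS(regs) + 1) *
 *	    (CPI_CACHE_COH_LN_SZ(regs) + 1) *
 *	    (CPI_CACHE_SETS(regs) + 1);
 */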
1292 
1293 
1294 /*
1295  * A couple of shorthand macros to identify "later" P6-family chips
1296  * like the Pentium M and Core.  First, the "older" P6-based stuff
1297  * (loosely defined as "pre-Pentium-4"):
1298  * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon
1299  */
#define	IS_LEGACY_P6(cpi) (			\
	cpi->cpi_family == 6 &&			\
		(cpi->cpi_model == 1 ||		\
		cpi->cpi_model == 3 ||		\
		cpi->cpi_model == 5 ||		\
		cpi->cpi_model == 6 ||		\
		cpi->cpi_model == 7 ||		\
		cpi->cpi_model == 8 ||		\
		cpi->cpi_model == 0xA ||	\
		cpi->cpi_model == 0xB)		\
)
1311 
1312 /* A "new F6" is everything with family 6 that's not the above */
1313 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi))
1314 
1315 /* Extended family/model support */
1316 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \
1317         cpi->cpi_family >= 0xf)
1318 
1319 /*
1320  * Info for monitor/mwait idle loop.
1321  *
1322  * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's
1323  * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November
1324  * 2006.
1325  * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual
1326  * Documentation Updates" #33633, Rev 2.05, December 2006.
1327  */
1328 #define MWAIT_SUPPORT           (0x00000001)    /* mwait supported */
#define	MWAIT_EXTENSIONS	(0x00000002)	/* extensions supported */
1330 #define MWAIT_ECX_INT_ENABLE    (0x00000004)    /* ecx 1 extension supported */
1331 #define MWAIT_SUPPORTED(cpi)    ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON)
1332 #define MWAIT_INT_ENABLE(cpi)   ((cpi)->cpi_std[5].cp_ecx & 0x2)
1333 #define MWAIT_EXTENSION(cpi)    ((cpi)->cpi_std[5].cp_ecx & 0x1)
1334 #define MWAIT_SIZE_MIN(cpi)     BITX((cpi)->cpi_std[5].cp_eax, 15, 0)
1335 #define MWAIT_SIZE_MAX(cpi)     BITX((cpi)->cpi_std[5].cp_ebx, 15, 0)
1336 /*
1337  * Number of sub-cstates for a given c-state.
1338  */
1339 #define MWAIT_NUM_SUBC_STATES(cpi, c_state)                     \
1340         BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state)
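
/*
 * Leaf 5 %edx packs one 4-bit sub-C-state count per C-state, so the c_state
 * argument above is a bit position: to extract the count for C-state n, pass
 * 4 * n. For example (a sketch):
 *
 *	uint_t c1_substates = MWAIT_NUM_SUBC_STATES(cpi, 4);
 */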
1341 
1342 /*
1343  * XSAVE leaf 0xD enumeration
1344  */
1345 #define CPUID_LEAFD_2_YMM_OFFSET        576
1346 #define CPUID_LEAFD_2_YMM_SIZE          256
1347 
1348 /*
1349  * Common extended leaf names to cut down on typos.
1350  */
1351 #define CPUID_LEAF_EXT_0                0x80000000
1352 #define CPUID_LEAF_EXT_8                0x80000008
1353 #define CPUID_LEAF_EXT_1d               0x8000001d
1354 #define CPUID_LEAF_EXT_1e               0x8000001e
1355 
1356 /*
 * Functions we consume from cpuid_subr.c; don't publish these in a header
1358  * file to try and keep people using the expected cpuid_* interfaces.
1359  */
1360 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t);
1361 extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t);
1362 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t);
1363 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t);
1364 extern uint_t _cpuid_vendorstr_to_vendorcode(char *);
1365 
1366 /*
 * Apply various platform-dependent restrictions where the
1368  * underlying platform restrictions mean the CPU can be marked
1369  * as less capable than its cpuid instruction would imply.
1370  */
1371 #if defined(__xpv)
1372 static void
1373 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp)
1374 {
1375         switch (eax) {
1376         case 1: {
1377                 uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ?
1378                     0 : CPUID_INTC_EDX_MCA;
1379                 cp->cp_edx &=
1380                     ~(mcamask |
1381                     CPUID_INTC_EDX_PSE |
1382                     CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
1383                     CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR |
1384                     CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT |
1385                     CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
1386                     CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT);
1387                 break;
1388         }
1389 
1390         case 0x80000001:
1391                 cp->cp_edx &=
1392                     ~(CPUID_AMD_EDX_PSE |
1393                     CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
1394                     CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE |
1395                     CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 |
1396                     CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
1397                     CPUID_AMD_EDX_TSCP);
1398                 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY;
1399                 break;
1400         default:
1401                 break;
1402         }
1403 
1404         switch (vendor) {
1405         case X86_VENDOR_Intel:
1406                 switch (eax) {
1407                 case 4:
1408                         /*
1409                          * Zero out the (ncores-per-chip - 1) field
1410                          */
                        cp->cp_eax &= 0x03ffffff;
1412                         break;
1413                 default:
1414                         break;
1415                 }
1416                 break;
1417         case X86_VENDOR_AMD:
1418                 switch (eax) {
1419 
1420                 case 0x80000001:
1421                         cp->cp_ecx &= ~CPUID_AMD_ECX_CR8D;
1422                         break;
1423 
1424                 case CPUID_LEAF_EXT_8:
1425                         /*
1426                          * Zero out the (ncores-per-chip - 1) field
1427                          */
1428                         cp->cp_ecx &= 0xffffff00;
1429                         break;
1430                 default:
1431                         break;
1432                 }
1433                 break;
1434         default:
1435                 break;
1436         }
1437 }
1438 #else
1439 #define platform_cpuid_mangle(vendor, eax, cp)  /* nothing */
1440 #endif
1441 
1442 /*
1443  *  Some undocumented ways of patching the results of the cpuid
1444  *  instruction to permit running Solaris 10 on future cpus that
1445  *  we don't currently support.  Could be set to non-zero values
1446  *  via settings in eeprom.
1447  */
1448 
1449 uint32_t cpuid_feature_ecx_include;
1450 uint32_t cpuid_feature_ecx_exclude;
1451 uint32_t cpuid_feature_edx_include;
1452 uint32_t cpuid_feature_edx_exclude;
1453 
1454 /*
1455  * Allocate space for mcpu_cpi in the machcpu structure for all non-boot CPUs.
1456  */
1457 void
1458 cpuid_alloc_space(cpu_t *cpu)
1459 {
1460         /*
1461          * By convention, cpu0 is the boot cpu, which is set up
1462          * before memory allocation is available.  All other cpus get
1463          * their cpuid_info struct allocated here.
1464          */
1465         ASSERT(cpu->cpu_id != 0);
1466         ASSERT(cpu->cpu_m.mcpu_cpi == NULL);
1467         cpu->cpu_m.mcpu_cpi =
1468             kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP);
1469 }
1470 
1471 void
1472 cpuid_free_space(cpu_t *cpu)
1473 {
1474         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1475         int i;
1476 
1477         ASSERT(cpi != NULL);
1478         ASSERT(cpi != &cpuid_info0);
1479 
1480         /*
1481          * Free up any cache leaf related dynamic storage. The first entry was
1482          * cached from the standard cpuid storage, so we should not free it.
1483          */
1484         for (i = 1; i < cpi->cpi_cache_leaf_size; i++)
1485                 kmem_free(cpi->cpi_cache_leaves[i], sizeof (struct cpuid_regs));
1486         if (cpi->cpi_cache_leaf_size > 0)
1487                 kmem_free(cpi->cpi_cache_leaves,
1488                     cpi->cpi_cache_leaf_size * sizeof (struct cpuid_regs *));
1489 
1490         kmem_free(cpi, sizeof (*cpi));
1491         cpu->cpu_m.mcpu_cpi = NULL;
1492 }
1493 
1494 #if !defined(__xpv)
1495 /*
1496  * Determine the type of the underlying platform. This is used to customize
1497  * initialization of various subsystems (e.g. TSC). determine_platform() must
1498  * only ever be called once to prevent two processors from seeing different
1499  * values of platform_type. Must be called before cpuid_pass1(), the earliest
1500  * consumer to execute (uses _cpuid_chiprev --> synth_amd_info --> get_hwenv).
1501  */
1502 void
1503 determine_platform(void)
1504 {
1505         struct cpuid_regs cp;
1506         uint32_t base;
1507         uint32_t regs[4];
1508         char *hvstr = (char *)regs;
1509 
1510         ASSERT(platform_type == -1);
1511 
1512         platform_type = HW_NATIVE;
1513 
1514         if (!enable_platform_detection)
1515                 return;
1516 
1517         /*
1518          * If Hypervisor CPUID bit is set, try to determine hypervisor
1519          * vendor signature, and set platform type accordingly.
1520          *
1521          * References:
1522          * http://lkml.org/lkml/2008/10/1/246
1523          * http://kb.vmware.com/kb/1009458
1524          */
1525         cp.cp_eax = 0x1;
1526         (void) __cpuid_insn(&cp);
1527         if ((cp.cp_ecx & CPUID_INTC_ECX_HV) != 0) {
1528                 cp.cp_eax = 0x40000000;
1529                 (void) __cpuid_insn(&cp);
1530                 regs[0] = cp.cp_ebx;
1531                 regs[1] = cp.cp_ecx;
1532                 regs[2] = cp.cp_edx;
1533                 regs[3] = 0;
1534                 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0) {
1535                         platform_type = HW_XEN_HVM;
1536                         return;
1537                 }
1538                 if (strcmp(hvstr, HVSIG_VMWARE) == 0) {
1539                         platform_type = HW_VMWARE;
1540                         return;
1541                 }
1542                 if (strcmp(hvstr, HVSIG_KVM) == 0) {
1543                         platform_type = HW_KVM;
1544                         return;
1545                 }
1546                 if (strcmp(hvstr, HVSIG_BHYVE) == 0) {
1547                         platform_type = HW_BHYVE;
1548                         return;
1549                 }
1550                 if (strcmp(hvstr, HVSIG_MICROSOFT) == 0)
1551                         platform_type = HW_MICROSOFT;
1552         } else {
1553                 /*
                 * Check older VMware hardware versions. The VMware hypervisor
                 * is detected by performing an IN operation to the VMware
                 * hypervisor port and checking that the value returned in
                 * %ebx is the VMware hypervisor magic value.
1558                  *
1559                  * References: http://kb.vmware.com/kb/1009458
1560                  */
1561                 vmware_port(VMWARE_HVCMD_GETVERSION, regs);
1562                 if (regs[1] == VMWARE_HVMAGIC) {
1563                         platform_type = HW_VMWARE;
1564                         return;
1565                 }
1566         }
1567 
1568         /*
1569          * Check Xen hypervisor. In a fully virtualized domain,
1570          * Xen's pseudo-cpuid function returns a string representing the
1571          * Xen signature in %ebx, %ecx, and %edx. %eax contains the maximum
1572          * supported cpuid function. We need at least a (base + 2) leaf value
1573          * to do what we want to do. Try different base values, since the
1574          * hypervisor might use a different one depending on whether Hyper-V
1575          * emulation is switched on by default or not.
1576          */
1577         for (base = 0x40000000; base < 0x40010000; base += 0x100) {
1578                 cp.cp_eax = base;
1579                 (void) __cpuid_insn(&cp);
1580                 regs[0] = cp.cp_ebx;
1581                 regs[1] = cp.cp_ecx;
1582                 regs[2] = cp.cp_edx;
1583                 regs[3] = 0;
1584                 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0 &&
1585                     cp.cp_eax >= (base + 2)) {
1586                         platform_type &= ~HW_NATIVE;
1587                         platform_type |= HW_XEN_HVM;
1588                         return;
1589                 }
1590         }
1591 }
1592 
1593 int
1594 get_hwenv(void)
1595 {
1596         ASSERT(platform_type != -1);
1597         return (platform_type);
1598 }
1599 
1600 int
1601 is_controldom(void)
1602 {
1603         return (0);
1604 }
1605 
1606 #else
1607 
1608 int
1609 get_hwenv(void)
1610 {
1611         return (HW_XEN_PV);
1612 }
1613 
1614 int
1615 is_controldom(void)
1616 {
1617         return (DOMAIN_IS_INITDOMAIN(xen_info));
1618 }
1619 
1620 #endif  /* __xpv */
1621 
1622 /*
1623  * Make sure that we have gathered all of the CPUID leaves that we might need to
1624  * determine topology. We assume that the standard leaf 1 has already been done
1625  * and that xmaxeax has already been calculated.
1626  */
1627 static void
1628 cpuid_gather_amd_topology_leaves(cpu_t *cpu)
1629 {
1630         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1631 
1632         if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
1633                 struct cpuid_regs *cp;
1634 
1635                 cp = &cpi->cpi_extd[8];
1636                 cp->cp_eax = CPUID_LEAF_EXT_8;
1637                 (void) __cpuid_insn(cp);
1638                 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8, cp);
1639         }
1640 
1641         if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1642             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1643                 struct cpuid_regs *cp;
1644 
1645                 cp = &cpi->cpi_extd[0x1e];
1646                 cp->cp_eax = CPUID_LEAF_EXT_1e;
1647                 (void) __cpuid_insn(cp);
1648         }
1649 }
1650 
1651 /*
1652  * Get the APIC ID for this processor. If Leaf B is present and valid, we prefer
1653  * it to everything else. If not, and we're on an AMD system where 8000001e is
 * valid, then we use that. Otherwise, we fall back to the default value for the
1655  * APIC ID in leaf 1.
1656  */
1657 static uint32_t
1658 cpuid_gather_apicid(struct cpuid_info *cpi)
1659 {
1660         /*
         * Leaf B changes based on the arguments to it. Because we don't cache
1662          * it, we need to gather it again.
1663          */
1664         if (cpi->cpi_maxeax >= 0xB) {
1665                 struct cpuid_regs regs;
1666                 struct cpuid_regs *cp;
1667 
1668                 cp = &regs;
1669                 cp->cp_eax = 0xB;
1670                 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1671                 (void) __cpuid_insn(cp);
1672 
1673                 if (cp->cp_ebx != 0) {
1674                         return (cp->cp_edx);
1675                 }
1676         }
1677 
1678         if (cpi->cpi_vendor == X86_VENDOR_AMD &&
1679             is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1680             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1681                 return (cpi->cpi_extd[0x1e].cp_eax);
1682         }
1683 
1684         return (CPI_APIC_ID(cpi));
1685 }
1686 
1687 /*
1688  * For AMD processors, attempt to calculate the number of chips and cores that
1689  * exist. The way that we do this varies based on the generation, because the
1690  * generations themselves have changed dramatically.
1691  *
1692  * If cpuid leaf 0x80000008 exists, that generally tells us the number of cores.
1693  * However, with the advent of family 17h (Zen) it actually tells us the number
1694  * of threads, so we need to look at leaf 0x8000001e if available to determine
1695  * its value. Otherwise, for all prior families, the number of enabled cores is
1696  * the same as threads.
1697  *
1698  * If we do not have leaf 0x80000008, then we assume that this processor does
1699  * not have anything. AMD's older CPUID specification says there's no reason to
1700  * fall back to leaf 1.
1701  *
1702  * In some virtualization cases we will not have leaf 8000001e or it will be
1703  * zero. When that happens we assume the number of threads is one.
1704  */
1705 static void
1706 cpuid_amd_ncores(struct cpuid_info *cpi, uint_t *ncpus, uint_t *ncores)
1707 {
1708         uint_t nthreads, nthread_per_core;
1709 
1710         nthreads = nthread_per_core = 1;
1711 
1712         if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
1713                 nthreads = BITX(cpi->cpi_extd[8].cp_ecx, 7, 0) + 1;
1714         } else if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) {
1715                 nthreads = CPI_CPU_COUNT(cpi);
1716         }
1717 
1718         /*
         * For us to have threads, and to know about them, we have to be at
         * least at family 17h and have the cpuid bit that says we have
         * extended topology.
1722          */
1723         if (cpi->cpi_family >= 0x17 &&
1724             is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1725             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1726                 nthread_per_core = BITX(cpi->cpi_extd[0x1e].cp_ebx, 15, 8) + 1;
1727         }
1728 
1729         *ncpus = nthreads;
1730         *ncores = nthreads / nthread_per_core;
1731 }
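
/*
 * A worked example of the above, using hypothetical leaf values: on a family
 * 17h part with SMT, extd[8] %ecx[7:0] = 15 gives nthreads = 16, and
 * extd[0x1e] %ebx[15:8] = 1 gives nthread_per_core = 2, so we report 16
 * logical CPUs spread across 8 cores.
 */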
1732 
1733 /*
1734  * Seed the initial values for the cores and threads for an Intel based
1735  * processor. These values will be overwritten if we detect that the processor
1736  * supports CPUID leaf 0xb.
1737  */
1738 static void
1739 cpuid_intel_ncores(struct cpuid_info *cpi, uint_t *ncpus, uint_t *ncores)
1740 {
1741         /*
1742          * Only seed the number of physical cores from the first level leaf 4
         * information. The number of threads there indicates how many share the
1744          * L1 cache, which may or may not have anything to do with the number of
1745          * logical CPUs per core.
1746          */
1747         if (cpi->cpi_maxeax >= 4) {
1748                 *ncores = BITX(cpi->cpi_std[4].cp_eax, 31, 26) + 1;
1749         } else {
1750                 *ncores = 1;
1751         }
1752 
1753         if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) {
1754                 *ncpus = CPI_CPU_COUNT(cpi);
1755         } else {
1756                 *ncpus = *ncores;
1757         }
1758 }
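
/*
 * A worked example with hypothetical values: leaf 4 %eax[31:26] = 3 seeds
 * *ncores = 4, and if HTT is set with a leaf 1 logical CPU count of 8, we
 * seed *ncpus = 8.
 */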
1759 
1760 static boolean_t
1761 cpuid_leafB_getids(cpu_t *cpu)
1762 {
1763         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1764         struct cpuid_regs regs;
1765         struct cpuid_regs *cp;
1766 
1767         if (cpi->cpi_maxeax < 0xB)
1768                 return (B_FALSE);
1769 
1770         cp = &regs;
1771         cp->cp_eax = 0xB;
1772         cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1773 
1774         (void) __cpuid_insn(cp);
1775 
1776         /*
1777          * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which
1778          * indicates that the extended topology enumeration leaf is
1779          * available.
1780          */
1781         if (cp->cp_ebx != 0) {
1782                 uint32_t x2apic_id = 0;
1783                 uint_t coreid_shift = 0;
1784                 uint_t ncpu_per_core = 1;
1785                 uint_t chipid_shift = 0;
1786                 uint_t ncpu_per_chip = 1;
1787                 uint_t i;
1788                 uint_t level;
1789 
1790                 for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
1791                         cp->cp_eax = 0xB;
1792                         cp->cp_ecx = i;
1793 
1794                         (void) __cpuid_insn(cp);
1795                         level = CPI_CPU_LEVEL_TYPE(cp);
1796 
1797                         if (level == 1) {
1798                                 x2apic_id = cp->cp_edx;
1799                                 coreid_shift = BITX(cp->cp_eax, 4, 0);
1800                                 ncpu_per_core = BITX(cp->cp_ebx, 15, 0);
1801                         } else if (level == 2) {
1802                                 x2apic_id = cp->cp_edx;
1803                                 chipid_shift = BITX(cp->cp_eax, 4, 0);
1804                                 ncpu_per_chip = BITX(cp->cp_ebx, 15, 0);
1805                         }
1806                 }
1807 
1808                 /*
1809                  * cpi_apicid is taken care of in cpuid_gather_apicid.
1810                  */
1811                 cpi->cpi_ncpu_per_chip = ncpu_per_chip;
1812                 cpi->cpi_ncore_per_chip = ncpu_per_chip /
1813                     ncpu_per_core;
1814                 cpi->cpi_chipid = x2apic_id >> chipid_shift;
1815                 cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1);
1816                 cpi->cpi_coreid = x2apic_id >> coreid_shift;
1817                 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
1818                 cpi->cpi_procnodeid = cpi->cpi_chipid;
1819                 cpi->cpi_compunitid = cpi->cpi_coreid;
1820 
1821                 if (coreid_shift > 0 && chipid_shift > coreid_shift) {
1822                         cpi->cpi_nthread_bits = coreid_shift;
1823                         cpi->cpi_ncore_bits = chipid_shift - coreid_shift;
1824                 }
1825 
1826                 return (B_TRUE);
1827         } else {
1828                 return (B_FALSE);
1829         }
1830 }
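
/*
 * A worked example of the decomposition above, with hypothetical shifts:
 * given coreid_shift = 1, chipid_shift = 4, and an x2APIC ID of 0x16, we get
 * chipid = 0x16 >> 4 = 1, clogid = 0x16 & 0xf = 6, coreid = 0x16 >> 1 = 0xb,
 * and pkgcoreid = 6 >> 1 = 3.
 */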
1831 
1832 static void
1833 cpuid_intel_getids(cpu_t *cpu, void *feature)
1834 {
1835         uint_t i;
1836         uint_t chipid_shift = 0;
1837         uint_t coreid_shift = 0;
1838         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1839 
1840         /*
1841          * There are no compute units or processor nodes currently on Intel.
1842          * Always set these to one.
1843          */
1844         cpi->cpi_procnodes_per_pkg = 1;
1845         cpi->cpi_cores_per_compunit = 1;
1846 
1847         /*
1848          * If cpuid Leaf B is present, use that to try and get this information.
1849          * It will be the most accurate for Intel CPUs.
1850          */
1851         if (cpuid_leafB_getids(cpu))
1852                 return;
1853 
1854         /*
1855          * In this case, we have the leaf 1 and leaf 4 values for ncpu_per_chip
1856          * and ncore_per_chip. These represent the largest power of two values
1857          * that we need to cover all of the IDs in the system. Therefore, we use
1858          * those values to seed the number of bits needed to cover information
1859          * in the case when leaf B is not available. These values will probably
1860          * be larger than required, but that's OK.
1861          */
1862         cpi->cpi_nthread_bits = ddi_fls(cpi->cpi_ncpu_per_chip);
1863         cpi->cpi_ncore_bits = ddi_fls(cpi->cpi_ncore_per_chip);
1864 
1865         for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1)
1866                 chipid_shift++;
1867 
1868         cpi->cpi_chipid = cpi->cpi_apicid >> chipid_shift;
1869         cpi->cpi_clogid = cpi->cpi_apicid & ((1 << chipid_shift) - 1);
1870 
1871         if (is_x86_feature(feature, X86FSET_CMP)) {
1872                 /*
1873                  * Multi-core (and possibly multi-threaded)
1874                  * processors.
1875                  */
1876                 uint_t ncpu_per_core;
1877                 if (cpi->cpi_ncore_per_chip == 1)
1878                         ncpu_per_core = cpi->cpi_ncpu_per_chip;
1879                 else if (cpi->cpi_ncore_per_chip > 1)
1880                         ncpu_per_core = cpi->cpi_ncpu_per_chip /
1881                             cpi->cpi_ncore_per_chip;
1882                 /*
1883                  * 8bit APIC IDs on dual core Pentiums
1884                  * look like this:
1885                  *
1886                  * +-----------------------+------+------+
1887                  * | Physical Package ID   |  MC  |  HT  |
1888                  * +-----------------------+------+------+
1889                  * <------- chipid -------->
1890                  * <------- coreid --------------->
1891                  *                         <--- clogid -->
1892                  *                         <------>
1893                  *                         pkgcoreid
1894                  *
1895                  * Where the number of bits necessary to
1896                  * represent MC and HT fields together equals
1897                  * to the minimum number of bits necessary to
1898                  * store the value of cpi->cpi_ncpu_per_chip.
1899                  * Of those bits, the MC part uses the number
1900                  * of bits necessary to store the value of
1901                  * cpi->cpi_ncore_per_chip.
1902                  */
1903                 for (i = 1; i < ncpu_per_core; i <<= 1)
1904                         coreid_shift++;
1905                 cpi->cpi_coreid = cpi->cpi_apicid >> coreid_shift;
1906                 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
1907         } else if (is_x86_feature(feature, X86FSET_HTT)) {
1908                 /*
1909                  * Single-core multi-threaded processors.
1910                  */
1911                 cpi->cpi_coreid = cpi->cpi_chipid;
1912                 cpi->cpi_pkgcoreid = 0;
1913         } else {
1914                 /*
1915                  * Single-core single-thread processors.
1916                  */
1917                 cpi->cpi_coreid = cpu->cpu_id;
1918                 cpi->cpi_pkgcoreid = 0;
1919         }
1920         cpi->cpi_procnodeid = cpi->cpi_chipid;
1921         cpi->cpi_compunitid = cpi->cpi_coreid;
1922 }
1923 
1924 /*
1925  * Historically, AMD has had CMP chips with only a single thread per core.
1926  * However, starting in family 17h (Zen), this has changed and they now have
1927  * multiple threads. Our internal core id needs to be a unique value.
1928  *
1929  * To determine the core id of an AMD system, if we're from a family before 17h,
1930  * then we just use the cpu id, as that gives us a good value that will be
1931  * unique for each core. If instead, we're on family 17h or later, then we need
1932  * to do something more complicated. CPUID leaf 0x8000001e can tell us
1933  * how many threads are in the system. Based on that, we'll shift the APIC ID.
1934  * We can't use the normal core id in that leaf as it's only unique within the
 * socket, which is perfect for cpi_pkgcoreid, but not for us.
1936  */
1937 static id_t
1938 cpuid_amd_get_coreid(cpu_t *cpu)
1939 {
1940         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1941 
1942         if (cpi->cpi_family >= 0x17 &&
1943             is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1944             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1945                 uint_t nthreads = BITX(cpi->cpi_extd[0x1e].cp_ebx, 15, 8) + 1;
1946                 if (nthreads > 1) {
1947                         VERIFY3U(nthreads, ==, 2);
1948                         return (cpi->cpi_apicid >> 1);
1949                 }
1950         }
1951 
1952         return (cpu->cpu_id);
1953 }
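
/*
 * For example (hypothetical values): on a family 17h part reporting two
 * threads per core, an APIC ID of 0xb yields coreid 0xb >> 1 = 5, and its
 * SMT sibling at APIC ID 0xa shares that coreid.
 */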
1954 
1955 /*
 * Determining IDs on AMD is a more challenging task. This is notable
 * because of the following two facts:
1958  *
1959  *  1. Before family 0x17 (Zen), there was no support for SMT and there was
1960  *     also no way to get an actual unique core id from the system. As such, we
1961  *     synthesize this case by using cpu->cpu_id.  This scheme does not,
1962  *     however, guarantee that sibling cores of a chip will have sequential
1963  *     coreids starting at a multiple of the number of cores per chip - that is
1964  *     usually the case, but if the ACPI MADT table is presented in a different
1965  *     order then we need to perform a few more gymnastics for the pkgcoreid.
1966  *
 *  2. In families 0x15 and 0x16 (Bulldozer and co.) the cores came in groups
1968  *     called compute units. These compute units share the L1I cache, L2 cache,
1969  *     and the FPU. To deal with this, a new topology leaf was added in
1970  *     0x8000001e. However, parts of this leaf have different meanings
1971  *     once we get to family 0x17.
1972  */
1973 
1974 static void
1975 cpuid_amd_getids(cpu_t *cpu, uchar_t *features)
1976 {
1977         int i, first_half, coreidsz;
1978         uint32_t nb_caps_reg;
1979         uint_t node2_1;
1980         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1981         struct cpuid_regs *cp;
1982 
1983         /*
1984          * Calculate the core id (this comes from hardware in family 0x17 if it
1985          * hasn't been stripped by virtualization). We always set the compute
1986          * unit id to the same value. Also, initialize the default number of
1987          * cores per compute unit and nodes per package. This will be
1988          * overwritten when we know information about a particular family.
1989          */
1990         cpi->cpi_coreid = cpuid_amd_get_coreid(cpu);
1991         cpi->cpi_compunitid = cpi->cpi_coreid;
1992         cpi->cpi_cores_per_compunit = 1;
1993         cpi->cpi_procnodes_per_pkg = 1;
1994 
1995         /*
1996          * To construct the logical ID, we need to determine how many APIC IDs
1997          * are dedicated to the cores and threads. This is provided for us in
1998          * 0x80000008. However, if it's not present (say due to virtualization),
1999          * then we assume it's one. This should be present on all 64-bit AMD
2000          * processors.  It was added in family 0xf (Hammer).
2001          */
2002         if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
2003                 coreidsz = BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12);
2004 
2005                 /*
                 * In AMD parlance, a chip is really a node, while illumos
                 * uses chip as equivalent to socket/package.
2008                  */
2009                 if (coreidsz == 0) {
2010                         /* Use legacy method */
2011                         for (i = 1; i < cpi->cpi_ncore_per_chip; i <<= 1)
2012                                 coreidsz++;
2013                         if (coreidsz == 0)
2014                                 coreidsz = 1;
2015                 }
2016         } else {
2017                 /* Assume single-core part */
2018                 coreidsz = 1;
2019         }
2020         cpi->cpi_clogid = cpi->cpi_apicid & ((1 << coreidsz) - 1);
2021 
2022         /*
2023          * The package core ID varies depending on the family. For family 17h,
2024          * we can get this directly from leaf CPUID_LEAF_EXT_1e. Otherwise, we
         * can use the clogid as is. When family 17h is virtualized, the
         * clogid should still be sufficient: if we don't have valid data in
         * the leaf, then we won't think we have SMT, and the clogid alone is
         * enough.
2029          */
2030         if (cpi->cpi_family >= 0x17 &&
2031             is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
2032             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e &&
2033             cpi->cpi_extd[0x1e].cp_ebx != 0) {
2034                 cpi->cpi_pkgcoreid = BITX(cpi->cpi_extd[0x1e].cp_ebx, 7, 0);
2035         } else {
2036                 cpi->cpi_pkgcoreid = cpi->cpi_clogid;
2037         }
2038 
2039         /*
2040          * Obtain the node ID and compute unit IDs. If we're on family 0x15
2041          * (bulldozer) or newer, then we can derive all of this from leaf
2042          * CPUID_LEAF_EXT_1e. Otherwise, the method varies by family.
2043          */
2044         if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
2045             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
2046                 cp = &cpi->cpi_extd[0x1e];
2047 
2048                 cpi->cpi_procnodes_per_pkg = BITX(cp->cp_ecx, 10, 8) + 1;
2049                 cpi->cpi_procnodeid = BITX(cp->cp_ecx, 7, 0);
2050 
2051                 /*
2052                  * For Bulldozer-era CPUs, recalculate the compute unit
2053                  * information.
2054                  */
2055                 if (cpi->cpi_family >= 0x15 && cpi->cpi_family < 0x17) {
2056                         cpi->cpi_cores_per_compunit =
2057                             BITX(cp->cp_ebx, 15, 8) + 1;
2058                         cpi->cpi_compunitid = BITX(cp->cp_ebx, 7, 0) +
2059                             (cpi->cpi_ncore_per_chip /
2060                             cpi->cpi_cores_per_compunit) *
2061                             (cpi->cpi_procnodeid /
2062                             cpi->cpi_procnodes_per_pkg);
2063                 }
2064         } else if (cpi->cpi_family == 0xf || cpi->cpi_family >= 0x11) {
2065                 cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7;
2066         } else if (cpi->cpi_family == 0x10) {
2067                 /*
2068                  * See if we are a multi-node processor.
2069                  * All processors in the system have the same number of nodes
2070                  */
2071                 nb_caps_reg =  pci_getl_func(0, 24, 3, 0xe8);
2072                 if ((cpi->cpi_model < 8) || BITX(nb_caps_reg, 29, 29) == 0) {
2073                         /* Single-node */
2074                         cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5,
2075                             coreidsz);
2076                 } else {
2077 
2078                         /*
2079                          * Multi-node revision D (2 nodes per package
2080                          * are supported)
2081                          */
2082                         cpi->cpi_procnodes_per_pkg = 2;
2083 
2084                         first_half = (cpi->cpi_pkgcoreid <=
2085                             (cpi->cpi_ncore_per_chip/2 - 1));
2086 
2087                         if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) {
2088                                 /* We are BSP */
2089                                 cpi->cpi_procnodeid = (first_half ? 0 : 1);
2090                         } else {
2091 
2092                                 /* We are AP */
2093                                 /* NodeId[2:1] bits to use for reading F3xe8 */
2094                                 node2_1 = BITX(cpi->cpi_apicid, 5, 4) << 1;
2095 
2096                                 nb_caps_reg =
2097                                     pci_getl_func(0, 24 + node2_1, 3, 0xe8);
2098 
2099                                 /*
2100                                  * Check IntNodeNum bit (31:30, but bit 31 is
2101                                  * always 0 on dual-node processors)
2102                                  */
2103                                 if (BITX(nb_caps_reg, 30, 30) == 0)
2104                                         cpi->cpi_procnodeid = node2_1 +
2105                                             !first_half;
2106                                 else
2107                                         cpi->cpi_procnodeid = node2_1 +
2108                                             first_half;
2109                         }
2110                 }
2111         } else {
2112                 cpi->cpi_procnodeid = 0;
2113         }
2114 
2115         cpi->cpi_chipid =
2116             cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg;
2117 
2118         cpi->cpi_ncore_bits = coreidsz;
2119         cpi->cpi_nthread_bits = ddi_fls(cpi->cpi_ncpu_per_chip /
2120             cpi->cpi_ncore_per_chip);
2121 }
2122 
2123 static void
2124 spec_l1d_flush_noop(void)
2125 {
2126 }
2127 
2128 static void
2129 spec_l1d_flush_msr(void)
2130 {
2131         wrmsr(MSR_IA32_FLUSH_CMD, IA32_FLUSH_CMD_L1D);
2132 }
2133 
2134 void (*spec_l1d_flush)(void) = spec_l1d_flush_noop;
2135 
2136 static void
2137 cpuid_scan_security(cpu_t *cpu, uchar_t *featureset)
2138 {
2139         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2140 
2141         if (cpi->cpi_vendor == X86_VENDOR_AMD &&
2142             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
2143                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBPB)
2144                         add_x86_feature(featureset, X86FSET_IBPB);
2145                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBRS)
2146                         add_x86_feature(featureset, X86FSET_IBRS);
2147                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_STIBP)
2148                         add_x86_feature(featureset, X86FSET_STIBP);
2149                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBRS_ALL)
2150                         add_x86_feature(featureset, X86FSET_IBRS_ALL);
2151                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_STIBP_ALL)
2152                         add_x86_feature(featureset, X86FSET_STIBP_ALL);
2153                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_PREFER_IBRS)
2154                         add_x86_feature(featureset, X86FSET_RSBA);
2155                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_SSBD)
2156                         add_x86_feature(featureset, X86FSET_SSBD);
2157                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_VIRT_SSBD)
2158                         add_x86_feature(featureset, X86FSET_SSBD_VIRT);
2159                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_SSB_NO)
2160                         add_x86_feature(featureset, X86FSET_SSB_NO);
2161         } else if (cpi->cpi_vendor == X86_VENDOR_Intel &&
2162             cpi->cpi_maxeax >= 7) {
2163                 struct cpuid_regs *ecp;
2164                 ecp = &cpi->cpi_std[7];
2165 
2166                 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_SPEC_CTRL) {
2167                         add_x86_feature(featureset, X86FSET_IBRS);
2168                         add_x86_feature(featureset, X86FSET_IBPB);
2169                 }
2170 
2171                 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_STIBP) {
2172                         add_x86_feature(featureset, X86FSET_STIBP);
2173                 }
2174 
2175                 /*
2176                  * Don't read the arch caps MSR on xpv where we lack the
2177                  * on_trap().
2178                  */
2179 #ifndef __xpv
2180                 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_ARCH_CAPS) {
2181                         on_trap_data_t otd;
2182 
2183                         /*
2184                          * Be paranoid and assume we'll get a #GP.
2185                          */
2186                         if (!on_trap(&otd, OT_DATA_ACCESS)) {
2187                                 uint64_t reg;
2188 
2189                                 reg = rdmsr(MSR_IA32_ARCH_CAPABILITIES);
2190                                 if (reg & IA32_ARCH_CAP_RDCL_NO) {
2191                                         add_x86_feature(featureset,
2192                                             X86FSET_RDCL_NO);
2193                                 }
2194                                 if (reg & IA32_ARCH_CAP_IBRS_ALL) {
2195                                         add_x86_feature(featureset,
2196                                             X86FSET_IBRS_ALL);
2197                                 }
2198                                 if (reg & IA32_ARCH_CAP_RSBA) {
2199                                         add_x86_feature(featureset,
2200                                             X86FSET_RSBA);
2201                                 }
2202                                 if (reg & IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY) {
2203                                         add_x86_feature(featureset,
2204                                             X86FSET_L1D_VM_NO);
2205                                 }
2206                                 if (reg & IA32_ARCH_CAP_SSB_NO) {
2207                                         add_x86_feature(featureset,
2208                                             X86FSET_SSB_NO);
2209                                 }
2210                         }
2211                         no_trap();
2212                 }
2213 #endif  /* !__xpv */
2214 
2215                 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_SSBD)
2216                         add_x86_feature(featureset, X86FSET_SSBD);
2217 
2218                 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_FLUSH_CMD)
2219                         add_x86_feature(featureset, X86FSET_FLUSH_CMD);
2220         }
2221 
2222         if (cpu->cpu_id != 0)
2223                 return;
2224 
2225         /*
2226          * We're the boot CPU, so let's figure out our L1TF status.
2227          *
2228          * First, if this is a RDCL_NO CPU, then we are not vulnerable: we don't
2229          * need to exclude with ht_acquire(), and we don't need to flush.
2230          */
2231         if (is_x86_feature(featureset, X86FSET_RDCL_NO)) {
2232                 extern int ht_exclusion;
2233                 ht_exclusion = 0;
2234                 spec_l1d_flush = spec_l1d_flush_noop;
2235                 membar_producer();
2236                 return;
2237         }
2238 
2239         /*
2240          * If HT is enabled, we will need HT exclusion, as well as the flush on
2241          * VM entry.  If HT isn't enabled, we still need at least the flush for
2242          * the L1TF sequential case.
2243          *
2244          * However, if X86FSET_L1D_VM_NO is set, we're most likely running
2245          * inside a VM ourselves, and we don't need the flush.
2246          *
2247          * If we don't have the FLUSH_CMD available at all, we'd better just
2248          * hope HT is disabled.
2249          */
2250         if (is_x86_feature(featureset, X86FSET_FLUSH_CMD) &&
2251             !is_x86_feature(featureset, X86FSET_L1D_VM_NO)) {
2252                 spec_l1d_flush = spec_l1d_flush_msr;
2253         } else {
2254                 spec_l1d_flush = spec_l1d_flush_noop;
2255         }
2256 
2257         membar_producer();
2258 }
2259 
2260 /*
2261  * Setup XFeature_Enabled_Mask register. Required by xsave feature.
2262  */
2263 void
2264 setup_xfem(void)
2265 {
2266         uint64_t flags = XFEATURE_LEGACY_FP;
2267 
2268         ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
2269 
2270         if (is_x86_feature(x86_featureset, X86FSET_SSE))
2271                 flags |= XFEATURE_SSE;
2272 
2273         if (is_x86_feature(x86_featureset, X86FSET_AVX))
2274                 flags |= XFEATURE_AVX;
2275 
2276         if (is_x86_feature(x86_featureset, X86FSET_AVX512F))
2277                 flags |= XFEATURE_AVX512;
2278 
2279         set_xcr(XFEATURE_ENABLED_MASK, flags);
2280 
2281         xsave_bv_all = flags;
2282 }
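
/*
 * For reference, XFEATURE_ENABLED_MASK names XCR0, so the set_xcr() call
 * above programs the xsave feature mask via xsetbv. On an AVX-capable
 * (non-AVX512) CPU, for example, the mask written is
 * XFEATURE_LEGACY_FP | XFEATURE_SSE | XFEATURE_AVX (0x7).
 */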
2283 
2284 static void
2285 cpuid_pass1_topology(cpu_t *cpu, uchar_t *featureset)
2286 {
2287         struct cpuid_info *cpi;
2288 
2289         cpi = cpu->cpu_m.mcpu_cpi;
2290 
2291         if (cpi->cpi_vendor == X86_VENDOR_AMD) {
2292                 cpuid_gather_amd_topology_leaves(cpu);
2293         }
2294 
2295         cpi->cpi_apicid = cpuid_gather_apicid(cpi);
2296 
2297         /*
2298          * Before we can calculate the IDs that we should assign to this
2299          * processor, we need to understand how many cores and threads it has.
2300          */
2301         switch (cpi->cpi_vendor) {
2302         case X86_VENDOR_Intel:
2303                 cpuid_intel_ncores(cpi, &cpi->cpi_ncpu_per_chip,
2304                     &cpi->cpi_ncore_per_chip);
2305                 break;
2306         case X86_VENDOR_AMD:
2307                 cpuid_amd_ncores(cpi, &cpi->cpi_ncpu_per_chip,
2308                     &cpi->cpi_ncore_per_chip);
2309                 break;
2310         default:
2311                 /*
                 * If we have some other x86-compatible chip, it's not clear
                 * how it would behave. The most common case is virtualization
2314                  * today, though there are also 64-bit VIA chips. Assume that
2315                  * all we can get is the basic Leaf 1 HTT information.
2316                  */
2317                 if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) {
2318                         cpi->cpi_ncore_per_chip = 1;
2319                         cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi);
2320                 }
2321                 break;
2322         }
2323 
2324         /*
2325          * Based on the calculated number of threads and cores, potentially
         * assign the HTT and CMP features.
2327          */
2328         if (cpi->cpi_ncore_per_chip > 1) {
2329                 add_x86_feature(featureset, X86FSET_CMP);
2330         }
2331 
2332         if (cpi->cpi_ncpu_per_chip > 1 &&
2333             cpi->cpi_ncpu_per_chip != cpi->cpi_ncore_per_chip) {
2334                 add_x86_feature(featureset, X86FSET_HTT);
2335         }
2336 
2337         /*
         * Now that the HTT and CMP features have been set up, we need to go
         * through and calculate the rest of the parameters. If we think the
         * CPU doesn't
2340          * have either SMT (HTT) or CMP, then we basically go through and fake
2341          * up information in some way. The most likely case for this is
2342          * virtualization where we have a lot of partial topology information.
2343          */
2344         if (!is_x86_feature(featureset, X86FSET_HTT) &&
2345             !is_x86_feature(featureset, X86FSET_CMP)) {
2346                 /*
2347                  * This is a single core, single-threaded processor.
2348                  */
2349                 cpi->cpi_procnodes_per_pkg = 1;
2350                 cpi->cpi_cores_per_compunit = 1;
2351                 cpi->cpi_compunitid = 0;
2352                 cpi->cpi_chipid = -1;
2353                 cpi->cpi_clogid = 0;
2354                 cpi->cpi_coreid = cpu->cpu_id;
2355                 cpi->cpi_pkgcoreid = 0;
2356                 if (cpi->cpi_vendor == X86_VENDOR_AMD) {
2357                         cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 3, 0);
2358                 } else {
2359                         cpi->cpi_procnodeid = cpi->cpi_chipid;
2360                 }
2361         } else {
2362                 switch (cpi->cpi_vendor) {
2363                 case X86_VENDOR_Intel:
2364                         cpuid_intel_getids(cpu, featureset);
2365                         break;
2366                 case X86_VENDOR_AMD:
2367                         cpuid_amd_getids(cpu, featureset);
2368                         break;
2369                 default:
2370                         /*
2371                          * In this case, it's hard to say what we should do.
2372                          * We present them to the OS as single-core threads,
2373                          * all on one chip; we don't have a good identifier
2374                          * for them, so we just reuse the cpu id for the
2375                          * core-level IDs.
2376                          *
2377                          * This case has historically been kept distinct from
2378                          * the no-HTT/no-CMP case above. While the two could
2379                          * be combined, keeping them separate minimizes the
2380                          * risk of topology changes in weird cases.
2381                          */
2382                         cpi->cpi_procnodes_per_pkg = 1;
2383                         cpi->cpi_cores_per_compunit = 1;
2384                         cpi->cpi_chipid = 0;
2385                         cpi->cpi_coreid = cpu->cpu_id;
2386                         cpi->cpi_clogid = cpu->cpu_id;
2387                         cpi->cpi_pkgcoreid = cpu->cpu_id;
2388                         cpi->cpi_procnodeid = cpi->cpi_chipid;
2389                         cpi->cpi_compunitid = cpi->cpi_coreid;
2390                         break;
2391                 }
2392         }
2393 }
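
     /*
      * Minimal illustrative sketch (hypothetical, compiled out) of the kind
      * of APIC-ID decoding that the vendor getids routines above perform,
      * assuming a part with 2 threads per core and 4 cores per package, so
      * that APIC ID bit 0 selects the thread and bits 2:1 select the core:
      */
     #ifdef	CPUID_EXAMPLE
     static void
     example_apicid_decode(uint32_t apicid, uint_t *chip, uint_t *core,
         uint_t *thr)
     {
     	*thr = BITX(apicid, 0, 0);	/* 1 thread-select bit */
     	*core = BITX(apicid, 2, 1);	/* 2 core-select bits */
     	*chip = apicid >> 3;		/* remaining bits name the package */
     }
     #endif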
2394 
2395 void
2396 cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
2397 {
2398         uint32_t mask_ecx, mask_edx;
2399         struct cpuid_info *cpi;
2400         struct cpuid_regs *cp;
2401         int xcpuid;
2402 #if !defined(__xpv)
2403         extern int idle_cpu_prefer_mwait;
2404 #endif
2405 
2406         /*
2407          * Space is statically allocated for the BSP; ensure the pointer is set
2408          */
2409         if (cpu->cpu_id == 0) {
2410                 if (cpu->cpu_m.mcpu_cpi == NULL)
2411                         cpu->cpu_m.mcpu_cpi = &cpuid_info0;
2412         }
2413 
2414         add_x86_feature(featureset, X86FSET_CPUID);
2415 
2416         cpi = cpu->cpu_m.mcpu_cpi;
2417         ASSERT(cpi != NULL);
2418         cp = &cpi->cpi_std[0];
2419         cp->cp_eax = 0;
2420         cpi->cpi_maxeax = __cpuid_insn(cp);
2421         {
2422                 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr;
2423                 *iptr++ = cp->cp_ebx;
2424                 *iptr++ = cp->cp_edx;
2425                 *iptr++ = cp->cp_ecx;
2426                 *(char *)&cpi->cpi_vendorstr[12] = '\0';
2427         }
2428 
2429         cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr);
2430         x86_vendor = cpi->cpi_vendor; /* for compatibility */
2431 
2432         /*
2433          * Limit the range in case of weird hardware
2434          */
2435         if (cpi->cpi_maxeax > CPI_MAXEAX_MAX)
2436                 cpi->cpi_maxeax = CPI_MAXEAX_MAX;
2437         if (cpi->cpi_maxeax < 1)
2438                 goto pass1_done;
2439 
2440         cp = &cpi->cpi_std[1];
2441         cp->cp_eax = 1;
2442         (void) __cpuid_insn(cp);
2443 
2444         /*
2445          * Extract identifying constants for easy access.
2446          */
2447         cpi->cpi_model = CPI_MODEL(cpi);
2448         cpi->cpi_family = CPI_FAMILY(cpi);
2449 
2450         if (cpi->cpi_family == 0xf)
2451                 cpi->cpi_family += CPI_FAMILY_XTD(cpi);
2452 
2453         /*
2454          * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf.
2455          * Intel, and presumably everyone else, uses model == 0xf, as
2456          * one would expect (max value means possible overflow).  Sigh.
2457          */
2458 
2459         switch (cpi->cpi_vendor) {
2460         case X86_VENDOR_Intel:
2461                 if (IS_EXTENDED_MODEL_INTEL(cpi))
2462                         cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
2463                 break;
2464         case X86_VENDOR_AMD:
2465                 if (CPI_FAMILY(cpi) == 0xf)
2466                         cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
2467                 break;
2468         default:
2469                 if (cpi->cpi_model == 0xf)
2470                         cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
2471                 break;
2472         }
2473 
2474         cpi->cpi_step = CPI_STEP(cpi);
2475         cpi->cpi_brandid = CPI_BRANDID(cpi);
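
             /*
              * Worked example: an Intel part with leaf 1 %eax == 0x000906ea
              * has base family 0x6 and base model 0xe; since the family is 6,
              * the extended model (0x9) is folded in above, giving model
              * 0xe + (0x9 << 4) == 0x9e, with stepping 0xa.
              */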
2476 
2477         /*
2478          * *default* assumptions:
2479          * - believe %edx feature word
2480          * - ignore %ecx feature word
2481          * - 32-bit virtual and physical addressing
2482          */
2483         mask_edx = 0xffffffff;
2484         mask_ecx = 0;
2485 
2486         cpi->cpi_pabits = cpi->cpi_vabits = 32;
2487 
2488         switch (cpi->cpi_vendor) {
2489         case X86_VENDOR_Intel:
2490                 if (cpi->cpi_family == 5)
2491                         x86_type = X86_TYPE_P5;
2492                 else if (IS_LEGACY_P6(cpi)) {
2493                         x86_type = X86_TYPE_P6;
2494                         pentiumpro_bug4046376 = 1;
2495                         /*
2496                          * Clear the SEP bit when it was set erroneously
2497                          */
2498                         if (cpi->cpi_model < 3 && cpi->cpi_step < 3)
2499                                 cp->cp_edx &= ~CPUID_INTC_EDX_SEP;
2500                 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) {
2501                         x86_type = X86_TYPE_P4;
2502                         /*
2503                          * We don't currently depend on any of the %ecx
2504                          * features until Prescott, so we'll only check
2505                          * this from P4 onwards.  We might want to revisit
2506                          * that idea later.
2507                          */
2508                         mask_ecx = 0xffffffff;
2509                 } else if (cpi->cpi_family > 0xf)
2510                         mask_ecx = 0xffffffff;
2511                 /*
2512                  * We don't support MONITOR/MWAIT if leaf 5 is not available
2513                  * to obtain the monitor linesize.
2514                  */
2515                 if (cpi->cpi_maxeax < 5)
2516                         mask_ecx &= ~CPUID_INTC_ECX_MON;
2517                 break;
2518         case X86_VENDOR_IntelClone:
2519         default:
2520                 break;
2521         case X86_VENDOR_AMD:
2522 #if defined(OPTERON_ERRATUM_108)
2523                 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) {
2524                         cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0;
2525                         cpi->cpi_model = 0xc;
2526                 } else
2527 #endif
2528                 if (cpi->cpi_family == 5) {
2529                         /*
2530                          * AMD K5 and K6
2531                          *
2532                          * These CPUs have an incomplete implementation
2533                          * of MCA/MCE which we mask away.
2534                          */
2535                         mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA);
2536 
2537                         /*
2538                          * Model 0 uses the wrong (APIC) bit
2539                          * to indicate PGE.  Fix it here.
2540                          */
2541                         if (cpi->cpi_model == 0) {
2542                                 if (cp->cp_edx & 0x200) {
2543                                         cp->cp_edx &= ~0x200;
2544                                         cp->cp_edx |= CPUID_INTC_EDX_PGE;
2545                                 }
2546                         }
2547 
2548                         /*
2549                          * Early models had problems w/ MMX; disable.
2550                          */
2551                         if (cpi->cpi_model < 6)
2552                                 mask_edx &= ~CPUID_INTC_EDX_MMX;
2553                 }
2554 
2555                 /*
2556                  * For newer families, SSE3 and CX16, at least, are valid;
2557                  * enable all
2558                  */
2559                 if (cpi->cpi_family >= 0xf)
2560                         mask_ecx = 0xffffffff;
2561                 /*
2562                  * We don't support MONITOR/MWAIT if leaf 5 is not available
2563                  * to obtain the monitor linesize.
2564                  */
2565                 if (cpi->cpi_maxeax < 5)
2566                         mask_ecx &= ~CPUID_INTC_ECX_MON;
2567 
2568 #if !defined(__xpv)
2569                 /*
2570                  * AMD has not historically used MWAIT in the CPU's idle loop.
2571                  * Pre-family-10h Opterons do not have the MWAIT instruction. We
2572                  * know for certain that in at least family 17h, per AMD, mwait
2573                  * is preferred. Families in-between are less certain.
2574                  */
2575                 if (cpi->cpi_family < 0x17) {
2576                         idle_cpu_prefer_mwait = 0;
2577                 }
2578 #endif
2579 
2580                 break;
2581         case X86_VENDOR_TM:
2582                 /*
2583          * Work around the NT workaround in CMS 4.1
2584                  */
2585                 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 &&
2586                     (cpi->cpi_step == 2 || cpi->cpi_step == 3))
2587                         cp->cp_edx |= CPUID_INTC_EDX_CX8;
2588                 break;
2589         case X86_VENDOR_Centaur:
2590                 /*
2591          * Work around the NT workarounds again
2592                  */
2593                 if (cpi->cpi_family == 6)
2594                         cp->cp_edx |= CPUID_INTC_EDX_CX8;
2595                 break;
2596         case X86_VENDOR_Cyrix:
2597                 /*
2598                  * We rely heavily on the probing in locore
2599                  * to actually figure out what parts, if any,
2600                  * of the Cyrix cpuid instruction to believe.
2601                  */
2602                 switch (x86_type) {
2603                 case X86_TYPE_CYRIX_486:
2604                         mask_edx = 0;
2605                         break;
2606                 case X86_TYPE_CYRIX_6x86:
2607                         mask_edx = 0;
2608                         break;
2609                 case X86_TYPE_CYRIX_6x86L:
2610                         mask_edx =
2611                             CPUID_INTC_EDX_DE |
2612                             CPUID_INTC_EDX_CX8;
2613                         break;
2614                 case X86_TYPE_CYRIX_6x86MX:
2615                         mask_edx =
2616                             CPUID_INTC_EDX_DE |
2617                             CPUID_INTC_EDX_MSR |
2618                             CPUID_INTC_EDX_CX8 |
2619                             CPUID_INTC_EDX_PGE |
2620                             CPUID_INTC_EDX_CMOV |
2621                             CPUID_INTC_EDX_MMX;
2622                         break;
2623                 case X86_TYPE_CYRIX_GXm:
2624                         mask_edx =
2625                             CPUID_INTC_EDX_MSR |
2626                             CPUID_INTC_EDX_CX8 |
2627                             CPUID_INTC_EDX_CMOV |
2628                             CPUID_INTC_EDX_MMX;
2629                         break;
2630                 case X86_TYPE_CYRIX_MediaGX:
2631                         break;
2632                 case X86_TYPE_CYRIX_MII:
2633                 case X86_TYPE_VIA_CYRIX_III:
2634                         mask_edx =
2635                             CPUID_INTC_EDX_DE |
2636                             CPUID_INTC_EDX_TSC |
2637                             CPUID_INTC_EDX_MSR |
2638                             CPUID_INTC_EDX_CX8 |
2639                             CPUID_INTC_EDX_PGE |
2640                             CPUID_INTC_EDX_CMOV |
2641                             CPUID_INTC_EDX_MMX;
2642                         break;
2643                 default:
2644                         break;
2645                 }
2646                 break;
2647         }
2648 
2649 #if defined(__xpv)
2650         /*
2651          * Do not support MONITOR/MWAIT under a hypervisor
2652          */
2653         mask_ecx &= ~CPUID_INTC_ECX_MON;
2654         /*
2655          * Do not support XSAVE under a hypervisor for now
2656          */
2657         xsave_force_disable = B_TRUE;
2658 
2659 #endif  /* __xpv */
2660 
2661         if (xsave_force_disable) {
2662                 mask_ecx &= ~CPUID_INTC_ECX_XSAVE;
2663                 mask_ecx &= ~CPUID_INTC_ECX_AVX;
2664                 mask_ecx &= ~CPUID_INTC_ECX_F16C;
2665                 mask_ecx &= ~CPUID_INTC_ECX_FMA;
2666         }
2667 
2668         /*
2669          * Now that we've figured out the masks that determine which bits
2670          * we choose to believe, apply the masks to the feature words,
2671          * then map the kernel's view of these feature words into its
2672          * feature set.
2673          */
2674         cp->cp_edx &= mask_edx;
2675         cp->cp_ecx &= mask_ecx;
2676 
2677         /*
2678          * apply any platform restrictions (we don't call this
2679          * immediately after __cpuid_insn here, because we need the
2680          * workarounds applied above first)
2681          */
2682         platform_cpuid_mangle(cpi->cpi_vendor, 1, cp);
2683 
2684         /*
2685          * In addition to ecx and edx, Intel and AMD store a bunch of ISA
2686          * extensions in leaf 7's ebx, ecx, and edx (%ecx sub-leaf 0 here).
2687          */
2688         if (cpi->cpi_maxeax >= 7) {
2689                 struct cpuid_regs *ecp;
2690                 ecp = &cpi->cpi_std[7];
2691                 ecp->cp_eax = 7;
2692                 ecp->cp_ecx = 0;
2693                 (void) __cpuid_insn(ecp);
2694 
2695                 /*
2696                  * If XSAVE has been disabled, just ignore all of the
2697                  * extended-save-area dependent flags here.
2698                  */
2699                 if (xsave_force_disable) {
2700                         ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_BMI1;
2701                         ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_BMI2;
2702                         ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_AVX2;
2703                         ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_MPX;
2704                         ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_ALL_AVX512;
2705                         ecp->cp_ecx &= ~CPUID_INTC_ECX_7_0_ALL_AVX512;
2706                         ecp->cp_edx &= ~CPUID_INTC_EDX_7_0_ALL_AVX512;
2707                 }
2708 
2709                 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_SMEP)
2710                         add_x86_feature(featureset, X86FSET_SMEP);
2711 
2712                 /*
2713                  * We check disable_smap here in addition to in startup_smap()
2714                  * to ensure CPUs that aren't the boot CPU don't accidentally
2715                  * include it in the feature set and thus generate a mismatched
2716                  * x86 feature set across CPUs.
2717                  */
2718                 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_SMAP &&
2719                     disable_smap == 0)
2720                         add_x86_feature(featureset, X86FSET_SMAP);
2721 
2722                 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_RDSEED)
2723                         add_x86_feature(featureset, X86FSET_RDSEED);
2724 
2725                 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_ADX)
2726                         add_x86_feature(featureset, X86FSET_ADX);
2727 
2728                 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_FSGSBASE)
2729                         add_x86_feature(featureset, X86FSET_FSGSBASE);
2730 
2731                 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_CLFLUSHOPT)
2732                         add_x86_feature(featureset, X86FSET_CLFLUSHOPT);
2733 
2734                 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
2735                         if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_INVPCID)
2736                                 add_x86_feature(featureset, X86FSET_INVPCID);
2737 
2738                         if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_MPX)
2739                                 add_x86_feature(featureset, X86FSET_MPX);
2740 
2741                         if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_CLWB)
2742                                 add_x86_feature(featureset, X86FSET_CLWB);
2743                 }
2744         }
2745 
2746         /*
2747          * fold in overrides from the "eeprom" mechanism
2748          */
2749         cp->cp_edx |= cpuid_feature_edx_include;
2750         cp->cp_edx &= ~cpuid_feature_edx_exclude;
2751 
2752         cp->cp_ecx |= cpuid_feature_ecx_include;
2753         cp->cp_ecx &= ~cpuid_feature_ecx_exclude;
2754 
2755         if (cp->cp_edx & CPUID_INTC_EDX_PSE) {
2756                 add_x86_feature(featureset, X86FSET_LARGEPAGE);
2757         }
2758         if (cp->cp_edx & CPUID_INTC_EDX_TSC) {
2759                 add_x86_feature(featureset, X86FSET_TSC);
2760         }
2761         if (cp->cp_edx & CPUID_INTC_EDX_MSR) {
2762                 add_x86_feature(featureset, X86FSET_MSR);
2763         }
2764         if (cp->cp_edx & CPUID_INTC_EDX_MTRR) {
2765                 add_x86_feature(featureset, X86FSET_MTRR);
2766         }
2767         if (cp->cp_edx & CPUID_INTC_EDX_PGE) {
2768                 add_x86_feature(featureset, X86FSET_PGE);
2769         }
2770         if (cp->cp_edx & CPUID_INTC_EDX_CMOV) {
2771                 add_x86_feature(featureset, X86FSET_CMOV);
2772         }
2773         if (cp->cp_edx & CPUID_INTC_EDX_MMX) {
2774                 add_x86_feature(featureset, X86FSET_MMX);
2775         }
2776         if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 &&
2777             (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) {
2778                 add_x86_feature(featureset, X86FSET_MCA);
2779         }
2780         if (cp->cp_edx & CPUID_INTC_EDX_PAE) {
2781                 add_x86_feature(featureset, X86FSET_PAE);
2782         }
2783         if (cp->cp_edx & CPUID_INTC_EDX_CX8) {
2784                 add_x86_feature(featureset, X86FSET_CX8);
2785         }
2786         if (cp->cp_ecx & CPUID_INTC_ECX_CX16) {
2787                 add_x86_feature(featureset, X86FSET_CX16);
2788         }
2789         if (cp->cp_edx & CPUID_INTC_EDX_PAT) {
2790                 add_x86_feature(featureset, X86FSET_PAT);
2791         }
2792         if (cp->cp_edx & CPUID_INTC_EDX_SEP) {
2793                 add_x86_feature(featureset, X86FSET_SEP);
2794         }
2795         if (cp->cp_edx & CPUID_INTC_EDX_FXSR) {
2796                 /*
2797                  * In our implementation, fxsave/fxrstor
2798                  * are prerequisites before we'll even
2799                  * try to do SSE things.
2800                  */
2801                 if (cp->cp_edx & CPUID_INTC_EDX_SSE) {
2802                         add_x86_feature(featureset, X86FSET_SSE);
2803                 }
2804                 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) {
2805                         add_x86_feature(featureset, X86FSET_SSE2);
2806                 }
2807                 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) {
2808                         add_x86_feature(featureset, X86FSET_SSE3);
2809                 }
2810                 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) {
2811                         add_x86_feature(featureset, X86FSET_SSSE3);
2812                 }
2813                 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) {
2814                         add_x86_feature(featureset, X86FSET_SSE4_1);
2815                 }
2816                 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) {
2817                         add_x86_feature(featureset, X86FSET_SSE4_2);
2818                 }
2819                 if (cp->cp_ecx & CPUID_INTC_ECX_AES) {
2820                         add_x86_feature(featureset, X86FSET_AES);
2821                 }
2822                 if (cp->cp_ecx & CPUID_INTC_ECX_PCLMULQDQ) {
2823                         add_x86_feature(featureset, X86FSET_PCLMULQDQ);
2824                 }
2825 
2826                 if (cpi->cpi_std[7].cp_ebx & CPUID_INTC_EBX_7_0_SHA)
2827                         add_x86_feature(featureset, X86FSET_SHA);
2828 
2829                 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_UMIP)
2830                         add_x86_feature(featureset, X86FSET_UMIP);
2831                 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_PKU)
2832                         add_x86_feature(featureset, X86FSET_PKU);
2833                 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_OSPKE)
2834                         add_x86_feature(featureset, X86FSET_OSPKE);
2835 
2836                 if (cp->cp_ecx & CPUID_INTC_ECX_XSAVE) {
2837                         add_x86_feature(featureset, X86FSET_XSAVE);
2838 
2839                         /* We only test AVX & AVX512 when there is XSAVE */
2840 
2841                         if (cp->cp_ecx & CPUID_INTC_ECX_AVX) {
2842                                 add_x86_feature(featureset,
2843                                     X86FSET_AVX);
2844 
2845                                 /*
2846                                  * Intel says we can't check these without also
2847                                  * checking AVX.
2848                                  */
2849                                 if (cp->cp_ecx & CPUID_INTC_ECX_F16C)
2850                                         add_x86_feature(featureset,
2851                                             X86FSET_F16C);
2852 
2853                                 if (cp->cp_ecx & CPUID_INTC_ECX_FMA)
2854                                         add_x86_feature(featureset,
2855                                             X86FSET_FMA);
2856 
2857                                 if (cpi->cpi_std[7].cp_ebx &
2858                                     CPUID_INTC_EBX_7_0_BMI1)
2859                                         add_x86_feature(featureset,
2860                                             X86FSET_BMI1);
2861 
2862                                 if (cpi->cpi_std[7].cp_ebx &
2863                                     CPUID_INTC_EBX_7_0_BMI2)
2864                                         add_x86_feature(featureset,
2865                                             X86FSET_BMI2);
2866 
2867                                 if (cpi->cpi_std[7].cp_ebx &
2868                                     CPUID_INTC_EBX_7_0_AVX2)
2869                                         add_x86_feature(featureset,
2870                                             X86FSET_AVX2);
2871                         }
2872 
2873                         if (cpi->cpi_vendor == X86_VENDOR_Intel &&
2874                             (cpi->cpi_std[7].cp_ebx &
2875                             CPUID_INTC_EBX_7_0_AVX512F) != 0) {
2876                                 add_x86_feature(featureset, X86FSET_AVX512F);
2877 
2878                                 if (cpi->cpi_std[7].cp_ebx &
2879                                     CPUID_INTC_EBX_7_0_AVX512DQ)
2880                                         add_x86_feature(featureset,
2881                                             X86FSET_AVX512DQ);
2882                                 if (cpi->cpi_std[7].cp_ebx &
2883                                     CPUID_INTC_EBX_7_0_AVX512IFMA)
2884                                         add_x86_feature(featureset,
2885                                             X86FSET_AVX512FMA);
2886                                 if (cpi->cpi_std[7].cp_ebx &
2887                                     CPUID_INTC_EBX_7_0_AVX512PF)
2888                                         add_x86_feature(featureset,
2889                                             X86FSET_AVX512PF);
2890                                 if (cpi->cpi_std[7].cp_ebx &
2891                                     CPUID_INTC_EBX_7_0_AVX512ER)
2892                                         add_x86_feature(featureset,
2893                                             X86FSET_AVX512ER);
2894                                 if (cpi->cpi_std[7].cp_ebx &
2895                                     CPUID_INTC_EBX_7_0_AVX512CD)
2896                                         add_x86_feature(featureset,
2897                                             X86FSET_AVX512CD);
2898                                 if (cpi->cpi_std[7].cp_ebx &
2899                                     CPUID_INTC_EBX_7_0_AVX512BW)
2900                                         add_x86_feature(featureset,
2901                                             X86FSET_AVX512BW);
2902                                 if (cpi->cpi_std[7].cp_ebx &
2903                                     CPUID_INTC_EBX_7_0_AVX512VL)
2904                                         add_x86_feature(featureset,
2905                                             X86FSET_AVX512VL);
2906 
2907                                 if (cpi->cpi_std[7].cp_ecx &
2908                                     CPUID_INTC_ECX_7_0_AVX512VBMI)
2909                                         add_x86_feature(featureset,
2910                                             X86FSET_AVX512VBMI);
2911                                 if (cpi->cpi_std[7].cp_ecx &
2912                                     CPUID_INTC_ECX_7_0_AVX512VNNI)
2913                                         add_x86_feature(featureset,
2914                                             X86FSET_AVX512VNNI);
2915                                 if (cpi->cpi_std[7].cp_ecx &
2916                                     CPUID_INTC_ECX_7_0_AVX512VPOPCDQ)
2917                                         add_x86_feature(featureset,
2918                                             X86FSET_AVX512VPOPCDQ);
2919 
2920                                 if (cpi->cpi_std[7].cp_edx &
2921                                     CPUID_INTC_EDX_7_0_AVX5124NNIW)
2922                                         add_x86_feature(featureset,
2923                                             X86FSET_AVX512NNIW);
2924                                 if (cpi->cpi_std[7].cp_edx &
2925                                     CPUID_INTC_EDX_7_0_AVX5124FMAPS)
2926                                         add_x86_feature(featureset,
2927                                             X86FSET_AVX512FMAPS);
2928                         }
2929                 }
2930         }
2931 
2932         if (cpi->cpi_vendor == X86_VENDOR_Intel) {
2933                 if (cp->cp_ecx & CPUID_INTC_ECX_PCID) {
2934                         add_x86_feature(featureset, X86FSET_PCID);
2935                 }
2936         }
2937 
2938         if (cp->cp_ecx & CPUID_INTC_ECX_X2APIC) {
2939                 add_x86_feature(featureset, X86FSET_X2APIC);
2940         }
2941         if (cp->cp_edx & CPUID_INTC_EDX_DE) {
2942                 add_x86_feature(featureset, X86FSET_DE);
2943         }
2944 #if !defined(__xpv)
2945         if (cp->cp_ecx & CPUID_INTC_ECX_MON) {
2946 
2947                 /*
2948                  * We require the CLFLUSH instruction (for an erratum
2949                  * workaround) in order to use MONITOR/MWAIT.
2950                  */
2951                 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
2952                         cpi->cpi_mwait.support |= MWAIT_SUPPORT;
2953                         add_x86_feature(featureset, X86FSET_MWAIT);
2954                 } else {
2955                         extern int idle_cpu_assert_cflush_monitor;
2956 
2957                         /*
2958                          * All processors we are aware of which have
2959                          * MONITOR/MWAIT also have CLFLUSH.
2960                          */
2961                         if (idle_cpu_assert_cflush_monitor) {
2962                                 ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) &&
2963                                     (cp->cp_edx & CPUID_INTC_EDX_CLFSH));
2964                         }
2965                 }
2966         }
2967 #endif  /* __xpv */
2968 
2969         if (cp->cp_ecx & CPUID_INTC_ECX_VMX) {
2970                 add_x86_feature(featureset, X86FSET_VMX);
2971         }
2972 
2973         if (cp->cp_ecx & CPUID_INTC_ECX_RDRAND)
2974                 add_x86_feature(featureset, X86FSET_RDRAND);
2975 
2976         /*
2977          * We only need this the first time; the rest of the CPUs follow
2978          * suit. That is, we only capture this for the boot CPU.
2979          */
2980         if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
2981                 add_x86_feature(featureset, X86FSET_CLFSH);
2982                 x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8);
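                     /*
                      * Worked example: %ebx bits 15:8 count in 8-byte quanta,
                      * so the common value of 8 yields x86_clflush_size == 64.
                      */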
2983         }
2984         if (is_x86_feature(featureset, X86FSET_PAE))
2985                 cpi->cpi_pabits = 36;
2986 
2987         if (cpi->cpi_maxeax >= 0xD && !xsave_force_disable) {
2988                 struct cpuid_regs r, *ecp;
2989 
2990                 ecp = &r;
2991                 ecp->cp_eax = 0xD;
2992                 ecp->cp_ecx = 1;
2993                 ecp->cp_edx = ecp->cp_ebx = 0;
2994                 (void) __cpuid_insn(ecp);
2995 
2996                 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVEOPT)
2997                         add_x86_feature(featureset, X86FSET_XSAVEOPT);
2998                 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVEC)
2999                         add_x86_feature(featureset, X86FSET_XSAVEC);
3000                 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVES)
3001                         add_x86_feature(featureset, X86FSET_XSAVES);
3002         }
3003 
3004         /*
3005          * Work on the "extended" feature information, doing
3006          * some basic initialization for cpuid_pass2()
3007          */
3008         xcpuid = 0;
3009         switch (cpi->cpi_vendor) {
3010         case X86_VENDOR_Intel:
3011                 /*
3012                  * On KVM we know we will have proper support for extended
3013                  * cpuid.
3014                  */
3015                 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf ||
3016                     (get_hwenv() == HW_KVM && cpi->cpi_family == 6 &&
3017                     (cpi->cpi_model == 6 || cpi->cpi_model == 2)))
3018                         xcpuid++;
3019                 break;
3020         case X86_VENDOR_AMD:
3021                 if (cpi->cpi_family > 5 ||
3022                     (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
3023                         xcpuid++;
3024                 break;
3025         case X86_VENDOR_Cyrix:
3026                 /*
3027                  * Only these Cyrix CPUs are -known- to support
3028                  * extended cpuid operations.
3029                  */
3030                 if (x86_type == X86_TYPE_VIA_CYRIX_III ||
3031                     x86_type == X86_TYPE_CYRIX_GXm)
3032                         xcpuid++;
3033                 break;
3034         case X86_VENDOR_Centaur:
3035         case X86_VENDOR_TM:
3036         default:
3037                 xcpuid++;
3038                 break;
3039         }
3040 
3041         if (xcpuid) {
3042                 cp = &cpi->cpi_extd[0];
3043                 cp->cp_eax = CPUID_LEAF_EXT_0;
3044                 cpi->cpi_xmaxeax = __cpuid_insn(cp);
3045         }
3046 
3047         if (cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) {
3048 
3049                 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX)
3050                         cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX;
3051 
3052                 switch (cpi->cpi_vendor) {
3053                 case X86_VENDOR_Intel:
3054                 case X86_VENDOR_AMD:
3055                         if (cpi->cpi_xmaxeax < 0x80000001)
3056                                 break;
3057                         cp = &cpi->cpi_extd[1];
3058                         cp->cp_eax = 0x80000001;
3059                         (void) __cpuid_insn(cp);
3060 
3061                         if (cpi->cpi_vendor == X86_VENDOR_AMD &&
3062                             cpi->cpi_family == 5 &&
3063                             cpi->cpi_model == 6 &&
3064                             cpi->cpi_step == 6) {
3065                                 /*
3066                                  * K6 model 6 uses bit 10 to indicate SYSC.
3067                                  * Later models use bit 11. Fix it here.
3068                                  */
3069                                 if (cp->cp_edx & 0x400) {
3070                                         cp->cp_edx &= ~0x400;
3071                                         cp->cp_edx |= CPUID_AMD_EDX_SYSC;
3072                                 }
3073                         }
3074 
3075                         platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp);
3076 
3077                         /*
3078                          * Compute the additions to the kernel's feature word.
3079                          */
3080                         if (cp->cp_edx & CPUID_AMD_EDX_NX) {
3081                                 add_x86_feature(featureset, X86FSET_NX);
3082                         }
3083 
3084                         /*
3085                          * Regardless of whether or not we boot 64-bit,
3086                          * we should have a way to identify whether
3087                          * the CPU is capable of running 64-bit.
3088                          */
3089                         if (cp->cp_edx & CPUID_AMD_EDX_LM) {
3090                                 add_x86_feature(featureset, X86FSET_64);
3091                         }
3092 
3093                         /* 1 GB large page - enable only for 64 bit kernel */
3094                         if (cp->cp_edx & CPUID_AMD_EDX_1GPG) {
3095                                 add_x86_feature(featureset, X86FSET_1GPG);
3096                         }
3097 
3098                         if ((cpi->cpi_vendor == X86_VENDOR_AMD) &&
3099                             (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) &&
3100                             (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) {
3101                                 add_x86_feature(featureset, X86FSET_SSE4A);
3102                         }
3103 
3104                         /*
3105                          * It's really tricky to support syscall/sysret in
3106                          * the i386 kernel; we rely on sysenter/sysexit
3107                          * instead.  In the amd64 kernel, things are -way-
3108                          * better.
3109                          */
3110                         if (cp->cp_edx & CPUID_AMD_EDX_SYSC) {
3111                                 add_x86_feature(featureset, X86FSET_ASYSC);
3112                         }
3113 
3114                         /*
3115                          * While we're thinking about system calls, note
3116                          * that AMD processors don't support sysenter
3117                          * in long mode at all, so don't try to program them.
3118                          */
3119                         if (x86_vendor == X86_VENDOR_AMD) {
3120                                 remove_x86_feature(featureset, X86FSET_SEP);
3121                         }
3122 
3123                         if (cp->cp_edx & CPUID_AMD_EDX_TSCP) {
3124                                 add_x86_feature(featureset, X86FSET_TSCP);
3125                         }
3126 
3127                         if (cp->cp_ecx & CPUID_AMD_ECX_SVM) {
3128                                 add_x86_feature(featureset, X86FSET_SVM);
3129                         }
3130 
3131                         if (cp->cp_ecx & CPUID_AMD_ECX_TOPOEXT) {
3132                                 add_x86_feature(featureset, X86FSET_TOPOEXT);
3133                         }
3134 
3135                         if (cp->cp_ecx & CPUID_AMD_ECX_PCEC) {
3136                                 add_x86_feature(featureset, X86FSET_AMD_PCEC);
3137                         }
3138 
3139                         if (cp->cp_ecx & CPUID_AMD_ECX_XOP) {
3140                                 add_x86_feature(featureset, X86FSET_XOP);
3141                         }
3142 
3143                         if (cp->cp_ecx & CPUID_AMD_ECX_FMA4) {
3144                                 add_x86_feature(featureset, X86FSET_FMA4);
3145                         }
3146 
3147                         if (cp->cp_ecx & CPUID_AMD_ECX_TBM) {
3148                                 add_x86_feature(featureset, X86FSET_TBM);
3149                         }
3150 
3151                         if (cp->cp_ecx & CPUID_AMD_ECX_MONITORX) {
3152                                 add_x86_feature(featureset, X86FSET_MONITORX);
3153                         }
3154                         break;
3155                 default:
3156                         break;
3157                 }
3158 
3159                 /*
3160                  * Get CPUID data about processor cores and hyperthreads.
3161                  */
3162                 switch (cpi->cpi_vendor) {
3163                 case X86_VENDOR_Intel:
3164                         if (cpi->cpi_maxeax >= 4) {
3165                                 cp = &cpi->cpi_std[4];
3166                                 cp->cp_eax = 4;
3167                                 cp->cp_ecx = 0;
3168                                 (void) __cpuid_insn(cp);
3169                                 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp);
3170                         }
3171                         /*FALLTHROUGH*/
3172                 case X86_VENDOR_AMD:
3173                         if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8)
3174                                 break;
3175                         cp = &cpi->cpi_extd[8];
3176                         cp->cp_eax = CPUID_LEAF_EXT_8;
3177                         (void) __cpuid_insn(cp);
3178                         platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8,
3179                             cp);
3180 
3181                         /*
3182                          * AMD uses ebx for some extended functions.
3183                          */
3184                         if (cpi->cpi_vendor == X86_VENDOR_AMD) {
3185                                 /*
3186                                  * While we're here, check for the AMD "Error
3187                                  * Pointer Zero/Restore" feature. This can be
3188                                  * used to set up the FP save handlers
3189                                  * appropriately.
3190                                  */
3191                                 if (cp->cp_ebx & CPUID_AMD_EBX_ERR_PTR_ZERO) {
3192                                         cpi->cpi_fp_amd_save = 0;
3193                                 } else {
3194                                         cpi->cpi_fp_amd_save = 1;
3195                                 }
3196 
3197                                 if (cp->cp_ebx & CPUID_AMD_EBX_CLZERO) {
3198                                         add_x86_feature(featureset,
3199                                             X86FSET_CLZERO);
3200                                 }
3201                         }
3202 
3203                         /*
3204                          * Virtual and physical address limits from
3205                          * cpuid override previously guessed values.
3206                          */
3207                         cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0);
3208                         cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8);
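                             /*
                              * Worked example (hypothetical value): %eax ==
                              * 0x3030 here would mean 48-bit physical and
                              * 48-bit virtual addressing.
                              */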
3209                         break;
3210                 default:
3211                         break;
3212                 }
3213 
3214                 /*
3215                  * Get CPUID data about TSC Invariance in Deep C-State.
3216                  */
3217                 switch (cpi->cpi_vendor) {
3218                 case X86_VENDOR_Intel:
3219                 case X86_VENDOR_AMD:
3220                         if (cpi->cpi_xmaxeax >= 0x80000007) {
3221                                 cp = &cpi->cpi_extd[7];
3222                                 cp->cp_eax = 0x80000007;
3223                                 cp->cp_ecx = 0;
3224                                 (void) __cpuid_insn(cp);
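                                     /*
                                      * (Consumers of this key off %edx bit 8,
                                      * the architectural "invariant TSC"
                                      * flag; only the raw registers are
                                      * captured here.)
                                      */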
3225                         }
3226                         break;
3227                 default:
3228                         break;
3229                 }
3230         }
3231 
3232         cpuid_pass1_topology(cpu, featureset);
3233 
3234         /*
3235          * Synthesize chip "revision" and socket type
3236          */
3237         cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family,
3238             cpi->cpi_model, cpi->cpi_step);
3239         cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor,
3240             cpi->cpi_family, cpi->cpi_model, cpi->cpi_step);
3241         cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family,
3242             cpi->cpi_model, cpi->cpi_step);
3243 
3244         if (cpi->cpi_vendor == X86_VENDOR_AMD) {
3245                 if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8 &&
3246                     cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_ERR_PTR_ZERO) {
3247                         /* Special handling for AMD FP not necessary. */
3248                         cpi->cpi_fp_amd_save = 0;
3249                 } else {
3250                         cpi->cpi_fp_amd_save = 1;
3251                 }
3252         }
3253 
3254         /*
3255          * Check the processor leaves that are used for security features.
3256          */
3257         cpuid_scan_security(cpu, featureset);
3258 
3259 pass1_done:
3260         cpi->cpi_pass = 1;
3261 }
3262 
3263 /*
3264  * Make copies of the cpuid table entries we depend on, in
3265  * part for ease of parsing now, in part so that we have only
3266  * one place to correct any of it, in part for ease of
3267  * later export to userland, and in part so we can look at
3268  * this stuff in a crash dump.
3269  */
3270 
3271 /*ARGSUSED*/
3272 void
3273 cpuid_pass2(cpu_t *cpu)
3274 {
3275         uint_t n, nmax;
3276         int i;
3277         struct cpuid_regs *cp;
3278         uint8_t *dp;
3279         uint32_t *iptr;
3280         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
3281 
3282         ASSERT(cpi->cpi_pass == 1);
3283 
3284         if (cpi->cpi_maxeax < 1)
3285                 goto pass2_done;
3286 
3287         if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD)
3288                 nmax = NMAX_CPI_STD;
3289         /*
3290          * (We already handled n == 0 and n == 1 in pass 1)
3291          */
3292         for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) {
3293                 cp->cp_eax = n;
3294 
3295                 /*
3296                  * n == 7 was handled in pass 1
3297                  */
3298                 if (n == 7)
3299                         continue;
3300 
3301                 /*
3302                  * CPUID function 4 expects %ecx to be initialized
3303                  * with an index which indicates which cache to return
3304                  * information about. The OS is expected to call function 4
3305                  * with %ecx set to 0, 1, 2, ... until it returns with
3306                  * EAX[4:0] set to 0, which indicates there are no more
3307                  * caches.
3308                  *
3309                  * Here, populate cpi_std[4] with the information returned by
3310                  * function 4 when %ecx == 0, and do the rest in cpuid_pass3()
3311                  * when dynamic memory allocation becomes available.
3312                  *
3313                  * Note: we need to explicitly initialize %ecx here, since
3314                  * function 4 may have been previously invoked.
3315                  */
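                     /*
                      * An illustrative walk (done for real in cpuid_pass3()),
                      * assuming a hypothetical part with three cache levels:
                      *
                      *	%ecx == 0 -> EAX[4:0] == 1 (L1 data cache)
                      *	%ecx == 1 -> EAX[4:0] == 2 (L1 instruction cache)
                      *	%ecx == 2 -> EAX[4:0] == 3 (unified L2 cache)
                      *	%ecx == 3 -> EAX[4:0] == 0 (no more caches)
                      */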
3316                 if (n == 4)
3317                         cp->cp_ecx = 0;
3318 
3319                 (void) __cpuid_insn(cp);
3320                 platform_cpuid_mangle(cpi->cpi_vendor, n, cp);
3321                 switch (n) {
3322                 case 2:
3323                         /*
3324                          * "the lower 8 bits of the %eax register
3325                          * contain a value that identifies the number
3326                          * of times the cpuid [instruction] has to be
3327                          * executed to obtain a complete image of the
3328                          * processor's caching systems."
3329                          *
3330                          * How *do* they make this stuff up?
3331                          */
3332                         cpi->cpi_ncache = sizeof (*cp) *
3333                             BITX(cp->cp_eax, 7, 0);
3334                         if (cpi->cpi_ncache == 0)
3335                                 break;
3336                         cpi->cpi_ncache--;   /* skip count byte */
3337 
3338                         /*
3339                          * Well, rather than implement this slightly dubious
3340                          * algorithm, we just look at the first 15 descriptor
3341                          * bytes (those that fit after the count byte).
3342                          */
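                             /*
                              * Each nonzero byte gathered below is a
                              * descriptor from Intel's table; e.g., 0x2c
                              * denotes a 32K, 8-way L1 data cache with
                              * 64-byte lines.
                              */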
3343                         if (cpi->cpi_ncache > (sizeof (*cp) - 1))
3344                                 cpi->cpi_ncache = sizeof (*cp) - 1;
3345 
3346                         dp = cpi->cpi_cacheinfo;
3347                         if (BITX(cp->cp_eax, 31, 31) == 0) {
3348                                 uint8_t *p = (void *)&cp->cp_eax;
3349                                 for (i = 1; i < 4; i++)
3350                                         if (p[i] != 0)
3351                                                 *dp++ = p[i];
3352                         }
3353                         if (BITX(cp->cp_ebx, 31, 31) == 0) {
3354                                 uint8_t *p = (void *)&cp->cp_ebx;
3355                                 for (i = 0; i < 4; i++)
3356                                         if (p[i] != 0)
3357                                                 *dp++ = p[i];
3358                         }
3359                         if (BITX(cp->cp_ecx, 31, 31) == 0) {
3360                                 uint8_t *p = (void *)&cp->cp_ecx;
3361                                 for (i = 0; i < 4; i++)
3362                                         if (p[i] != 0)
3363                                                 *dp++ = p[i];
3364                         }
3365                         if (BITX(cp->cp_edx, 31, 31) == 0) {
3366                                 uint8_t *p = (void *)&cp->cp_edx;
3367                                 for (i = 0; i < 4; i++)
3368                                         if (p[i] != 0)
3369                                                 *dp++ = p[i];
3370                         }
3371                         break;
3372 
3373                 case 3: /* Processor serial number, if PSN supported */
3374                         break;
3375 
3376                 case 4: /* Deterministic cache parameters */
3377                         break;
3378 
3379                 case 5: /* Monitor/Mwait parameters */
3380                 {
3381                         size_t mwait_size;
3382 
3383                         /*
3384                          * Check cpi_mwait.support, which was set in cpuid_pass1().
3385                          */
3386                         if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT))
3387                                 break;
3388 
3389                         /*
3390                          * Protect ourselves from an insane mwait line size;
3391                          * this works around incomplete hardware emulator(s).
3392                          */
3393                         mwait_size = (size_t)MWAIT_SIZE_MAX(cpi);
3394                         if (mwait_size < sizeof (uint32_t) ||
3395                             !ISP2(mwait_size)) {
3396 #if DEBUG
3397                                 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait "
3398                                     "size %ld", cpu->cpu_id, (long)mwait_size);
3399 #endif
3400                                 break;
3401                         }
3402 
3403                         cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi);
3404                         cpi->cpi_mwait.mon_max = mwait_size;
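                             /*
                              * On typical hardware both the minimum
                              * (EAX[15:0]) and maximum (EBX[15:0]) monitor
                              * line sizes report 64, i.e. one cache line.
                              */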
3405                         if (MWAIT_EXTENSION(cpi)) {
3406                                 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS;
3407                                 if (MWAIT_INT_ENABLE(cpi))
3408                                         cpi->cpi_mwait.support |=
3409                                             MWAIT_ECX_INT_ENABLE;
3410                         }
3411                         break;
3412                 }
3413                 default:
3414                         break;
3415                 }
3416         }
3417 
3418         /*
3419          * XSAVE enumeration
3420          */
3421         if (cpi->cpi_maxeax >= 0xD) {
3422                 struct cpuid_regs regs;
3423                 boolean_t cpuid_d_valid = B_TRUE;
3424 
3425                 cp = &regs;
3426                 cp->cp_eax = 0xD;
3427                 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
3428 
3429                 (void) __cpuid_insn(cp);
3430 
3431                 /*
3432                  * Sanity checks for debug
3433                  */
3434                 if ((cp->cp_eax & XFEATURE_LEGACY_FP) == 0 ||
3435                     (cp->cp_eax & XFEATURE_SSE) == 0) {
3436                         cpuid_d_valid = B_FALSE;
3437                 }
3438 
3439                 cpi->cpi_xsave.xsav_hw_features_low = cp->cp_eax;
3440                 cpi->cpi_xsave.xsav_hw_features_high = cp->cp_edx;
3441                 cpi->cpi_xsave.xsav_max_size = cp->cp_ecx;
3442 
3443                 /*
3444                  * If the hw supports AVX, get the size and offset in the save
3445                  * area for the ymm state.
3446                  */
3447                 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX) {
3448                         cp->cp_eax = 0xD;
3449                         cp->cp_ecx = 2;
3450                         cp->cp_edx = cp->cp_ebx = 0;
3451 
3452                         (void) __cpuid_insn(cp);
3453 
3454                         if (cp->cp_ebx != CPUID_LEAFD_2_YMM_OFFSET ||
3455                             cp->cp_eax != CPUID_LEAFD_2_YMM_SIZE) {
3456                                 cpuid_d_valid = B_FALSE;
3457                         }
3458 
3459                         cpi->cpi_xsave.ymm_size = cp->cp_eax;
3460                         cpi->cpi_xsave.ymm_offset = cp->cp_ebx;
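                             /*
                              * On compliant hardware this is 256 bytes (the
                              * upper halves of the 16 %ymm registers) at
                              * offset 576, just past the 512-byte legacy
                              * FXSAVE area and the 64-byte XSAVE header;
                              * anything else fails the check above.
                              */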
3461                 }
3462 
3463                 /*
3464                  * If the hw supports MPX, get the size and offset in the
3465                  * save area for BNDREGS and BNDCSR.
3466                  */
3467                 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_MPX) {
3468                         cp->cp_eax = 0xD;
3469                         cp->cp_ecx = 3;
3470                         cp->cp_edx = cp->cp_ebx = 0;
3471 
3472                         (void) __cpuid_insn(cp);
3473 
3474                         cpi->cpi_xsave.bndregs_size = cp->cp_eax;
3475                         cpi->cpi_xsave.bndregs_offset = cp->cp_ebx;
3476 
3477                         cp->cp_eax = 0xD;
3478                         cp->cp_ecx = 4;
3479                         cp->cp_edx = cp->cp_ebx = 0;
3480 
3481                         (void) __cpuid_insn(cp);
3482 
3483                         cpi->cpi_xsave.bndcsr_size = cp->cp_eax;
3484                         cpi->cpi_xsave.bndcsr_offset = cp->cp_ebx;
3485                 }
3486 
3487                 /*
3488                  * If the hw supports AVX512, get the size and offset in the
3489                  * save area for the opmask registers and zmm state.
3490                  */
3491                 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX512) {
3492                         cp->cp_eax = 0xD;
3493                         cp->cp_ecx = 5;
3494                         cp->cp_edx = cp->cp_ebx = 0;
3495 
3496                         (void) __cpuid_insn(cp);
3497 
3498                         cpi->cpi_xsave.opmask_size = cp->cp_eax;
3499                         cpi->cpi_xsave.opmask_offset = cp->cp_ebx;
3500 
3501                         cp->cp_eax = 0xD;
3502                         cp->cp_ecx = 6;
3503                         cp->cp_edx = cp->cp_ebx = 0;
3504 
3505                         (void) __cpuid_insn(cp);
3506 
3507                         cpi->cpi_xsave.zmmlo_size = cp->cp_eax;
3508                         cpi->cpi_xsave.zmmlo_offset = cp->cp_ebx;
3509 
3510                         cp->cp_eax = 0xD;
3511                         cp->cp_ecx = 7;
3512                         cp->cp_edx = cp->cp_ebx = 0;
3513 
3514                         (void) __cpuid_insn(cp);
3515 
3516                         cpi->cpi_xsave.zmmhi_size = cp->cp_eax;
3517                         cpi->cpi_xsave.zmmhi_offset = cp->cp_ebx;
3518                 }
3519 
3520                 if (!is_x86_feature(x86_featureset, X86FSET_XSAVE)) {
3521                         xsave_state_size = 0;
3522                 } else if (cpuid_d_valid) {
3523                         xsave_state_size = cpi->cpi_xsave.xsav_max_size;
3524                 } else {
3525                         /* Broken CPUID 0xD, probably in HVM */
3526                         cmn_err(CE_WARN, "cpu%d: CPUID.0xD returns invalid "
3527                             "value: hw_low = %d, hw_high = %d, xsave_size = %d"
3528                             ", ymm_size = %d, ymm_offset = %d\n",
3529                             cpu->cpu_id, cpi->cpi_xsave.xsav_hw_features_low,
3530                             cpi->cpi_xsave.xsav_hw_features_high,
3531                             (int)cpi->cpi_xsave.xsav_max_size,
3532                             (int)cpi->cpi_xsave.ymm_size,
3533                             (int)cpi->cpi_xsave.ymm_offset);
3534 
3535                         if (xsave_state_size != 0) {
3536                                 /*
3537                                  * This must be a non-boot CPU. We cannot
3538                                  * continue, because boot cpu has already
3539                                  * enabled XSAVE.
3540                                  */
3541                                 ASSERT(cpu->cpu_id != 0);
3542                                 cmn_err(CE_PANIC, "cpu%d: we have already "
3543                                     "enabled XSAVE on boot cpu, cannot "
3544                                     "continue.", cpu->cpu_id);
3545                         } else {
3546                                 /*
3547                                  * If we reached here on the boot CPU, it's also
3548                                  * almost certain that we'll reach here on the
3549                                  * non-boot CPUs. When we're here on a boot CPU
3550                                  * we should disable the feature, on a non-boot
3551                                  * CPU we need to confirm that we have.
3552                                  */
3553                                 if (cpu->cpu_id == 0) {
3554                                         remove_x86_feature(x86_featureset,
3555                                             X86FSET_XSAVE);
3556                                         remove_x86_feature(x86_featureset,
3557                                             X86FSET_AVX);
3558                                         remove_x86_feature(x86_featureset,
3559                                             X86FSET_F16C);
3560                                         remove_x86_feature(x86_featureset,
3561                                             X86FSET_BMI1);
3562                                         remove_x86_feature(x86_featureset,
3563                                             X86FSET_BMI2);
3564                                         remove_x86_feature(x86_featureset,
3565                                             X86FSET_FMA);
3566                                         remove_x86_feature(x86_featureset,
3567                                             X86FSET_AVX2);
3568                                         remove_x86_feature(x86_featureset,
3569                                             X86FSET_MPX);
3570                                         remove_x86_feature(x86_featureset,
3571                                             X86FSET_AVX512F);
3572                                         remove_x86_feature(x86_featureset,
3573                                             X86FSET_AVX512DQ);
3574                                         remove_x86_feature(x86_featureset,
3575                                             X86FSET_AVX512PF);
3576                                         remove_x86_feature(x86_featureset,
3577                                             X86FSET_AVX512ER);
3578                                         remove_x86_feature(x86_featureset,
3579                                             X86FSET_AVX512CD);
3580                                         remove_x86_feature(x86_featureset,
3581                                             X86FSET_AVX512BW);
3582                                         remove_x86_feature(x86_featureset,
3583                                             X86FSET_AVX512VL);
3584                                         remove_x86_feature(x86_featureset,
3585                                             X86FSET_AVX512FMA);
3586                                         remove_x86_feature(x86_featureset,
3587                                             X86FSET_AVX512VBMI);
3588                                         remove_x86_feature(x86_featureset,
3589                                             X86FSET_AVX512VNNI);
3590                                         remove_x86_feature(x86_featureset,
3591                                             X86FSET_AVX512VPOPCDQ);
3592                                         remove_x86_feature(x86_featureset,
3593                                             X86FSET_AVX512NNIW);
3594                                         remove_x86_feature(x86_featureset,
3595                                             X86FSET_AVX512FMAPS);
3596 
3597                                         CPI_FEATURES_ECX(cpi) &=
3598                                             ~CPUID_INTC_ECX_XSAVE;
3599                                         CPI_FEATURES_ECX(cpi) &=
3600                                             ~CPUID_INTC_ECX_AVX;
3601                                         CPI_FEATURES_ECX(cpi) &=
3602                                             ~CPUID_INTC_ECX_F16C;
3603                                         CPI_FEATURES_ECX(cpi) &=
3604                                             ~CPUID_INTC_ECX_FMA;
3605                                         CPI_FEATURES_7_0_EBX(cpi) &=
3606                                             ~CPUID_INTC_EBX_7_0_BMI1;
3607                                         CPI_FEATURES_7_0_EBX(cpi) &=
3608                                             ~CPUID_INTC_EBX_7_0_BMI2;
3609                                         CPI_FEATURES_7_0_EBX(cpi) &=
3610                                             ~CPUID_INTC_EBX_7_0_AVX2;
3611                                         CPI_FEATURES_7_0_EBX(cpi) &=
3612                                             ~CPUID_INTC_EBX_7_0_MPX;
3613                                         CPI_FEATURES_7_0_EBX(cpi) &=
3614                                             ~CPUID_INTC_EBX_7_0_ALL_AVX512;
3615 
3616                                         CPI_FEATURES_7_0_ECX(cpi) &=
3617                                             ~CPUID_INTC_ECX_7_0_ALL_AVX512;
3618 
3619                                         CPI_FEATURES_7_0_EDX(cpi) &=
3620                                             ~CPUID_INTC_EDX_7_0_ALL_AVX512;
3621 
3622                                         xsave_force_disable = B_TRUE;
3623                                 } else {
3624                                         VERIFY(is_x86_feature(x86_featureset,
3625                                             X86FSET_XSAVE) == B_FALSE);
3626                                 }
3627                         }
3628                 }
3629         }
3630 
3631 
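        /*
         * Bit 31 of cpi_xmaxeax (CPUID_LEAF_EXT_0, 0x80000000) is set
         * only when the processor supports the extended leaf range, so
         * this bit test asks "are there any extended leaves at all?".
         */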
3632         if ((cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) == 0)
3633                 goto pass2_done;
3634 
3635         if ((nmax = cpi->cpi_xmaxeax - CPUID_LEAF_EXT_0 + 1) > NMAX_CPI_EXTD)
3636                 nmax = NMAX_CPI_EXTD;
3637         /*
3638          * Copy the extended properties, fixing them as we go.
3639          * (We already handled n == 0 and n == 1 in pass 1)
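         *
         * Leaves 0x80000002-0x80000004 (n == 2..4 below) supply the
         * processor brand string: 16 bytes per leaf across
         * %eax..%edx, 48 bytes in all.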
3640          */
3641         iptr = (void *)cpi->cpi_brandstr;
3642         for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) {
3643                 cp->cp_eax = CPUID_LEAF_EXT_0 + n;
3644                 (void) __cpuid_insn(cp);
3645                 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_0 + n,
3646                     cp);
3647                 switch (n) {
3648                 case 2:
3649                 case 3:
3650                 case 4:
3651                         /*
3652                          * Extract the brand string
3653                          */
3654                         *iptr++ = cp->cp_eax;
3655                         *iptr++ = cp->cp_ebx;
3656                         *iptr++ = cp->cp_ecx;
3657                         *iptr++ = cp->cp_edx;
3658                         break;
3659                 case 5:
3660                         switch (cpi->cpi_vendor) {
3661                         case X86_VENDOR_AMD:
3662                                 /*
3663                                  * The Athlon and Duron were the first
3664                                  * parts to report the sizes of the
3665                                  * TLB for large pages. Before then,
3666                                  * we don't trust the data.
3667                                  */
3668                                 if (cpi->cpi_family < 6 ||
3669                                     (cpi->cpi_family == 6 &&
3670                                     cpi->cpi_model < 1))
3671                                         cp->cp_eax = 0;
3672                                 break;
3673                         default:
3674                                 break;
3675                         }
3676                         break;
3677                 case 6:
3678                         switch (cpi->cpi_vendor) {
3679                         case X86_VENDOR_AMD:
3680                                 /*
3681                                  * The Athlon and Duron were the first
3682                                  * AMD parts with L2 TLB's.
3683                                  * Before then, don't trust the data.
3684                                  */
                                if (cpi->cpi_family < 6 ||
                                    (cpi->cpi_family == 6 &&
                                    cpi->cpi_model < 1))
3688                                         cp->cp_eax = cp->cp_ebx = 0;
3689                                 /*
3690                                  * AMD Duron rev A0 reports L2
3691                                  * cache size incorrectly as 1K
3692                                  * when it is really 64K
3693                                  */
3694                                 if (cpi->cpi_family == 6 &&
3695                                     cpi->cpi_model == 3 &&
3696                                     cpi->cpi_step == 0) {
3697                                         cp->cp_ecx &= 0xffff;
3698                                         cp->cp_ecx |= 0x400000;
3699                                 }
3700                                 break;
3701                         case X86_VENDOR_Cyrix:  /* VIA C3 */
3702                                 /*
3703                                  * VIA C3 processors are a bit messed
3704                                  * up w.r.t. encoding cache sizes in %ecx
3705                                  */
3706                                 if (cpi->cpi_family != 6)
3707                                         break;
3708                                 /*
                                 * models 7 and 8 were incorrectly encoded
3710                                  *
3711                                  * xxx is model 8 really broken?
3712                                  */
3713                                 if (cpi->cpi_model == 7 ||
3714                                     cpi->cpi_model == 8)
3715                                         cp->cp_ecx =
3716                                             BITX(cp->cp_ecx, 31, 24) << 16 |
3717                                             BITX(cp->cp_ecx, 23, 16) << 12 |
3718                                             BITX(cp->cp_ecx, 15, 8) << 8 |
3719                                             BITX(cp->cp_ecx, 7, 0);
3720                                 /*
3721                                  * model 9 stepping 1 has wrong associativity
3722                                  */
3723                                 if (cpi->cpi_model == 9 && cpi->cpi_step == 1)
3724                                         cp->cp_ecx |= 8 << 12;
3725                                 break;
3726                         case X86_VENDOR_Intel:
3727                                 /*
3728                                  * Extended L2 Cache features function.
3729                                  * First appeared on Prescott.
3730                                  */
3731                         default:
3732                                 break;
3733                         }
3734                         break;
3735                 default:
3736                         break;
3737                 }
3738         }
3739 
3740 pass2_done:
3741         cpi->cpi_pass = 2;
3742 }
3743 
3744 static const char *
3745 intel_cpubrand(const struct cpuid_info *cpi)
3746 {
3747         int i;
3748 
3749         if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
3750             cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
3751                 return ("i486");
3752 
3753         switch (cpi->cpi_family) {
3754         case 5:
3755                 return ("Intel Pentium(r)");
3756         case 6:
3757                 switch (cpi->cpi_model) {
3758                         uint_t celeron, xeon;
3759                         const struct cpuid_regs *cp;
3760                 case 0:
3761                 case 1:
3762                 case 2:
3763                         return ("Intel Pentium(r) Pro");
3764                 case 3:
3765                 case 4:
3766                         return ("Intel Pentium(r) II");
3767                 case 6:
3768                         return ("Intel Celeron(r)");
3769                 case 5:
3770                 case 7:
3771                         celeron = xeon = 0;
3772                         cp = &cpi->cpi_std[2];   /* cache info */
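
                        /*
                         * Scan the leaf 2 cache descriptor bytes.
                         * Descriptor 0x40 indicates no L2 cache, which
                         * marks a Celeron, while 0x44 and 0x45 denote
                         * 1MB and 2MB L2 caches, marking Xeon-class
                         * parts.  Byte 0 of %eax holds the iteration
                         * count rather than a descriptor, so that scan
                         * starts at byte 1.
                         */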
3773 
3774                         for (i = 1; i < 4; i++) {
3775                                 uint_t tmp;
3776 
3777                                 tmp = (cp->cp_eax >> (8 * i)) & 0xff;
3778                                 if (tmp == 0x40)
3779                                         celeron++;
3780                                 if (tmp >= 0x44 && tmp <= 0x45)
3781                                         xeon++;
3782                         }
3783 
3784                         for (i = 0; i < 2; i++) {
3785                                 uint_t tmp;
3786 
3787                                 tmp = (cp->cp_ebx >> (8 * i)) & 0xff;
3788                                 if (tmp == 0x40)
3789                                         celeron++;
3790                                 else if (tmp >= 0x44 && tmp <= 0x45)
3791                                         xeon++;
3792                         }
3793 
3794                         for (i = 0; i < 4; i++) {
3795                                 uint_t tmp;
3796 
3797                                 tmp = (cp->cp_ecx >> (8 * i)) & 0xff;
3798                                 if (tmp == 0x40)
3799                                         celeron++;
3800                                 else if (tmp >= 0x44 && tmp <= 0x45)
3801                                         xeon++;
3802                         }
3803 
3804                         for (i = 0; i < 4; i++) {
3805                                 uint_t tmp;
3806 
3807                                 tmp = (cp->cp_edx >> (8 * i)) & 0xff;
3808                                 if (tmp == 0x40)
3809                                         celeron++;
3810                                 else if (tmp >= 0x44 && tmp <= 0x45)
3811                                         xeon++;
3812                         }
3813 
3814                         if (celeron)
3815                                 return ("Intel Celeron(r)");
3816                         if (xeon)
3817                                 return (cpi->cpi_model == 5 ?
3818                                     "Intel Pentium(r) II Xeon(tm)" :
3819                                     "Intel Pentium(r) III Xeon(tm)");
3820                         return (cpi->cpi_model == 5 ?
3821                             "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" :
3822                             "Intel Pentium(r) III or Pentium(r) III Xeon(tm)");
3823                 default:
3824                         break;
3825                 }
3826         default:
3827                 break;
3828         }
3829 
3830         /* BrandID is present if the field is nonzero */
3831         if (cpi->cpi_brandid != 0) {
3832                 static const struct {
3833                         uint_t bt_bid;
3834                         const char *bt_str;
3835                 } brand_tbl[] = {
3836                         { 0x1,  "Intel(r) Celeron(r)" },
3837                         { 0x2,  "Intel(r) Pentium(r) III" },
3838                         { 0x3,  "Intel(r) Pentium(r) III Xeon(tm)" },
3839                         { 0x4,  "Intel(r) Pentium(r) III" },
3840                         { 0x6,  "Mobile Intel(r) Pentium(r) III" },
3841                         { 0x7,  "Mobile Intel(r) Celeron(r)" },
3842                         { 0x8,  "Intel(r) Pentium(r) 4" },
3843                         { 0x9,  "Intel(r) Pentium(r) 4" },
3844                         { 0xa,  "Intel(r) Celeron(r)" },
3845                         { 0xb,  "Intel(r) Xeon(tm)" },
3846                         { 0xc,  "Intel(r) Xeon(tm) MP" },
3847                         { 0xe,  "Mobile Intel(r) Pentium(r) 4" },
3848                         { 0xf,  "Mobile Intel(r) Celeron(r)" },
3849                         { 0x11, "Mobile Genuine Intel(r)" },
3850                         { 0x12, "Intel(r) Celeron(r) M" },
3851                         { 0x13, "Mobile Intel(r) Celeron(r)" },
3852                         { 0x14, "Intel(r) Celeron(r)" },
3853                         { 0x15, "Mobile Genuine Intel(r)" },
3854                         { 0x16, "Intel(r) Pentium(r) M" },
3855                         { 0x17, "Mobile Intel(r) Celeron(r)" }
3856                 };
3857                 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]);
3858                 uint_t sgn;
3859 
3860                 sgn = (cpi->cpi_family << 8) |
3861                     (cpi->cpi_model << 4) | cpi->cpi_step;
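                /*
                 * e.g. family 6, model 0xb, stepping 1 packs to
                 * sgn == 0x6b1, which is special-cased below.
                 */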
3862 
3863                 for (i = 0; i < btblmax; i++)
3864                         if (brand_tbl[i].bt_bid == cpi->cpi_brandid)
3865                                 break;
3866                 if (i < btblmax) {
3867                         if (sgn == 0x6b1 && cpi->cpi_brandid == 3)
3868                                 return ("Intel(r) Celeron(r)");
3869                         if (sgn < 0xf13 && cpi->cpi_brandid == 0xb)
3870                                 return ("Intel(r) Xeon(tm) MP");
3871                         if (sgn < 0xf13 && cpi->cpi_brandid == 0xe)
3872                                 return ("Intel(r) Xeon(tm)");
3873                         return (brand_tbl[i].bt_str);
3874                 }
3875         }
3876 
3877         return (NULL);
3878 }
3879 
3880 static const char *
3881 amd_cpubrand(const struct cpuid_info *cpi)
3882 {
3883         if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
3884             cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
3885                 return ("i486 compatible");
3886 
3887         switch (cpi->cpi_family) {
3888         case 5:
3889                 switch (cpi->cpi_model) {
3890                 case 0:
3891                 case 1:
3892                 case 2:
3893                 case 3:
3894                 case 4:
3895                 case 5:
3896                         return ("AMD-K5(r)");
3897                 case 6:
3898                 case 7:
3899                         return ("AMD-K6(r)");
3900                 case 8:
3901                         return ("AMD-K6(r)-2");
3902                 case 9:
3903                         return ("AMD-K6(r)-III");
3904                 default:
3905                         return ("AMD (family 5)");
3906                 }
3907         case 6:
3908                 switch (cpi->cpi_model) {
3909                 case 1:
3910                         return ("AMD-K7(tm)");
3911                 case 0:
3912                 case 2:
3913                 case 4:
3914                         return ("AMD Athlon(tm)");
3915                 case 3:
3916                 case 7:
3917                         return ("AMD Duron(tm)");
3918                 case 6:
3919                 case 8:
3920                 case 10:
3921                         /*
3922                          * Use the L2 cache size to distinguish
3923                          */
3924                         return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ?
3925                             "AMD Athlon(tm)" : "AMD Duron(tm)");
3926                 default:
3927                         return ("AMD (family 6)");
3928                 }
3929         default:
3930                 break;
3931         }
3932 
3933         if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 &&
3934             cpi->cpi_brandid != 0) {
3935                 switch (BITX(cpi->cpi_brandid, 7, 5)) {
3936                 case 3:
3937                         return ("AMD Opteron(tm) UP 1xx");
3938                 case 4:
3939                         return ("AMD Opteron(tm) DP 2xx");
3940                 case 5:
3941                         return ("AMD Opteron(tm) MP 8xx");
3942                 default:
3943                         return ("AMD Opteron(tm)");
3944                 }
3945         }
3946 
3947         return (NULL);
3948 }
3949 
3950 static const char *
3951 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type)
3952 {
3953         if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
3954             cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 ||
3955             type == X86_TYPE_CYRIX_486)
3956                 return ("i486 compatible");
3957 
3958         switch (type) {
3959         case X86_TYPE_CYRIX_6x86:
3960                 return ("Cyrix 6x86");
3961         case X86_TYPE_CYRIX_6x86L:
3962                 return ("Cyrix 6x86L");
3963         case X86_TYPE_CYRIX_6x86MX:
3964                 return ("Cyrix 6x86MX");
3965         case X86_TYPE_CYRIX_GXm:
3966                 return ("Cyrix GXm");
3967         case X86_TYPE_CYRIX_MediaGX:
3968                 return ("Cyrix MediaGX");
3969         case X86_TYPE_CYRIX_MII:
3970                 return ("Cyrix M2");
3971         case X86_TYPE_VIA_CYRIX_III:
3972                 return ("VIA Cyrix M3");
3973         default:
3974                 /*
                 * Have another wild guess ...
3976                  */
3977                 if (cpi->cpi_family == 4 && cpi->cpi_model == 9)
3978                         return ("Cyrix 5x86");
3979                 else if (cpi->cpi_family == 5) {
3980                         switch (cpi->cpi_model) {
3981                         case 2:
3982                                 return ("Cyrix 6x86");  /* Cyrix M1 */
3983                         case 4:
3984                                 return ("Cyrix MediaGX");
3985                         default:
3986                                 break;
3987                         }
3988                 } else if (cpi->cpi_family == 6) {
3989                         switch (cpi->cpi_model) {
3990                         case 0:
3991                                 return ("Cyrix 6x86MX"); /* Cyrix M2? */
3992                         case 5:
3993                         case 6:
3994                         case 7:
3995                         case 8:
3996                         case 9:
3997                                 return ("VIA C3");
3998                         default:
3999                                 break;
4000                         }
4001                 }
4002                 break;
4003         }
4004         return (NULL);
4005 }
4006 
4007 /*
 * This only gets called in the case that the CPU extended
 * feature brand string leaves (0x80000002, 0x80000003,
 * 0x80000004) aren't available, or contain null bytes for
 * some reason.
4011  */
4012 static void
4013 fabricate_brandstr(struct cpuid_info *cpi)
4014 {
4015         const char *brand = NULL;
4016 
4017         switch (cpi->cpi_vendor) {
4018         case X86_VENDOR_Intel:
4019                 brand = intel_cpubrand(cpi);
4020                 break;
4021         case X86_VENDOR_AMD:
4022                 brand = amd_cpubrand(cpi);
4023                 break;
4024         case X86_VENDOR_Cyrix:
4025                 brand = cyrix_cpubrand(cpi, x86_type);
4026                 break;
4027         case X86_VENDOR_NexGen:
4028                 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
4029                         brand = "NexGen Nx586";
4030                 break;
4031         case X86_VENDOR_Centaur:
4032                 if (cpi->cpi_family == 5)
4033                         switch (cpi->cpi_model) {
4034                         case 4:
4035                                 brand = "Centaur C6";
4036                                 break;
4037                         case 8:
4038                                 brand = "Centaur C2";
4039                                 break;
4040                         case 9:
4041                                 brand = "Centaur C3";
4042                                 break;
4043                         default:
4044                                 break;
4045                         }
4046                 break;
4047         case X86_VENDOR_Rise:
4048                 if (cpi->cpi_family == 5 &&
4049                     (cpi->cpi_model == 0 || cpi->cpi_model == 2))
4050                         brand = "Rise mP6";
4051                 break;
4052         case X86_VENDOR_SiS:
4053                 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
4054                         brand = "SiS 55x";
4055                 break;
4056         case X86_VENDOR_TM:
4057                 if (cpi->cpi_family == 5 && cpi->cpi_model == 4)
4058                         brand = "Transmeta Crusoe TM3x00 or TM5x00";
4059                 break;
4060         case X86_VENDOR_NSC:
4061         case X86_VENDOR_UMC:
4062         default:
4063                 break;
4064         }
4065         if (brand) {
4066                 (void) strcpy((char *)cpi->cpi_brandstr, brand);
4067                 return;
4068         }
4069 
4070         /*
4071          * If all else fails ...
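         * construct something like "GenuineIntel 6.14.8" from the
         * vendor string and family/model/stepping (the exact values
         * here are illustrative).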
4072          */
4073         (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr),
4074             "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family,
4075             cpi->cpi_model, cpi->cpi_step);
4076 }
4077 
4078 /*
4079  * This routine is called just after kernel memory allocation
4080  * becomes available on cpu0, and as part of mp_startup() on
4081  * the other cpus.
4082  *
4083  * Fixup the brand string, and collect any information from cpuid
4084  * that requires dynamically allocated storage to represent.
4085  */
4086 /*ARGSUSED*/
4087 void
4088 cpuid_pass3(cpu_t *cpu)
4089 {
4090         int     i, max, shft, level, size;
4091         struct cpuid_regs regs;
4092         struct cpuid_regs *cp;
4093         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4094 
4095         ASSERT(cpi->cpi_pass == 2);
4096 
4097         /*
4098          * Deterministic cache parameters
4099          *
4100          * Intel uses leaf 0x4 for this, while AMD uses leaf 0x8000001d. The
4101          * values that are present are currently defined to be the same. This
4102          * means we can use the same logic to parse it as long as we use the
4103          * appropriate leaf to get the data. If you're updating this, make sure
4104          * you're careful about which vendor supports which aspect.
4105          *
4106          * Take this opportunity to detect the number of threads sharing the
4107          * last level cache, and construct a corresponding cache id. The
4108          * respective cpuid_info members are initialized to the default case of
4109          * "no last level cache sharing".
4110          */
4111         cpi->cpi_ncpu_shr_last_cache = 1;
4112         cpi->cpi_last_lvl_cacheid = cpu->cpu_id;
4113 
4114         if ((cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) ||
4115             (cpi->cpi_vendor == X86_VENDOR_AMD &&
4116             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1d &&
4117             is_x86_feature(x86_featureset, X86FSET_TOPOEXT))) {
4118                 uint32_t leaf;
4119 
4120                 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
4121                         leaf = 4;
4122                 } else {
4123                         leaf = CPUID_LEAF_EXT_1d;
4124                 }
4125 
4126                 /*
4127                  * Find the # of elements (size) returned by the leaf and along
4128                  * the way detect last level cache sharing details.
4129                  */
4130                 bzero(&regs, sizeof (regs));
4131                 cp = &regs;
4132                 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) {
4133                         cp->cp_eax = leaf;
4134                         cp->cp_ecx = i;
4135 
4136                         (void) __cpuid_insn(cp);
4137 
4138                         if (CPI_CACHE_TYPE(cp) == 0)
4139                                 break;
4140                         level = CPI_CACHE_LVL(cp);
4141                         if (level > max) {
4142                                 max = level;
4143                                 cpi->cpi_ncpu_shr_last_cache =
4144                                     CPI_NTHR_SHR_CACHE(cp) + 1;
4145                         }
4146                 }
4147                 cpi->cpi_cache_leaf_size = size = i;
4148 
4149                 /*
4150                  * Allocate the cpi_cache_leaves array. The first element
4151                  * references the regs for the corresponding leaf with %ecx set
4152                  * to 0. This was gathered in cpuid_pass2().
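                 *
                 * The array holds pointers to cpuid_regs, which is why
                 * the allocation below sizes its elements as
                 * sizeof (cp), a pointer.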
4153                  */
4154                 if (size > 0) {
4155                         cpi->cpi_cache_leaves =
4156                             kmem_alloc(size * sizeof (cp), KM_SLEEP);
4157                         if (cpi->cpi_vendor == X86_VENDOR_Intel) {
4158                                 cpi->cpi_cache_leaves[0] = &cpi->cpi_std[4];
4159                         } else {
4160                                 cpi->cpi_cache_leaves[0] = &cpi->cpi_extd[0x1d];
4161                         }
4162 
4163                         /*
                         * Allocate storage to hold the additional regs
                         * for the leaf with %ecx set to
                         * 1 .. cpi_cache_leaf_size - 1.
                         *
                         * The regs for the leaf with %ecx == 0 have
                         * already been gathered, as indicated above.
4169                          */
4170                         for (i = 1; i < size; i++) {
4171                                 cp = cpi->cpi_cache_leaves[i] =
4172                                     kmem_zalloc(sizeof (regs), KM_SLEEP);
4173                                 cp->cp_eax = leaf;
4174                                 cp->cp_ecx = i;
4175 
4176                                 (void) __cpuid_insn(cp);
4177                         }
4178                 }
4179                 /*
4180                  * Determine the number of bits needed to represent
4181                  * the number of CPUs sharing the last level cache.
4182                  *
4183                  * Shift off that number of bits from the APIC id to
4184                  * derive the cache id.
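                 *
                 * e.g. with 12 CPUs sharing the last level cache, shft
                 * comes out as 4 (the next power of two is 16), so an
                 * APIC id of 0x1f yields cache id 0x1.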
4185                  */
4186                 shft = 0;
4187                 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1)
4188                         shft++;
4189                 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft;
4190         }
4191 
4192         /*
4193          * Now fixup the brand string
4194          */
4195         if ((cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) == 0) {
4196                 fabricate_brandstr(cpi);
4197         } else {
4198 
4199                 /*
4200                  * If we successfully extracted a brand string from the cpuid
4201                  * instruction, clean it up by removing leading spaces and
4202                  * similar junk.
4203                  */
4204                 if (cpi->cpi_brandstr[0]) {
4205                         size_t maxlen = sizeof (cpi->cpi_brandstr);
4206                         char *src, *dst;
4207 
4208                         dst = src = (char *)cpi->cpi_brandstr;
4209                         src[maxlen - 1] = '\0';
4210                         /*
4211                          * strip leading spaces
4212                          */
4213                         while (*src == ' ')
4214                                 src++;
4215                         /*
4216                          * Remove any 'Genuine' or "Authentic" prefixes
4217                          */
4218                         if (strncmp(src, "Genuine ", 8) == 0)
4219                                 src += 8;
4220                         if (strncmp(src, "Authentic ", 10) == 0)
4221                                 src += 10;
4222 
4223                         /*
4224                          * Now do an in-place copy.
4225                          * Map (R) to (r) and (TM) to (tm).
4226                          * The era of teletypes is long gone, and there's
4227                          * -really- no need to shout.
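                         *
                         * e.g. "Intel(R) Xeon(R) CPU" becomes
                         * "Intel(r) Xeon(r) CPU".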
4228                          */
4229                         while (*src != '\0') {
4230                                 if (src[0] == '(') {
4231                                         if (strncmp(src + 1, "R)", 2) == 0) {
4232                                                 (void) strncpy(dst, "(r)", 3);
4233                                                 src += 3;
4234                                                 dst += 3;
4235                                                 continue;
4236                                         }
4237                                         if (strncmp(src + 1, "TM)", 3) == 0) {
4238                                                 (void) strncpy(dst, "(tm)", 4);
4239                                                 src += 4;
4240                                                 dst += 4;
4241                                                 continue;
4242                                         }
4243                                 }
4244                                 *dst++ = *src++;
4245                         }
4246                         *dst = '\0';
4247 
4248                         /*
4249                          * Finally, remove any trailing spaces
4250                          */
4251                         while (--dst > cpi->cpi_brandstr)
4252                                 if (*dst == ' ')
4253                                         *dst = '\0';
4254                                 else
4255                                         break;
4256                 } else
4257                         fabricate_brandstr(cpi);
4258         }
4259         cpi->cpi_pass = 3;
4260 }
4261 
4262 /*
4263  * This routine is called out of bind_hwcap() much later in the life
4264  * of the kernel (post_startup()).  The job of this routine is to resolve
4265  * the hardware feature support and kernel support for those features into
4266  * what we're actually going to tell applications via the aux vector.
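 *
 * e.g. even if the hardware advertises SSE2, AV_386_SSE2 is only
 * exported to userland when the kernel's X86FSET_SSE2 support is
 * present; the masking below enforces this.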
4267  */
4268 void
4269 cpuid_pass4(cpu_t *cpu, uint_t *hwcap_out)
4270 {
4271         struct cpuid_info *cpi;
4272         uint_t hwcap_flags = 0, hwcap_flags_2 = 0;
4273 
4274         if (cpu == NULL)
4275                 cpu = CPU;
4276         cpi = cpu->cpu_m.mcpu_cpi;
4277 
4278         ASSERT(cpi->cpi_pass == 3);
4279 
4280         if (cpi->cpi_maxeax >= 1) {
4281                 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES];
4282                 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES];
4283                 uint32_t *ebx = &cpi->cpi_support[STD_EBX_FEATURES];
4284 
4285                 *edx = CPI_FEATURES_EDX(cpi);
4286                 *ecx = CPI_FEATURES_ECX(cpi);
4287                 *ebx = CPI_FEATURES_7_0_EBX(cpi);
4288 
4289                 /*
4290                  * [these require explicit kernel support]
4291                  */
4292                 if (!is_x86_feature(x86_featureset, X86FSET_SEP))
4293                         *edx &= ~CPUID_INTC_EDX_SEP;
4294 
4295                 if (!is_x86_feature(x86_featureset, X86FSET_SSE))
4296                         *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE);
4297                 if (!is_x86_feature(x86_featureset, X86FSET_SSE2))
4298                         *edx &= ~CPUID_INTC_EDX_SSE2;
4299 
4300                 if (!is_x86_feature(x86_featureset, X86FSET_HTT))
4301                         *edx &= ~CPUID_INTC_EDX_HTT;
4302 
4303                 if (!is_x86_feature(x86_featureset, X86FSET_SSE3))
4304                         *ecx &= ~CPUID_INTC_ECX_SSE3;
4305 
4306                 if (!is_x86_feature(x86_featureset, X86FSET_SSSE3))
4307                         *ecx &= ~CPUID_INTC_ECX_SSSE3;
4308                 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_1))
4309                         *ecx &= ~CPUID_INTC_ECX_SSE4_1;
4310                 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_2))
4311                         *ecx &= ~CPUID_INTC_ECX_SSE4_2;
4312                 if (!is_x86_feature(x86_featureset, X86FSET_AES))
4313                         *ecx &= ~CPUID_INTC_ECX_AES;
4314                 if (!is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ))
4315                         *ecx &= ~CPUID_INTC_ECX_PCLMULQDQ;
4316                 if (!is_x86_feature(x86_featureset, X86FSET_XSAVE))
4317                         *ecx &= ~(CPUID_INTC_ECX_XSAVE |
4318                             CPUID_INTC_ECX_OSXSAVE);
4319                 if (!is_x86_feature(x86_featureset, X86FSET_AVX))
4320                         *ecx &= ~CPUID_INTC_ECX_AVX;
4321                 if (!is_x86_feature(x86_featureset, X86FSET_F16C))
4322                         *ecx &= ~CPUID_INTC_ECX_F16C;
4323                 if (!is_x86_feature(x86_featureset, X86FSET_FMA))
4324                         *ecx &= ~CPUID_INTC_ECX_FMA;
4325                 if (!is_x86_feature(x86_featureset, X86FSET_BMI1))
4326                         *ebx &= ~CPUID_INTC_EBX_7_0_BMI1;
4327                 if (!is_x86_feature(x86_featureset, X86FSET_BMI2))
4328                         *ebx &= ~CPUID_INTC_EBX_7_0_BMI2;
4329                 if (!is_x86_feature(x86_featureset, X86FSET_AVX2))
4330                         *ebx &= ~CPUID_INTC_EBX_7_0_AVX2;
4331                 if (!is_x86_feature(x86_featureset, X86FSET_RDSEED))
4332                         *ebx &= ~CPUID_INTC_EBX_7_0_RDSEED;
4333                 if (!is_x86_feature(x86_featureset, X86FSET_ADX))
4334                         *ebx &= ~CPUID_INTC_EBX_7_0_ADX;
4335 
4336                 /*
4337                  * [no explicit support required beyond x87 fp context]
4338                  */
4339                 if (!fpu_exists)
4340                         *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX);
4341 
4342                 /*
4343                  * Now map the supported feature vector to things that we
4344                  * think userland will care about.
4345                  */
4346                 if (*edx & CPUID_INTC_EDX_SEP)
4347                         hwcap_flags |= AV_386_SEP;
4348                 if (*edx & CPUID_INTC_EDX_SSE)
4349                         hwcap_flags |= AV_386_FXSR | AV_386_SSE;
4350                 if (*edx & CPUID_INTC_EDX_SSE2)
4351                         hwcap_flags |= AV_386_SSE2;
4352                 if (*ecx & CPUID_INTC_ECX_SSE3)
4353                         hwcap_flags |= AV_386_SSE3;
4354                 if (*ecx & CPUID_INTC_ECX_SSSE3)
4355                         hwcap_flags |= AV_386_SSSE3;
4356                 if (*ecx & CPUID_INTC_ECX_SSE4_1)
4357                         hwcap_flags |= AV_386_SSE4_1;
4358                 if (*ecx & CPUID_INTC_ECX_SSE4_2)
4359                         hwcap_flags |= AV_386_SSE4_2;
4360                 if (*ecx & CPUID_INTC_ECX_MOVBE)
4361                         hwcap_flags |= AV_386_MOVBE;
4362                 if (*ecx & CPUID_INTC_ECX_AES)
4363                         hwcap_flags |= AV_386_AES;
4364                 if (*ecx & CPUID_INTC_ECX_PCLMULQDQ)
4365                         hwcap_flags |= AV_386_PCLMULQDQ;
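                /*
                 * AVX (and AVX-512) capabilities are only advertised
                 * when both XSAVE and OSXSAVE are set, since userland
                 * cannot use the wider register state unless the OS
                 * manages it via xsave/xrstor.
                 */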
4366                 if ((*ecx & CPUID_INTC_ECX_XSAVE) &&
4367                     (*ecx & CPUID_INTC_ECX_OSXSAVE)) {
4368                         hwcap_flags |= AV_386_XSAVE;
4369 
4370                         if (*ecx & CPUID_INTC_ECX_AVX) {
4371                                 uint32_t *ecx_7 = &CPI_FEATURES_7_0_ECX(cpi);
4372                                 uint32_t *edx_7 = &CPI_FEATURES_7_0_EDX(cpi);
4373 
4374                                 hwcap_flags |= AV_386_AVX;
4375                                 if (*ecx & CPUID_INTC_ECX_F16C)
4376                                         hwcap_flags_2 |= AV_386_2_F16C;
4377                                 if (*ecx & CPUID_INTC_ECX_FMA)
4378                                         hwcap_flags_2 |= AV_386_2_FMA;
4379 
4380                                 if (*ebx & CPUID_INTC_EBX_7_0_BMI1)
4381                                         hwcap_flags_2 |= AV_386_2_BMI1;
4382                                 if (*ebx & CPUID_INTC_EBX_7_0_BMI2)
4383                                         hwcap_flags_2 |= AV_386_2_BMI2;
4384                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX2)
4385                                         hwcap_flags_2 |= AV_386_2_AVX2;
4386                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512F)
4387                                         hwcap_flags_2 |= AV_386_2_AVX512F;
4388                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512DQ)
4389                                         hwcap_flags_2 |= AV_386_2_AVX512DQ;
4390                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512IFMA)
4391                                         hwcap_flags_2 |= AV_386_2_AVX512IFMA;
4392                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512PF)
4393                                         hwcap_flags_2 |= AV_386_2_AVX512PF;
4394                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512ER)
4395                                         hwcap_flags_2 |= AV_386_2_AVX512ER;
4396                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512CD)
4397                                         hwcap_flags_2 |= AV_386_2_AVX512CD;
4398                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512BW)
4399                                         hwcap_flags_2 |= AV_386_2_AVX512BW;
4400                                 if (*ebx & CPUID_INTC_EBX_7_0_AVX512VL)
4401                                         hwcap_flags_2 |= AV_386_2_AVX512VL;
4402 
4403                                 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VBMI)
4404                                         hwcap_flags_2 |= AV_386_2_AVX512VBMI;
4405                                 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VNNI)
4406                                         hwcap_flags_2 |= AV_386_2_AVX512_VNNI;
4407                                 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VPOPCDQ)
4408                                         hwcap_flags_2 |= AV_386_2_AVX512VPOPCDQ;
4409 
4410                                 if (*edx_7 & CPUID_INTC_EDX_7_0_AVX5124NNIW)
4411                                         hwcap_flags_2 |= AV_386_2_AVX512_4NNIW;
4412                                 if (*edx_7 & CPUID_INTC_EDX_7_0_AVX5124FMAPS)
4413                                         hwcap_flags_2 |= AV_386_2_AVX512_4FMAPS;
4414                         }
4415                 }
4416                 if (*ecx & CPUID_INTC_ECX_VMX)
4417                         hwcap_flags |= AV_386_VMX;
4418                 if (*ecx & CPUID_INTC_ECX_POPCNT)
4419                         hwcap_flags |= AV_386_POPCNT;
4420                 if (*edx & CPUID_INTC_EDX_FPU)
4421                         hwcap_flags |= AV_386_FPU;
4422                 if (*edx & CPUID_INTC_EDX_MMX)
4423                         hwcap_flags |= AV_386_MMX;
4424 
4425                 if (*edx & CPUID_INTC_EDX_TSC)
4426                         hwcap_flags |= AV_386_TSC;
4427                 if (*edx & CPUID_INTC_EDX_CX8)
4428                         hwcap_flags |= AV_386_CX8;
4429                 if (*edx & CPUID_INTC_EDX_CMOV)
4430                         hwcap_flags |= AV_386_CMOV;
4431                 if (*ecx & CPUID_INTC_ECX_CX16)
4432                         hwcap_flags |= AV_386_CX16;
4433 
4434                 if (*ecx & CPUID_INTC_ECX_RDRAND)
4435                         hwcap_flags_2 |= AV_386_2_RDRAND;
4436                 if (*ebx & CPUID_INTC_EBX_7_0_ADX)
4437                         hwcap_flags_2 |= AV_386_2_ADX;
4438                 if (*ebx & CPUID_INTC_EBX_7_0_RDSEED)
4439                         hwcap_flags_2 |= AV_386_2_RDSEED;
4440                 if (*ebx & CPUID_INTC_EBX_7_0_SHA)
4441                         hwcap_flags_2 |= AV_386_2_SHA;
4442                 if (*ebx & CPUID_INTC_EBX_7_0_FSGSBASE)
4443                         hwcap_flags_2 |= AV_386_2_FSGSBASE;
4444                 if (*ebx & CPUID_INTC_EBX_7_0_CLWB)
4445                         hwcap_flags_2 |= AV_386_2_CLWB;
4446                 if (*ebx & CPUID_INTC_EBX_7_0_CLFLUSHOPT)
4447                         hwcap_flags_2 |= AV_386_2_CLFLUSHOPT;
4448 
4449         }
4450         /*
         * Check a few miscellaneous features.
4452          */
4453         if (is_x86_feature(x86_featureset, X86FSET_CLZERO))
4454                 hwcap_flags_2 |= AV_386_2_CLZERO;
4455 
4456         if (cpi->cpi_xmaxeax < 0x80000001)
4457                 goto pass4_done;
4458 
4459         switch (cpi->cpi_vendor) {
4460                 struct cpuid_regs cp;
4461                 uint32_t *edx, *ecx;
4462 
4463         case X86_VENDOR_Intel:
4464                 /*
                 * Seems like Intel duplicated what was necessary
                 * here to make the initial crop of 64-bit OSes work.
4467                  * Hopefully, those are the only "extended" bits
4468                  * they'll add.
4469                  */
4470                 /*FALLTHROUGH*/
4471 
4472         case X86_VENDOR_AMD:
4473                 edx = &cpi->cpi_support[AMD_EDX_FEATURES];
4474                 ecx = &cpi->cpi_support[AMD_ECX_FEATURES];
4475 
4476                 *edx = CPI_FEATURES_XTD_EDX(cpi);
4477                 *ecx = CPI_FEATURES_XTD_ECX(cpi);
4478 
4479                 /*
4480                  * [these features require explicit kernel support]
4481                  */
4482                 switch (cpi->cpi_vendor) {
4483                 case X86_VENDOR_Intel:
4484                         if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
4485                                 *edx &= ~CPUID_AMD_EDX_TSCP;
4486                         break;
4487 
4488                 case X86_VENDOR_AMD:
4489                         if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
4490                                 *edx &= ~CPUID_AMD_EDX_TSCP;
4491                         if (!is_x86_feature(x86_featureset, X86FSET_SSE4A))
4492                                 *ecx &= ~CPUID_AMD_ECX_SSE4A;
4493                         break;
4494 
4495                 default:
4496                         break;
4497                 }
4498 
4499                 /*
4500                  * [no explicit support required beyond
4501                  * x87 fp context and exception handlers]
4502                  */
4503                 if (!fpu_exists)
4504                         *edx &= ~(CPUID_AMD_EDX_MMXamd |
4505                             CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx);
4506 
4507                 if (!is_x86_feature(x86_featureset, X86FSET_NX))
4508                         *edx &= ~CPUID_AMD_EDX_NX;
4509 #if !defined(__amd64)
4510                 *edx &= ~CPUID_AMD_EDX_LM;
4511 #endif
4512                 /*
4513                  * Now map the supported feature vector to
4514                  * things that we think userland will care about.
4515                  */
4516 #if defined(__amd64)
4517                 if (*edx & CPUID_AMD_EDX_SYSC)
4518                         hwcap_flags |= AV_386_AMD_SYSC;
4519 #endif
4520                 if (*edx & CPUID_AMD_EDX_MMXamd)
4521                         hwcap_flags |= AV_386_AMD_MMX;
4522                 if (*edx & CPUID_AMD_EDX_3DNow)
4523                         hwcap_flags |= AV_386_AMD_3DNow;
4524                 if (*edx & CPUID_AMD_EDX_3DNowx)
4525                         hwcap_flags |= AV_386_AMD_3DNowx;
4526                 if (*ecx & CPUID_AMD_ECX_SVM)
4527                         hwcap_flags |= AV_386_AMD_SVM;
4528 
4529                 switch (cpi->cpi_vendor) {
4530                 case X86_VENDOR_AMD:
4531                         if (*edx & CPUID_AMD_EDX_TSCP)
4532                                 hwcap_flags |= AV_386_TSCP;
4533                         if (*ecx & CPUID_AMD_ECX_AHF64)
4534                                 hwcap_flags |= AV_386_AHF;
4535                         if (*ecx & CPUID_AMD_ECX_SSE4A)
4536                                 hwcap_flags |= AV_386_AMD_SSE4A;
4537                         if (*ecx & CPUID_AMD_ECX_LZCNT)
4538                                 hwcap_flags |= AV_386_AMD_LZCNT;
4539                         if (*ecx & CPUID_AMD_ECX_MONITORX)
4540                                 hwcap_flags_2 |= AV_386_2_MONITORX;
4541                         break;
4542 
4543                 case X86_VENDOR_Intel:
4544                         if (*edx & CPUID_AMD_EDX_TSCP)
4545                                 hwcap_flags |= AV_386_TSCP;
4546                         if (*ecx & CPUID_AMD_ECX_LZCNT)
4547                                 hwcap_flags |= AV_386_AMD_LZCNT;
4548                         /*
4549                          * Aarrgh.
4550                          * Intel uses a different bit in the same word.
4551                          */
4552                         if (*ecx & CPUID_INTC_ECX_AHF64)
4553                                 hwcap_flags |= AV_386_AHF;
4554                         break;
4555 
4556                 default:
4557                         break;
4558                 }
4559                 break;
4560 
4561         case X86_VENDOR_TM:
4562                 cp.cp_eax = 0x80860001;
4563                 (void) __cpuid_insn(&cp);
4564                 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx;
4565                 break;
4566 
4567         default:
4568                 break;
4569         }
4570 
4571 pass4_done:
4572         cpi->cpi_pass = 4;
4573         if (hwcap_out != NULL) {
4574                 hwcap_out[0] = hwcap_flags;
4575                 hwcap_out[1] = hwcap_flags_2;
4576         }
4577 }
4578 
4579 
4580 /*
4581  * Simulate the cpuid instruction using the data we previously
4582  * captured about this CPU.  We try our best to return the truth
4583  * about the hardware, independently of kernel support.
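 *
 * Illustrative usage: a caller fills in cp_eax and invokes
 * cpuid_insn(NULL, &regs) to read the cached leaf for the current
 * CPU; leaves outside the cached ranges fall through to the
 * hardware via __cpuid_insn().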
4584  */
4585 uint32_t
4586 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp)
4587 {
4588         struct cpuid_info *cpi;
4589         struct cpuid_regs *xcp;
4590 
4591         if (cpu == NULL)
4592                 cpu = CPU;
4593         cpi = cpu->cpu_m.mcpu_cpi;
4594 
4595         ASSERT(cpuid_checkpass(cpu, 3));
4596 
4597         /*
4598          * CPUID data is cached in two separate places: cpi_std for standard
         * CPUID leaves, and cpi_extd for extended CPUID leaves.
4600          */
4601         if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD) {
4602                 xcp = &cpi->cpi_std[cp->cp_eax];
4603         } else if (cp->cp_eax >= CPUID_LEAF_EXT_0 &&
4604             cp->cp_eax <= cpi->cpi_xmaxeax &&
4605             cp->cp_eax < CPUID_LEAF_EXT_0 + NMAX_CPI_EXTD) {
4606                 xcp = &cpi->cpi_extd[cp->cp_eax - CPUID_LEAF_EXT_0];
4607         } else {
4608                 /*
4609                  * The caller is asking for data from an input parameter which
4610                  * the kernel has not cached.  In this case we go fetch from
4611                  * the hardware and return the data directly to the user.
4612                  */
4613                 return (__cpuid_insn(cp));
4614         }
4615 
4616         cp->cp_eax = xcp->cp_eax;
4617         cp->cp_ebx = xcp->cp_ebx;
4618         cp->cp_ecx = xcp->cp_ecx;
4619         cp->cp_edx = xcp->cp_edx;
4620         return (cp->cp_eax);
4621 }
4622 
4623 int
4624 cpuid_checkpass(cpu_t *cpu, int pass)
4625 {
4626         return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL &&
4627             cpu->cpu_m.mcpu_cpi->cpi_pass >= pass);
4628 }
4629 
4630 int
4631 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n)
4632 {
4633         ASSERT(cpuid_checkpass(cpu, 3));
4634 
4635         return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr));
4636 }
4637 
4638 int
4639 cpuid_is_cmt(cpu_t *cpu)
4640 {
4641         if (cpu == NULL)
4642                 cpu = CPU;
4643 
4644         ASSERT(cpuid_checkpass(cpu, 1));
4645 
4646         return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0);
4647 }
4648 
4649 /*
4650  * AMD and Intel both implement the 64-bit variant of the syscall
4651  * instruction (syscallq), so if there's -any- support for syscall,
4652  * cpuid currently says "yes, we support this".
4653  *
4654  * However, Intel decided to -not- implement the 32-bit variant of the
4655  * syscall instruction, so we provide a predicate to allow our caller
4656  * to test that subtlety here.
4657  *
4658  * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor,
4659  *      even in the case where the hardware would in fact support it.
4660  */
4661 /*ARGSUSED*/
4662 int
4663 cpuid_syscall32_insn(cpu_t *cpu)
4664 {
4665         ASSERT(cpuid_checkpass((cpu == NULL ? CPU : cpu), 1));
4666 
4667 #if !defined(__xpv)
4668         if (cpu == NULL)
4669                 cpu = CPU;
4670 
4671         /*CSTYLED*/
4672         {
4673                 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4674 
4675                 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
4676                     cpi->cpi_xmaxeax >= 0x80000001 &&
4677                     (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC))
4678                         return (1);
4679         }
4680 #endif
4681         return (0);
4682 }
4683 
4684 int
4685 cpuid_getidstr(cpu_t *cpu, char *s, size_t n)
4686 {
4687         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4688 
4689         static const char fmt[] =
4690             "x86 (%s %X family %d model %d step %d clock %d MHz)";
4691         static const char fmt_ht[] =
4692             "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)";
4693 
4694         ASSERT(cpuid_checkpass(cpu, 1));
4695 
4696         if (cpuid_is_cmt(cpu))
4697                 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid,
4698                     cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
4699                     cpi->cpi_family, cpi->cpi_model,
4700                     cpi->cpi_step, cpu->cpu_type_info.pi_clock));
4701         return (snprintf(s, n, fmt,
4702             cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
4703             cpi->cpi_family, cpi->cpi_model,
4704             cpi->cpi_step, cpu->cpu_type_info.pi_clock));
4705 }
4706 
4707 const char *
4708 cpuid_getvendorstr(cpu_t *cpu)
4709 {
4710         ASSERT(cpuid_checkpass(cpu, 1));
4711         return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr);
4712 }
4713 
4714 uint_t
4715 cpuid_getvendor(cpu_t *cpu)
4716 {
4717         ASSERT(cpuid_checkpass(cpu, 1));
4718         return (cpu->cpu_m.mcpu_cpi->cpi_vendor);
4719 }
4720 
4721 uint_t
4722 cpuid_getfamily(cpu_t *cpu)
4723 {
4724         ASSERT(cpuid_checkpass(cpu, 1));
4725         return (cpu->cpu_m.mcpu_cpi->cpi_family);
4726 }
4727 
4728 uint_t
4729 cpuid_getmodel(cpu_t *cpu)
4730 {
4731         ASSERT(cpuid_checkpass(cpu, 1));
4732         return (cpu->cpu_m.mcpu_cpi->cpi_model);
4733 }
4734 
4735 uint_t
4736 cpuid_get_ncpu_per_chip(cpu_t *cpu)
4737 {
4738         ASSERT(cpuid_checkpass(cpu, 1));
4739         return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip);
4740 }
4741 
4742 uint_t
4743 cpuid_get_ncore_per_chip(cpu_t *cpu)
4744 {
4745         ASSERT(cpuid_checkpass(cpu, 1));
4746         return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip);
4747 }
4748 
4749 uint_t
4750 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu)
4751 {
4752         ASSERT(cpuid_checkpass(cpu, 2));
4753         return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache);
4754 }
4755 
4756 id_t
4757 cpuid_get_last_lvl_cacheid(cpu_t *cpu)
4758 {
4759         ASSERT(cpuid_checkpass(cpu, 2));
4760         return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
4761 }
4762 
4763 uint_t
4764 cpuid_getstep(cpu_t *cpu)
4765 {
4766         ASSERT(cpuid_checkpass(cpu, 1));
4767         return (cpu->cpu_m.mcpu_cpi->cpi_step);
4768 }
4769 
4770 uint_t
4771 cpuid_getsig(struct cpu *cpu)
4772 {
4773         ASSERT(cpuid_checkpass(cpu, 1));
4774         return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax);
4775 }
4776 
4777 uint32_t
4778 cpuid_getchiprev(struct cpu *cpu)
4779 {
4780         ASSERT(cpuid_checkpass(cpu, 1));
4781         return (cpu->cpu_m.mcpu_cpi->cpi_chiprev);
4782 }
4783 
4784 const char *
4785 cpuid_getchiprevstr(struct cpu *cpu)
4786 {
4787         ASSERT(cpuid_checkpass(cpu, 1));
4788         return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr);
4789 }
4790 
4791 uint32_t
4792 cpuid_getsockettype(struct cpu *cpu)
4793 {
4794         ASSERT(cpuid_checkpass(cpu, 1));
4795         return (cpu->cpu_m.mcpu_cpi->cpi_socket);
4796 }
4797 
4798 const char *
4799 cpuid_getsocketstr(cpu_t *cpu)
4800 {
4801         static const char *socketstr = NULL;
4802         struct cpuid_info *cpi;
4803 
4804         ASSERT(cpuid_checkpass(cpu, 1));
4805         cpi = cpu->cpu_m.mcpu_cpi;
4806 
4807         /* Assume that socket types are the same across the system */
4808         if (socketstr == NULL)
4809                 socketstr = _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family,
                    cpi->cpi_model, cpi->cpi_step);

4813         return (socketstr);
4814 }
4815 
4816 int
4817 cpuid_get_chipid(cpu_t *cpu)
4818 {
4819         ASSERT(cpuid_checkpass(cpu, 1));
4820 
4821         if (cpuid_is_cmt(cpu))
4822                 return (cpu->cpu_m.mcpu_cpi->cpi_chipid);
4823         return (cpu->cpu_id);
4824 }
4825 
4826 id_t
4827 cpuid_get_coreid(cpu_t *cpu)
4828 {
4829         ASSERT(cpuid_checkpass(cpu, 1));
4830         return (cpu->cpu_m.mcpu_cpi->cpi_coreid);
4831 }
4832 
4833 int
4834 cpuid_get_pkgcoreid(cpu_t *cpu)
4835 {
4836         ASSERT(cpuid_checkpass(cpu, 1));
4837         return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid);
4838 }
4839 
4840 int
4841 cpuid_get_clogid(cpu_t *cpu)
4842 {
4843         ASSERT(cpuid_checkpass(cpu, 1));
4844         return (cpu->cpu_m.mcpu_cpi->cpi_clogid);
4845 }
4846 
4847 int
4848 cpuid_get_cacheid(cpu_t *cpu)
4849 {
4850         ASSERT(cpuid_checkpass(cpu, 1));
4851         return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
4852 }
4853 
4854 uint_t
4855 cpuid_get_procnodeid(cpu_t *cpu)
4856 {
4857         ASSERT(cpuid_checkpass(cpu, 1));
4858         return (cpu->cpu_m.mcpu_cpi->cpi_procnodeid);
4859 }
4860 
4861 uint_t
4862 cpuid_get_procnodes_per_pkg(cpu_t *cpu)
4863 {
4864         ASSERT(cpuid_checkpass(cpu, 1));
4865         return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg);
4866 }
4867 
4868 uint_t
4869 cpuid_get_compunitid(cpu_t *cpu)
4870 {
4871         ASSERT(cpuid_checkpass(cpu, 1));
4872         return (cpu->cpu_m.mcpu_cpi->cpi_compunitid);
4873 }
4874 
4875 uint_t
4876 cpuid_get_cores_per_compunit(cpu_t *cpu)
4877 {
4878         ASSERT(cpuid_checkpass(cpu, 1));
4879         return (cpu->cpu_m.mcpu_cpi->cpi_cores_per_compunit);
4880 }
4881 
4882 /*ARGSUSED*/
4883 int
4884 cpuid_have_cr8access(cpu_t *cpu)
4885 {
4886 #if defined(__amd64)
4887         return (1);
4888 #else
4889         struct cpuid_info *cpi;
4890 
4891         ASSERT(cpu != NULL);
4892         cpi = cpu->cpu_m.mcpu_cpi;
4893         if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 &&
4894             (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0)
4895                 return (1);
4896         return (0);
4897 #endif
4898 }
4899 
4900 uint32_t
4901 cpuid_get_apicid(cpu_t *cpu)
4902 {
4903         ASSERT(cpuid_checkpass(cpu, 1));
4904         if (cpu->cpu_m.mcpu_cpi->cpi_maxeax < 1) {
4905                 return (UINT32_MAX);
4906         } else {
4907                 return (cpu->cpu_m.mcpu_cpi->cpi_apicid);
4908         }
4909 }
4910 
4911 void
4912 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits)
4913 {
4914         struct cpuid_info *cpi;
4915 
4916         if (cpu == NULL)
4917                 cpu = CPU;
4918         cpi = cpu->cpu_m.mcpu_cpi;
4919 
4920         ASSERT(cpuid_checkpass(cpu, 1));
4921 
4922         if (pabits)
4923                 *pabits = cpi->cpi_pabits;
4924         if (vabits)
4925                 *vabits = cpi->cpi_vabits;
4926 }
4927 
4928 size_t
cpuid_get_xsave_size(void)
4930 {
4931         return (MAX(cpuid_info0.cpi_xsave.xsav_max_size,
4932             sizeof (struct xsave_state)));
4933 }
4934 
4935 /*
4936  * Return true if the CPUs on this system require 'pointer clearing' for the
4937  * floating point error pointer exception handling. In the past, this has been
4938  * true for all AMD K7 & K8 CPUs, although newer AMD CPUs have been changed to
4939  * behave the same as Intel. This is checked via the CPUID_AMD_EBX_ERR_PTR_ZERO
4940  * feature bit and is reflected in the cpi_fp_amd_save member.
4941  */
4942 boolean_t
cpuid_need_fp_excp_handling(void)
4944 {
4945         return (cpuid_info0.cpi_vendor == X86_VENDOR_AMD &&
4946             cpuid_info0.cpi_fp_amd_save != 0);
4947 }
4948 
4949 /*
4950  * Returns the number of data TLB entries for a corresponding
4951  * pagesize.  If it can't be computed, or isn't known, the
4952  * routine returns zero.  If you ask about an architecturally
4953  * impossible pagesize, the routine will panic (so that the
 * hat implementor knows that things are inconsistent).
4955  */
4956 uint_t
4957 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize)
4958 {
4959         struct cpuid_info *cpi;
4960         uint_t dtlb_nent = 0;
4961 
4962         if (cpu == NULL)
4963                 cpu = CPU;
4964         cpi = cpu->cpu_m.mcpu_cpi;
4965 
4966         ASSERT(cpuid_checkpass(cpu, 1));
4967 
4968         /*
4969          * Check the L2 TLB info
4970          */
4971         if (cpi->cpi_xmaxeax >= 0x80000006) {
4972                 struct cpuid_regs *cp = &cpi->cpi_extd[6];
4973 
4974                 switch (pagesize) {
4975 
4976                 case 4 * 1024:
4977                         /*
4978                          * All zero in the top 16 bits of the register
4979                          * indicates a unified TLB. Size is in low 16 bits.
4980                          */
4981                         if ((cp->cp_ebx & 0xffff0000) == 0)
4982                                 dtlb_nent = cp->cp_ebx & 0x0000ffff;
4983                         else
4984                                 dtlb_nent = BITX(cp->cp_ebx, 27, 16);
4985                         break;
4986 
4987                 case 2 * 1024 * 1024:
4988                         if ((cp->cp_eax & 0xffff0000) == 0)
4989                                 dtlb_nent = cp->cp_eax & 0x0000ffff;
4990                         else
4991                                 dtlb_nent = BITX(cp->cp_eax, 27, 16);
4992                         break;
4993 
4994                 default:
4995                         panic("unknown L2 pagesize");
4996                         /*NOTREACHED*/
4997                 }
4998         }
4999 
5000         if (dtlb_nent != 0)
5001                 return (dtlb_nent);
5002 
5003         /*
5004          * No L2 TLB support for this size, try L1.
5005          */
5006         if (cpi->cpi_xmaxeax >= 0x80000005) {
5007                 struct cpuid_regs *cp = &cpi->cpi_extd[5];
5008 
5009                 switch (pagesize) {
5010                 case 4 * 1024:
5011                         dtlb_nent = BITX(cp->cp_ebx, 23, 16);
5012                         break;
5013                 case 2 * 1024 * 1024:
5014                         dtlb_nent = BITX(cp->cp_eax, 23, 16);
5015                         break;
5016                 default:
5017                         panic("unknown L1 d-TLB pagesize");
5018                         /*NOTREACHED*/
5019                 }
5020         }
5021 
5022         return (dtlb_nent);
5023 }
5024 
5025 /*
5026  * Return 0 if the erratum is not present or not applicable, positive
5027  * if it is, and negative if the status of the erratum is unknown.
5028  *
5029  * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm)
5030  * Processors" #25759, Rev 3.57, August 2005
5031  */
5032 int
5033 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum)
5034 {
5035         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
5036         uint_t eax;
5037 
5038         /*
5039          * Bail out if this CPU isn't an AMD CPU, or if it's
5040          * a legacy (32-bit) AMD CPU.
5041          */
5042         if (cpi->cpi_vendor != X86_VENDOR_AMD ||
5043             cpi->cpi_family == 4 || cpi->cpi_family == 5 ||
5044             cpi->cpi_family == 6) {
5045                 return (0);
5046         }
5047 
5048         eax = cpi->cpi_std[1].cp_eax;
5049 
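/*
 * The leaf 1 signature in %eax encodes the stepping in bits [3:0], the
 * model in [7:4], the family in [11:8], the extended model in [19:16]
 * and the extended family in [27:20]; e.g. 0xf58 below is family 0xf,
 * model 5, stepping 8 (an SH-C0 part).
 */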
5050 #define SH_B0(eax)      (eax == 0xf40 || eax == 0xf50)
5051 #define SH_B3(eax)      (eax == 0xf51)
5052 #define B(eax)          (SH_B0(eax) || SH_B3(eax))
5053 
5054 #define SH_C0(eax)      (eax == 0xf48 || eax == 0xf58)
5055 
5056 #define SH_CG(eax)      (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a)
5057 #define DH_CG(eax)      (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0)
5058 #define CH_CG(eax)      (eax == 0xf82 || eax == 0xfb2)
5059 #define CG(eax)         (SH_CG(eax) || DH_CG(eax) || CH_CG(eax))
5060 
5061 #define SH_D0(eax)      (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70)
5062 #define DH_D0(eax)      (eax == 0x10fc0 || eax == 0x10ff0)
5063 #define CH_D0(eax)      (eax == 0x10f80 || eax == 0x10fb0)
5064 #define D0(eax)         (SH_D0(eax) || DH_D0(eax) || CH_D0(eax))
5065 
5066 #define SH_E0(eax)      (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70)
5067 #define JH_E1(eax)      (eax == 0x20f10)        /* JH8_E0 had 0x20f30 */
5068 #define DH_E3(eax)      (eax == 0x20fc0 || eax == 0x20ff0)
5069 #define SH_E4(eax)      (eax == 0x20f51 || eax == 0x20f71)
5070 #define BH_E4(eax)      (eax == 0x20fb1)
5071 #define SH_E5(eax)      (eax == 0x20f42)
5072 #define DH_E6(eax)      (eax == 0x20ff2 || eax == 0x20fc2)
5073 #define JH_E6(eax)      (eax == 0x20f12 || eax == 0x20f32)
5074 #define EX(eax)         (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \
5075                             SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \
5076                             DH_E6(eax) || JH_E6(eax))
5077 
5078 #define DR_AX(eax)      (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02)
5079 #define DR_B0(eax)      (eax == 0x100f20)
5080 #define DR_B1(eax)      (eax == 0x100f21)
5081 #define DR_BA(eax)      (eax == 0x100f2a)
5082 #define DR_B2(eax)      (eax == 0x100f22)
5083 #define DR_B3(eax)      (eax == 0x100f23)
5084 #define RB_C0(eax)      (eax == 0x100f40)
5085 
5086         switch (erratum) {
5087         case 1:
5088                 return (cpi->cpi_family < 0x10);
5089         case 51:        /* what does the asterisk mean? */
5090                 return (B(eax) || SH_C0(eax) || CG(eax));
5091         case 52:
5092                 return (B(eax));
5093         case 57:
5094                 return (cpi->cpi_family <= 0x11);
5095         case 58:
5096                 return (B(eax));
5097         case 60:
5098                 return (cpi->cpi_family <= 0x11);
5099         case 61:
5100         case 62:
5101         case 63:
5102         case 64:
5103         case 65:
5104         case 66:
5105         case 68:
5106         case 69:
5107         case 70:
5108         case 71:
5109                 return (B(eax));
5110         case 72:
5111                 return (SH_B0(eax));
5112         case 74:
5113                 return (B(eax));
5114         case 75:
5115                 return (cpi->cpi_family < 0x10);
5116         case 76:
5117                 return (B(eax));
5118         case 77:
5119                 return (cpi->cpi_family <= 0x11);
5120         case 78:
5121                 return (B(eax) || SH_C0(eax));
5122         case 79:
5123                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
5124         case 80:
5125         case 81:
5126         case 82:
5127                 return (B(eax));
5128         case 83:
5129                 return (B(eax) || SH_C0(eax) || CG(eax));
5130         case 85:
5131                 return (cpi->cpi_family < 0x10);
5132         case 86:
5133                 return (SH_C0(eax) || CG(eax));
5134         case 88:
5135 #if !defined(__amd64)
5136                 return (0);
5137 #else
5138                 return (B(eax) || SH_C0(eax));
5139 #endif
5140         case 89:
5141                 return (cpi->cpi_family < 0x10);
5142         case 90:
5143                 return (B(eax) || SH_C0(eax) || CG(eax));
5144         case 91:
5145         case 92:
5146                 return (B(eax) || SH_C0(eax));
5147         case 93:
5148                 return (SH_C0(eax));
5149         case 94:
5150                 return (B(eax) || SH_C0(eax) || CG(eax));
5151         case 95:
5152 #if !defined(__amd64)
5153                 return (0);
5154 #else
5155                 return (B(eax) || SH_C0(eax));
5156 #endif
5157         case 96:
5158                 return (B(eax) || SH_C0(eax) || CG(eax));
5159         case 97:
5160         case 98:
5161                 return (SH_C0(eax) || CG(eax));
5162         case 99:
5163                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5164         case 100:
5165                 return (B(eax) || SH_C0(eax));
5166         case 101:
5167         case 103:
5168                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5169         case 104:
5170                 return (SH_C0(eax) || CG(eax) || D0(eax));
5171         case 105:
5172         case 106:
5173         case 107:
5174                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5175         case 108:
5176                 return (DH_CG(eax));
5177         case 109:
5178                 return (SH_C0(eax) || CG(eax) || D0(eax));
5179         case 110:
5180                 return (D0(eax) || EX(eax));
5181         case 111:
5182                 return (CG(eax));
5183         case 112:
5184                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
5185         case 113:
5186                 return (eax == 0x20fc0);
5187         case 114:
5188                 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
5189         case 115:
5190                 return (SH_E0(eax) || JH_E1(eax));
5191         case 116:
5192                 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
5193         case 117:
5194                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5195         case 118:
5196                 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) ||
5197                     JH_E6(eax));
5198         case 121:
5199                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
5200         case 122:
5201                 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11);
5202         case 123:
5203                 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax));
5204         case 131:
5205                 return (cpi->cpi_family < 0x10);
        case 6336786:
5208                 /*
5209                  * Test for AdvPowerMgmtInfo.TscPStateInvariant
5210                  * if this is a K8 family or newer processor. We're testing for
5211                  * this 'erratum' to determine whether or not we have a constant
5212                  * TSC.
5213                  *
5214                  * Our current fix for this is to disable the C1-Clock ramping.
5215                  * However, this doesn't work on newer processor families nor
5216                  * does it work when virtualized as those devices don't exist.
5217                  */
5218                 if (cpi->cpi_family >= 0x12 || get_hwenv() != HW_NATIVE) {
5219                         return (0);
5220                 }
5221 
5222                 if (CPI_FAMILY(cpi) == 0xf) {
5223                         struct cpuid_regs regs;
5224                         regs.cp_eax = 0x80000007;
5225                         (void) __cpuid_insn(&regs);
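                        /* bit 8 (0x100) of %edx is the TSC invariance flag */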
5226                         return (!(regs.cp_edx & 0x100));
5227                 }
5228                 return (0);
5229         case 6323525:
5230                 /*
5231                  * This erratum (K8 #147) is not present on family 10 and newer.
5232                  */
5233                 if (cpi->cpi_family >= 0x10) {
5234                         return (0);
5235                 }
5236                 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) |
5237                     (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40);
5238 
5239         case 6671130:
5240                 /*
                 * Check for processors (pre-Shanghai) that do not provide
                 * optimal management of 1GB PTEs in their TLB.
5243                  */
5244                 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4);
5245 
5246         case 298:
5247                 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) ||
5248                     DR_B2(eax) || RB_C0(eax));
5249 
5250         case 721:
5251 #if defined(__amd64)
5252                 return (cpi->cpi_family == 0x10 || cpi->cpi_family == 0x12);
5253 #else
5254                 return (0);
5255 #endif
5256 
5257         default:
5258                 return (-1);
5259 
5260         }
5261 }
5262 
5263 /*
5264  * Determine if specified erratum is present via OSVW (OS Visible Workaround).
5265  * Return 1 if erratum is present, 0 if not present and -1 if indeterminate.
5266  */
5267 int
5268 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum)
5269 {
5270         struct cpuid_info       *cpi;
5271         uint_t                  osvwid;
5272         static int              osvwfeature = -1;
        uint64_t                osvwlength;

5276         cpi = cpu->cpu_m.mcpu_cpi;
5277 
5278         /* confirm OSVW supported */
5279         if (osvwfeature == -1) {
5280                 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW;
5281         } else {
5282                 /* assert that osvw feature setting is consistent on all cpus */
5283                 ASSERT(osvwfeature ==
5284                     (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW));
5285         }
5286         if (!osvwfeature)
5287                 return (-1);
5288 
5289         osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK;
5290 
5291         switch (erratum) {
5292         case 298:       /* osvwid is 0 */
5293                 osvwid = 0;
5294                 if (osvwlength <= (uint64_t)osvwid) {
5295                         /* osvwid 0 is unknown */
5296                         return (-1);
5297                 }
5298 
5299                 /*
5300                  * Check the OSVW STATUS MSR to determine the state
5301                  * of the erratum where:
5302                  *   0 - fixed by HW
                 *   1 - BIOS has applied the workaround, where a BIOS
                 *       workaround is available.  (For other errata, an
                 *       OS workaround is required instead.)
                 * For a value of 1, the caller will confirm that the
                 * erratum 298 workaround has indeed been applied by BIOS.
5308                  *
5309                  * A 1 may be set in cpus that have a HW fix
5310                  * in a mixed cpu system. Regarding erratum 298:
5311                  *   In a multiprocessor platform, the workaround above
5312                  *   should be applied to all processors regardless of
5313                  *   silicon revision when an affected processor is
5314                  *   present.
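                 *
                 * For example, erratum 298 has osvwid 0; with
                 * OSVW_ID_CNT_PER_MSR status bits packed into each status
                 * MSR, its state is simply bit 0 of MSR_AMD_OSVW_STATUS
                 * itself.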
5315                  */
5316 
5317                 return (rdmsr(MSR_AMD_OSVW_STATUS +
5318                     (osvwid / OSVW_ID_CNT_PER_MSR)) &
5319                     (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR)));
5320 
5321         default:
5322                 return (-1);
5323         }
5324 }
5325 
5326 static const char assoc_str[] = "associativity";
5327 static const char line_str[] = "line-size";
5328 static const char size_str[] = "size";
5329 
5330 static void
5331 add_cache_prop(dev_info_t *devi, const char *label, const char *type,
5332     uint32_t val)
5333 {
5334         char buf[128];
5335 
5336         /*
5337          * ndi_prop_update_int() is used because it is desirable for
5338          * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set.
5339          */
5340         if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf))
5341                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val);
5342 }
5343 
5344 /*
5345  * Intel-style cache/tlb description
5346  *
5347  * Standard cpuid level 2 gives a randomly ordered
5348  * selection of tags that index into a table that describes
5349  * cache and tlb properties.
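 *
 * For example, descriptor 0x2c in intel_ctab below denotes an 8-way,
 * 32K l1-dcache with 64 byte lines; add_cacheent_props() turns such an
 * entry into l1-dcache-{associativity,line-size,size} properties.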
5350  */
5351 
5352 static const char l1_icache_str[] = "l1-icache";
5353 static const char l1_dcache_str[] = "l1-dcache";
5354 static const char l2_cache_str[] = "l2-cache";
5355 static const char l3_cache_str[] = "l3-cache";
5356 static const char itlb4k_str[] = "itlb-4K";
5357 static const char dtlb4k_str[] = "dtlb-4K";
5358 static const char itlb2M_str[] = "itlb-2M";
5359 static const char itlb4M_str[] = "itlb-4M";
5360 static const char dtlb4M_str[] = "dtlb-4M";
5361 static const char dtlb24_str[] = "dtlb0-2M-4M";
5362 static const char itlb424_str[] = "itlb-4K-2M-4M";
5363 static const char itlb24_str[] = "itlb-2M-4M";
5364 static const char dtlb44_str[] = "dtlb-4K-4M";
5365 static const char sl1_dcache_str[] = "sectored-l1-dcache";
5366 static const char sl2_cache_str[] = "sectored-l2-cache";
5367 static const char itrace_str[] = "itrace-cache";
5368 static const char sl3_cache_str[] = "sectored-l3-cache";
5369 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k";
5370 
5371 static const struct cachetab {
5372         uint8_t         ct_code;
5373         uint8_t         ct_assoc;
5374         uint16_t        ct_line_size;
5375         size_t          ct_size;
5376         const char      *ct_label;
5377 } intel_ctab[] = {
5378         /*
5379          * maintain descending order!
5380          *
5381          * Codes ignored - Reason
5382          * ----------------------
5383          * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache
5384          * f0H/f1H - Currently we do not interpret prefetch size by design
5385          */
5386         { 0xe4, 16, 64, 8*1024*1024, l3_cache_str},
5387         { 0xe3, 16, 64, 4*1024*1024, l3_cache_str},
5388         { 0xe2, 16, 64, 2*1024*1024, l3_cache_str},
5389         { 0xde, 12, 64, 6*1024*1024, l3_cache_str},
5390         { 0xdd, 12, 64, 3*1024*1024, l3_cache_str},
5391         { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str},
5392         { 0xd8, 8, 64, 4*1024*1024, l3_cache_str},
5393         { 0xd7, 8, 64, 2*1024*1024, l3_cache_str},
5394         { 0xd6, 8, 64, 1*1024*1024, l3_cache_str},
5395         { 0xd2, 4, 64, 2*1024*1024, l3_cache_str},
5396         { 0xd1, 4, 64, 1*1024*1024, l3_cache_str},
5397         { 0xd0, 4, 64, 512*1024, l3_cache_str},
5398         { 0xca, 4, 0, 512, sh_l2_tlb4k_str},
5399         { 0xc0, 4, 0, 8, dtlb44_str },
5400         { 0xba, 4, 0, 64, dtlb4k_str },
5401         { 0xb4, 4, 0, 256, dtlb4k_str },
5402         { 0xb3, 4, 0, 128, dtlb4k_str },
5403         { 0xb2, 4, 0, 64, itlb4k_str },
5404         { 0xb0, 4, 0, 128, itlb4k_str },
5405         { 0x87, 8, 64, 1024*1024, l2_cache_str},
5406         { 0x86, 4, 64, 512*1024, l2_cache_str},
5407         { 0x85, 8, 32, 2*1024*1024, l2_cache_str},
5408         { 0x84, 8, 32, 1024*1024, l2_cache_str},
5409         { 0x83, 8, 32, 512*1024, l2_cache_str},
5410         { 0x82, 8, 32, 256*1024, l2_cache_str},
5411         { 0x80, 8, 64, 512*1024, l2_cache_str},
5412         { 0x7f, 2, 64, 512*1024, l2_cache_str},
5413         { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str},
5414         { 0x7c, 8, 64, 1024*1024, sl2_cache_str},
5415         { 0x7b, 8, 64, 512*1024, sl2_cache_str},
5416         { 0x7a, 8, 64, 256*1024, sl2_cache_str},
5417         { 0x79, 8, 64, 128*1024, sl2_cache_str},
5418         { 0x78, 8, 64, 1024*1024, l2_cache_str},
5419         { 0x73, 8, 0, 64*1024, itrace_str},
5420         { 0x72, 8, 0, 32*1024, itrace_str},
5421         { 0x71, 8, 0, 16*1024, itrace_str},
5422         { 0x70, 8, 0, 12*1024, itrace_str},
5423         { 0x68, 4, 64, 32*1024, sl1_dcache_str},
5424         { 0x67, 4, 64, 16*1024, sl1_dcache_str},
5425         { 0x66, 4, 64, 8*1024, sl1_dcache_str},
5426         { 0x60, 8, 64, 16*1024, sl1_dcache_str},
5427         { 0x5d, 0, 0, 256, dtlb44_str},
5428         { 0x5c, 0, 0, 128, dtlb44_str},
5429         { 0x5b, 0, 0, 64, dtlb44_str},
5430         { 0x5a, 4, 0, 32, dtlb24_str},
5431         { 0x59, 0, 0, 16, dtlb4k_str},
5432         { 0x57, 4, 0, 16, dtlb4k_str},
5433         { 0x56, 4, 0, 16, dtlb4M_str},
5434         { 0x55, 0, 0, 7, itlb24_str},
5435         { 0x52, 0, 0, 256, itlb424_str},
5436         { 0x51, 0, 0, 128, itlb424_str},
5437         { 0x50, 0, 0, 64, itlb424_str},
5438         { 0x4f, 0, 0, 32, itlb4k_str},
5439         { 0x4e, 24, 64, 6*1024*1024, l2_cache_str},
5440         { 0x4d, 16, 64, 16*1024*1024, l3_cache_str},
5441         { 0x4c, 12, 64, 12*1024*1024, l3_cache_str},
5442         { 0x4b, 16, 64, 8*1024*1024, l3_cache_str},
5443         { 0x4a, 12, 64, 6*1024*1024, l3_cache_str},
5444         { 0x49, 16, 64, 4*1024*1024, l3_cache_str},
5445         { 0x48, 12, 64, 3*1024*1024, l2_cache_str},
5446         { 0x47, 8, 64, 8*1024*1024, l3_cache_str},
5447         { 0x46, 4, 64, 4*1024*1024, l3_cache_str},
5448         { 0x45, 4, 32, 2*1024*1024, l2_cache_str},
5449         { 0x44, 4, 32, 1024*1024, l2_cache_str},
5450         { 0x43, 4, 32, 512*1024, l2_cache_str},
5451         { 0x42, 4, 32, 256*1024, l2_cache_str},
5452         { 0x41, 4, 32, 128*1024, l2_cache_str},
5453         { 0x3e, 4, 64, 512*1024, sl2_cache_str},
5454         { 0x3d, 6, 64, 384*1024, sl2_cache_str},
5455         { 0x3c, 4, 64, 256*1024, sl2_cache_str},
5456         { 0x3b, 2, 64, 128*1024, sl2_cache_str},
5457         { 0x3a, 6, 64, 192*1024, sl2_cache_str},
5458         { 0x39, 4, 64, 128*1024, sl2_cache_str},
5459         { 0x30, 8, 64, 32*1024, l1_icache_str},
5460         { 0x2c, 8, 64, 32*1024, l1_dcache_str},
5461         { 0x29, 8, 64, 4096*1024, sl3_cache_str},
5462         { 0x25, 8, 64, 2048*1024, sl3_cache_str},
5463         { 0x23, 8, 64, 1024*1024, sl3_cache_str},
5464         { 0x22, 4, 64, 512*1024, sl3_cache_str},
5465         { 0x0e, 6, 64, 24*1024, l1_dcache_str},
5466         { 0x0d, 4, 32, 16*1024, l1_dcache_str},
5467         { 0x0c, 4, 32, 16*1024, l1_dcache_str},
5468         { 0x0b, 4, 0, 4, itlb4M_str},
5469         { 0x0a, 2, 32, 8*1024, l1_dcache_str},
5470         { 0x08, 4, 32, 16*1024, l1_icache_str},
5471         { 0x06, 4, 32, 8*1024, l1_icache_str},
5472         { 0x05, 4, 0, 32, dtlb4M_str},
5473         { 0x04, 4, 0, 8, dtlb4M_str},
5474         { 0x03, 4, 0, 64, dtlb4k_str},
5475         { 0x02, 4, 0, 2, itlb4M_str},
5476         { 0x01, 4, 0, 32, itlb4k_str},
5477         { 0 }
5478 };
5479 
5480 static const struct cachetab cyrix_ctab[] = {
5481         { 0x70, 4, 0, 32, "tlb-4K" },
5482         { 0x80, 4, 16, 16*1024, "l1-cache" },
5483         { 0 }
5484 };
5485 
5486 /*
5487  * Search a cache table for a matching entry
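 * by scanning the descending-order table for the first code less than
 * or equal to the target; only an exact match is returned.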
5488  */
5489 static const struct cachetab *
5490 find_cacheent(const struct cachetab *ct, uint_t code)
5491 {
5492         if (code != 0) {
5493                 for (; ct->ct_code != 0; ct++)
5494                         if (ct->ct_code <= code)
5495                                 break;
5496                 if (ct->ct_code == code)
5497                         return (ct);
5498         }
5499         return (NULL);
5500 }
5501 
5502 /*
5503  * Populate cachetab entry with L2 or L3 cache-information using
5504  * cpuid function 4. This function is called from intel_walk_cacheinfo()
5505  * when descriptor 0x49 is encountered. It returns 0 if no such cache
5506  * information is found.
5507  */
5508 static int
5509 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi)
5510 {
5511         uint32_t level, i;
5512         int ret = 0;
5513 
5514         for (i = 0; i < cpi->cpi_cache_leaf_size; i++) {
5515                 level = CPI_CACHE_LVL(cpi->cpi_cache_leaves[i]);
5516 
5517                 if (level == 2 || level == 3) {
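                        /*
                         * Cache size in bytes = ways * partitions *
                         * line size * sets, with each field reported
                         * minus one; e.g. 16 ways, 1 partition, 64 byte
                         * lines and 4096 sets (%ecx = 4095) describe a
                         * 4MB cache.
                         */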
5518                         ct->ct_assoc =
5519                             CPI_CACHE_WAYS(cpi->cpi_cache_leaves[i]) + 1;
5520                         ct->ct_line_size =
5521                             CPI_CACHE_COH_LN_SZ(cpi->cpi_cache_leaves[i]) + 1;
5522                         ct->ct_size = ct->ct_assoc *
5523                             (CPI_CACHE_PARTS(cpi->cpi_cache_leaves[i]) + 1) *
5524                             ct->ct_line_size *
5525                             (cpi->cpi_cache_leaves[i]->cp_ecx + 1);
5526 
5527                         if (level == 2) {
5528                                 ct->ct_label = l2_cache_str;
5529                         } else if (level == 3) {
5530                                 ct->ct_label = l3_cache_str;
5531                         }
5532                         ret = 1;
5533                 }
5534         }
5535 
5536         return (ret);
5537 }
5538 
5539 /*
5540  * Walk the cacheinfo descriptor, applying 'func' to every valid element
5541  * The walk is terminated if the walker returns non-zero.
5542  */
5543 static void
5544 intel_walk_cacheinfo(struct cpuid_info *cpi,
5545     void *arg, int (*func)(void *, const struct cachetab *))
5546 {
5547         const struct cachetab *ct;
5548         struct cachetab des_49_ct, des_b1_ct;
5549         uint8_t *dp;
5550         int i;
5551 
5552         if ((dp = cpi->cpi_cacheinfo) == NULL)
5553                 return;
5554         for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
5555                 /*
5556                  * For overloaded descriptor 0x49 we use cpuid function 4
5557                  * if supported by the current processor, to create
5558                  * cache information.
5559                  * For overloaded descriptor 0xb1 we use X86_PAE flag
5560                  * to disambiguate the cache information.
5561                  */
5562                 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 &&
5563                     intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) {
5564                                 ct = &des_49_ct;
5565                 } else if (*dp == 0xb1) {
5566                         des_b1_ct.ct_code = 0xb1;
5567                         des_b1_ct.ct_assoc = 4;
5568                         des_b1_ct.ct_line_size = 0;
5569                         if (is_x86_feature(x86_featureset, X86FSET_PAE)) {
5570                                 des_b1_ct.ct_size = 8;
5571                                 des_b1_ct.ct_label = itlb2M_str;
5572                         } else {
5573                                 des_b1_ct.ct_size = 4;
5574                                 des_b1_ct.ct_label = itlb4M_str;
5575                         }
5576                         ct = &des_b1_ct;
5577                 } else {
5578                         if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) {
5579                                 continue;
5580                         }
5581                 }
5582 
5583                 if (func(arg, ct) != 0) {
5584                         break;
5585                 }
5586         }
5587 }
5588 
5589 /*
5590  * (Like the Intel one, except for Cyrix CPUs)
5591  */
5592 static void
5593 cyrix_walk_cacheinfo(struct cpuid_info *cpi,
5594     void *arg, int (*func)(void *, const struct cachetab *))
5595 {
5596         const struct cachetab *ct;
5597         uint8_t *dp;
5598         int i;
5599 
5600         if ((dp = cpi->cpi_cacheinfo) == NULL)
5601                 return;
5602         for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
5603                 /*
5604                  * Search Cyrix-specific descriptor table first ..
5605                  */
5606                 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) {
5607                         if (func(arg, ct) != 0)
5608                                 break;
5609                         continue;
5610                 }
5611                 /*
5612                  * .. else fall back to the Intel one
5613                  */
5614                 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) {
5615                         if (func(arg, ct) != 0)
5616                                 break;
5617                         continue;
5618                 }
5619         }
5620 }
5621 
5622 /*
5623  * A cacheinfo walker that adds associativity, line-size, and size properties
5624  * to the devinfo node it is passed as an argument.
5625  */
5626 static int
5627 add_cacheent_props(void *arg, const struct cachetab *ct)
5628 {
5629         dev_info_t *devi = arg;
5630 
5631         add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc);
5632         if (ct->ct_line_size != 0)
5633                 add_cache_prop(devi, ct->ct_label, line_str,
5634                     ct->ct_line_size);
5635         add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size);
5636         return (0);
5637 }
5638 
5639 
5640 static const char fully_assoc[] = "fully-associative?";
5641 
5642 /*
5643  * AMD style cache/tlb description
5644  *
5645  * Extended functions 5 and 6 directly describe properties of
5646  * tlbs and various cache levels.
5647  */
5648 static void
5649 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc)
5650 {
5651         switch (assoc) {
5652         case 0: /* reserved; ignore */
5653                 break;
5654         default:
5655                 add_cache_prop(devi, label, assoc_str, assoc);
5656                 break;
5657         case 0xff:
5658                 add_cache_prop(devi, label, fully_assoc, 1);
5659                 break;
5660         }
5661 }
5662 
5663 static void
5664 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
5665 {
5666         if (size == 0)
5667                 return;
5668         add_cache_prop(devi, label, size_str, size);
5669         add_amd_assoc(devi, label, assoc);
5670 }
5671 
5672 static void
5673 add_amd_cache(dev_info_t *devi, const char *label,
5674     uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
5675 {
5676         if (size == 0 || line_size == 0)
5677                 return;
5678         add_amd_assoc(devi, label, assoc);
5679         /*
5680          * Most AMD parts have a sectored cache. Multiple cache lines are
5681          * associated with each tag. A sector consists of all cache lines
5682          * associated with a tag. For example, the AMD K6-III has a sector
5683          * size of 2 cache lines per tag.
5684          */
5685         if (lines_per_tag != 0)
5686                 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
5687         add_cache_prop(devi, label, line_str, line_size);
5688         add_cache_prop(devi, label, size_str, size * 1024);
5689 }
5690 
5691 static void
5692 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc)
5693 {
5694         switch (assoc) {
5695         case 0: /* off */
5696                 break;
5697         case 1:
5698         case 2:
5699         case 4:
5700                 add_cache_prop(devi, label, assoc_str, assoc);
5701                 break;
5702         case 6:
5703                 add_cache_prop(devi, label, assoc_str, 8);
5704                 break;
5705         case 8:
5706                 add_cache_prop(devi, label, assoc_str, 16);
5707                 break;
5708         case 0xf:
5709                 add_cache_prop(devi, label, fully_assoc, 1);
5710                 break;
5711         default: /* reserved; ignore */
5712                 break;
5713         }
5714 }
5715 
5716 static void
5717 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
5718 {
5719         if (size == 0 || assoc == 0)
5720                 return;
5721         add_amd_l2_assoc(devi, label, assoc);
5722         add_cache_prop(devi, label, size_str, size);
5723 }
5724 
5725 static void
5726 add_amd_l2_cache(dev_info_t *devi, const char *label,
5727     uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
5728 {
5729         if (size == 0 || assoc == 0 || line_size == 0)
5730                 return;
5731         add_amd_l2_assoc(devi, label, assoc);
5732         if (lines_per_tag != 0)
5733                 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
5734         add_cache_prop(devi, label, line_str, line_size);
5735         add_cache_prop(devi, label, size_str, size * 1024);
5736 }
5737 
5738 static void
5739 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi)
5740 {
5741         struct cpuid_regs *cp;
5742 
5743         if (cpi->cpi_xmaxeax < 0x80000005)
5744                 return;
5745         cp = &cpi->cpi_extd[5];
5746 
5747         /*
5748          * 4M/2M L1 TLB configuration
5749          *
5750          * We report the size for 2M pages because AMD uses two
5751          * TLB entries for one 4M page.
5752          */
5753         add_amd_tlb(devi, "dtlb-2M",
5754             BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16));
5755         add_amd_tlb(devi, "itlb-2M",
5756             BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0));
5757 
5758         /*
5759          * 4K L1 TLB configuration
5760          */
5761 
5762         switch (cpi->cpi_vendor) {
5763                 uint_t nentries;
5764         case X86_VENDOR_TM:
5765                 if (cpi->cpi_family >= 5) {
5766                         /*
5767                          * Crusoe processors have 256 TLB entries, but
5768                          * cpuid data format constrains them to only
5769                          * reporting 255 of them.
5770                          */
5771                         if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255)
5772                                 nentries = 256;
5773                         /*
5774                          * Crusoe processors also have a unified TLB
5775                          */
5776                         add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24),
5777                             nentries);
5778                         break;
5779                 }
5780                 /*FALLTHROUGH*/
5781         default:
5782                 add_amd_tlb(devi, itlb4k_str,
5783                     BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16));
5784                 add_amd_tlb(devi, dtlb4k_str,
5785                     BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0));
5786                 break;
5787         }
5788 
5789         /*
5790          * data L1 cache configuration
5791          */
5792 
5793         add_amd_cache(devi, l1_dcache_str,
5794             BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16),
5795             BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0));
5796 
5797         /*
5798          * code L1 cache configuration
5799          */
5800 
5801         add_amd_cache(devi, l1_icache_str,
5802             BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16),
5803             BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0));
5804 
5805         if (cpi->cpi_xmaxeax < 0x80000006)
5806                 return;
5807         cp = &cpi->cpi_extd[6];
5808 
5809         /* Check for a unified L2 TLB for large pages */
5810 
        if (BITX(cp->cp_eax, 31, 16) == 0) {
                add_amd_l2_tlb(devi, "l2-tlb-2M",
                    BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
        } else {
5815                 add_amd_l2_tlb(devi, "l2-dtlb-2M",
5816                     BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
5817                 add_amd_l2_tlb(devi, "l2-itlb-2M",
5818                     BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
5819         }
5820 
5821         /* Check for a unified L2 TLB for 4K pages */
5822 
5823         if (BITX(cp->cp_ebx, 31, 16) == 0) {
5824                 add_amd_l2_tlb(devi, "l2-tlb-4K",
5825                     BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
5826         } else {
5827                 add_amd_l2_tlb(devi, "l2-dtlb-4K",
5828                     BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
5829                 add_amd_l2_tlb(devi, "l2-itlb-4K",
5830                     BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
5831         }
5832 
5833         add_amd_l2_cache(devi, l2_cache_str,
5834             BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12),
5835             BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0));
5836 }
5837 
5838 /*
 * There are two basic ways that the x86 world describes its cache
5840  * and tlb architecture - Intel's way and AMD's way.
5841  *
5842  * Return which flavor of cache architecture we should use
5843  */
5844 static int
5845 x86_which_cacheinfo(struct cpuid_info *cpi)
5846 {
5847         switch (cpi->cpi_vendor) {
5848         case X86_VENDOR_Intel:
5849                 if (cpi->cpi_maxeax >= 2)
5850                         return (X86_VENDOR_Intel);
5851                 break;
5852         case X86_VENDOR_AMD:
5853                 /*
5854                  * The K5 model 1 was the first part from AMD that reported
5855                  * cache sizes via extended cpuid functions.
5856                  */
5857                 if (cpi->cpi_family > 5 ||
5858                     (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
5859                         return (X86_VENDOR_AMD);
5860                 break;
5861         case X86_VENDOR_TM:
5862                 if (cpi->cpi_family >= 5)
5863                         return (X86_VENDOR_AMD);
5864                 /*FALLTHROUGH*/
5865         default:
5866                 /*
5867                  * If they have extended CPU data for 0x80000005
5868                  * then we assume they have AMD-format cache
5869                  * information.
5870                  *
5871                  * If not, and the vendor happens to be Cyrix,
                 * then try our Cyrix-specific handler.
5873                  *
5874                  * If we're not Cyrix, then assume we're using Intel's
5875                  * table-driven format instead.
5876                  */
5877                 if (cpi->cpi_xmaxeax >= 0x80000005)
5878                         return (X86_VENDOR_AMD);
5879                 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix)
5880                         return (X86_VENDOR_Cyrix);
5881                 else if (cpi->cpi_maxeax >= 2)
5882                         return (X86_VENDOR_Intel);
5883                 break;
5884         }
5885         return (-1);
5886 }
5887 
5888 void
5889 cpuid_set_cpu_properties(void *dip, processorid_t cpu_id,
5890     struct cpuid_info *cpi)
5891 {
5892         dev_info_t *cpu_devi;
5893         int create;
5894 
5895         cpu_devi = (dev_info_t *)dip;
5896 
5897         /* device_type */
5898         (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
5899             "device_type", "cpu");
5900 
5901         /* reg */
5902         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5903             "reg", cpu_id);
5904 
5905         /* cpu-mhz, and clock-frequency */
5906         if (cpu_freq > 0) {
5907                 long long mul;
5908 
5909                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5910                     "cpu-mhz", cpu_freq);
5911                 if ((mul = cpu_freq * 1000000LL) <= INT_MAX)
5912                         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5913                             "clock-frequency", (int)mul);
5914         }
5915 
5916         if (!is_x86_feature(x86_featureset, X86FSET_CPUID)) {
5917                 return;
5918         }
5919 
5920         /* vendor-id */
5921         (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
5922             "vendor-id", cpi->cpi_vendorstr);
5923 
5924         if (cpi->cpi_maxeax == 0) {
5925                 return;
5926         }
5927 
5928         /*
5929          * family, model, and step
5930          */
5931         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5932             "family", CPI_FAMILY(cpi));
5933         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5934             "cpu-model", CPI_MODEL(cpi));
5935         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5936             "stepping-id", CPI_STEP(cpi));
5937 
5938         /* type */
5939         switch (cpi->cpi_vendor) {
5940         case X86_VENDOR_Intel:
5941                 create = 1;
5942                 break;
5943         default:
5944                 create = 0;
5945                 break;
5946         }
5947         if (create)
5948                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5949                     "type", CPI_TYPE(cpi));
5950 
5951         /* ext-family */
5952         switch (cpi->cpi_vendor) {
5953         case X86_VENDOR_Intel:
5954         case X86_VENDOR_AMD:
5955                 create = cpi->cpi_family >= 0xf;
5956                 break;
5957         default:
5958                 create = 0;
5959                 break;
5960         }
5961         if (create)
5962                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5963                     "ext-family", CPI_FAMILY_XTD(cpi));
5964 
5965         /* ext-model */
5966         switch (cpi->cpi_vendor) {
5967         case X86_VENDOR_Intel:
5968                 create = IS_EXTENDED_MODEL_INTEL(cpi);
5969                 break;
5970         case X86_VENDOR_AMD:
5971                 create = CPI_FAMILY(cpi) == 0xf;
5972                 break;
5973         default:
5974                 create = 0;
5975                 break;
5976         }
5977         if (create)
5978                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5979                     "ext-model", CPI_MODEL_XTD(cpi));
5980 
5981         /* generation */
5982         switch (cpi->cpi_vendor) {
5983         case X86_VENDOR_AMD:
5984                 /*
5985                  * AMD K5 model 1 was the first part to support this
5986                  */
5987                 create = cpi->cpi_xmaxeax >= 0x80000001;
5988                 break;
5989         default:
5990                 create = 0;
5991                 break;
5992         }
5993         if (create)
5994                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5995                     "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8));
5996 
5997         /* brand-id */
5998         switch (cpi->cpi_vendor) {
5999         case X86_VENDOR_Intel:
6000                 /*
                 * Brand id first appeared on Pentium III Xeon model 8 and
                 * Celeron model 8 processors, and on the AMD Opteron.
6003                  */
6004                 create = cpi->cpi_family > 6 ||
6005                     (cpi->cpi_family == 6 && cpi->cpi_model >= 8);
6006                 break;
6007         case X86_VENDOR_AMD:
6008                 create = cpi->cpi_family >= 0xf;
6009                 break;
6010         default:
6011                 create = 0;
6012                 break;
6013         }
6014         if (create && cpi->cpi_brandid != 0) {
6015                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6016                     "brand-id", cpi->cpi_brandid);
6017         }
6018 
6019         /* chunks, and apic-id */
6020         switch (cpi->cpi_vendor) {
6021                 /*
6022                  * first available on Pentium IV and Opteron (K8)
6023                  */
6024         case X86_VENDOR_Intel:
6025                 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
6026                 break;
6027         case X86_VENDOR_AMD:
6028                 create = cpi->cpi_family >= 0xf;
6029                 break;
6030         default:
6031                 create = 0;
6032                 break;
6033         }
6034         if (create) {
6035                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6036                     "chunks", CPI_CHUNKS(cpi));
6037                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6038                     "apic-id", cpi->cpi_apicid);
6039                 if (cpi->cpi_chipid >= 0) {
6040                         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6041                             "chip#", cpi->cpi_chipid);
6042                         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6043                             "clog#", cpi->cpi_clogid);
6044                 }
6045         }
6046 
6047         /* cpuid-features */
6048         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6049             "cpuid-features", CPI_FEATURES_EDX(cpi));
6050 
6051 
6052         /* cpuid-features-ecx */
6053         switch (cpi->cpi_vendor) {
6054         case X86_VENDOR_Intel:
6055                 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
6056                 break;
6057         case X86_VENDOR_AMD:
6058                 create = cpi->cpi_family >= 0xf;
6059                 break;
6060         default:
6061                 create = 0;
6062                 break;
6063         }
6064         if (create)
6065                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6066                     "cpuid-features-ecx", CPI_FEATURES_ECX(cpi));
6067 
6068         /* ext-cpuid-features */
6069         switch (cpi->cpi_vendor) {
6070         case X86_VENDOR_Intel:
6071         case X86_VENDOR_AMD:
6072         case X86_VENDOR_Cyrix:
6073         case X86_VENDOR_TM:
6074         case X86_VENDOR_Centaur:
6075                 create = cpi->cpi_xmaxeax >= 0x80000001;
6076                 break;
6077         default:
6078                 create = 0;
6079                 break;
6080         }
6081         if (create) {
6082                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6083                     "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi));
6084                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6085                     "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi));
6086         }
6087 
6088         /*
6089          * Brand String first appeared in Intel Pentium IV, AMD K5
 * model 1, and Cyrix GXm.  On earlier models we try to
 * simulate something similar .. so this string should always
 * say -something- about the processor, however lame.
6093          */
6094         (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
6095             "brand-string", cpi->cpi_brandstr);
6096 
6097         /*
6098          * Finally, cache and tlb information
6099          */
6100         switch (x86_which_cacheinfo(cpi)) {
6101         case X86_VENDOR_Intel:
6102                 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
6103                 break;
6104         case X86_VENDOR_Cyrix:
6105                 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
6106                 break;
6107         case X86_VENDOR_AMD:
6108                 amd_cache_info(cpi, cpu_devi);
6109                 break;
6110         default:
6111                 break;
6112         }
6113 }
6114 
6115 struct l2info {
6116         int *l2i_csz;
6117         int *l2i_lsz;
6118         int *l2i_assoc;
6119         int l2i_ret;
6120 };
6121 
6122 /*
6123  * A cacheinfo walker that fetches the size, line-size and associativity
6124  * of the L2 cache
6125  */
6126 static int
6127 intel_l2cinfo(void *arg, const struct cachetab *ct)
6128 {
6129         struct l2info *l2i = arg;
6130         int *ip;
6131 
6132         if (ct->ct_label != l2_cache_str &&
6133             ct->ct_label != sl2_cache_str)
6134                 return (0);     /* not an L2 -- keep walking */
6135 
6136         if ((ip = l2i->l2i_csz) != NULL)
6137                 *ip = ct->ct_size;
6138         if ((ip = l2i->l2i_lsz) != NULL)
6139                 *ip = ct->ct_line_size;
6140         if ((ip = l2i->l2i_assoc) != NULL)
6141                 *ip = ct->ct_assoc;
6142         l2i->l2i_ret = ct->ct_size;
6143         return (1);             /* was an L2 -- terminate walk */
6144 }
6145 
6146 /*
6147  * AMD L2/L3 Cache and TLB Associativity Field Definition:
6148  *
6149  *      Unlike the associativity for the L1 cache and tlb where the 8 bit
6150  *      value is the associativity, the associativity for the L2 cache and
6151  *      tlb is encoded in the following table. The 4 bit L2 value serves as
6152  *      an index into the amd_afd[] array to determine the associativity.
6153  *      -1 is undefined. 0 is fully associative.
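 *
 *      For example, an encoding of 6 indexes amd_afd[6] and yields an
 *      8-way cache, while 0xf yields 0 (fully associative).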
6154  */
6155 
6156 static int amd_afd[] =
6157         {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0};
6158 
6159 static void
6160 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i)
6161 {
6162         struct cpuid_regs *cp;
6163         uint_t size, assoc;
6164         int i;
6165         int *ip;
6166 
6167         if (cpi->cpi_xmaxeax < 0x80000006)
6168                 return;
6169         cp = &cpi->cpi_extd[6];
6170 
6171         if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 &&
6172             (size = BITX(cp->cp_ecx, 31, 16)) != 0) {
6173                 uint_t cachesz = size * 1024;
6174                 assoc = amd_afd[i];
6175 
6176                 ASSERT(assoc != -1);
6177 
6178                 if ((ip = l2i->l2i_csz) != NULL)
6179                         *ip = cachesz;
6180                 if ((ip = l2i->l2i_lsz) != NULL)
6181                         *ip = BITX(cp->cp_ecx, 7, 0);
6182                 if ((ip = l2i->l2i_assoc) != NULL)
6183                         *ip = assoc;
6184                 l2i->l2i_ret = cachesz;
6185         }
6186 }
6187 
6188 int
6189 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc)
6190 {
6191         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
6192         struct l2info __l2info, *l2i = &__l2info;
6193 
6194         l2i->l2i_csz = csz;
6195         l2i->l2i_lsz = lsz;
6196         l2i->l2i_assoc = assoc;
6197         l2i->l2i_ret = -1;
6198 
6199         switch (x86_which_cacheinfo(cpi)) {
6200         case X86_VENDOR_Intel:
6201                 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
6202                 break;
6203         case X86_VENDOR_Cyrix:
6204                 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
6205                 break;
6206         case X86_VENDOR_AMD:
6207                 amd_l2cacheinfo(cpi, l2i);
6208                 break;
6209         default:
6210                 break;
6211         }
6212         return (l2i->l2i_ret);
6213 }
6214 
6215 #if !defined(__xpv)
6216 
6217 uint32_t *
6218 cpuid_mwait_alloc(cpu_t *cpu)
6219 {
6220         uint32_t        *ret;
6221         size_t          mwait_size;
6222 
6223         ASSERT(cpuid_checkpass(CPU, 2));
6224 
6225         mwait_size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max;
6226         if (mwait_size == 0)
6227                 return (NULL);
6228 
6229         /*
         * kmem_alloc() returns cache line size aligned data for mwait_size
         * allocations.  mwait_size is currently cache line sized.  Neither
         * of these implementation details is guaranteed to be true in the
         * future.
         *
         * First try allocating mwait_size as kmem_alloc() currently returns
         * correctly aligned memory.  If kmem_alloc() does not return
         * mwait_size aligned memory, then allocate twice mwait_size and
         * round the pointer up to an mwait_size boundary.
6238          *
6239          * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we
6240          * decide to free this memory.
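         *
         * For example, with a 64 byte mwait_size, a misaligned first
         * allocation is freed, a 128 byte buffer is allocated instead,
         * and P2ROUNDUP then yields a 64 byte aligned monitor line
         * within it.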
6241          */
6242         ret = kmem_zalloc(mwait_size, KM_SLEEP);
6243         if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) {
6244                 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
6245                 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size;
6246                 *ret = MWAIT_RUNNING;
6247                 return (ret);
6248         } else {
6249                 kmem_free(ret, mwait_size);
6250                 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP);
6251                 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
6252                 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2;
6253                 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size);
6254                 *ret = MWAIT_RUNNING;
6255                 return (ret);
6256         }
6257 }
6258 
6259 void
6260 cpuid_mwait_free(cpu_t *cpu)
6261 {
6262         if (cpu->cpu_m.mcpu_cpi == NULL) {
6263                 return;
6264         }
6265 
6266         if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL &&
6267             cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) {
6268                 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual,
6269                     cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual);
6270         }
6271 
6272         cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL;
6273         cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0;
6274 }
6275 
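/*
 * Patch the kernel's tsc_read() with the routine implementing the
 * requested TSC serialization method (none, rdtsc behind mfence or
 * lfence, or rdtscp), copying the chosen _start/_end delimited
 * implementation over it.
 */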
6276 void
6277 patch_tsc_read(int flag)
6278 {
6279         size_t cnt;
6280 
6281         switch (flag) {
6282         case TSC_NONE:
6283                 cnt = &_no_rdtsc_end - &_no_rdtsc_start;
6284                 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt);
6285                 break;
6286         case TSC_RDTSC_MFENCE:
6287                 cnt = &_tsc_mfence_end - &_tsc_mfence_start;
6288                 (void) memcpy((void *)tsc_read,
6289                     (void *)&_tsc_mfence_start, cnt);
6290                 break;
6291         case TSC_RDTSC_LFENCE:
6292                 cnt = &_tsc_lfence_end - &_tsc_lfence_start;
6293                 (void) memcpy((void *)tsc_read,
6294                     (void *)&_tsc_lfence_start, cnt);
6295                 break;
6296         case TSC_TSCP:
6297                 cnt = &_tscp_end - &_tscp_start;
6298                 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt);
6299                 break;
6300         default:
6301                 /* Bail for unexpected TSC types. (TSC_NONE covers 0) */
                cmn_err(CE_PANIC, "Unrecognized TSC type: %d", flag);
6303                 break;
6304         }
6305         tsc_type = flag;
6306 }
6307 
6308 int
6309 cpuid_deep_cstates_supported(void)
6310 {
6311         struct cpuid_info *cpi;
6312         struct cpuid_regs regs;
6313 
6314         ASSERT(cpuid_checkpass(CPU, 1));
6315 
6316         cpi = CPU->cpu_m.mcpu_cpi;
6317 
6318         if (!is_x86_feature(x86_featureset, X86FSET_CPUID))
6319                 return (0);
6320 
6321         switch (cpi->cpi_vendor) {
6322         case X86_VENDOR_Intel:
6323                 if (cpi->cpi_xmaxeax < 0x80000007)
6324                         return (0);
6325 
6326                 /*
                 * Does the TSC run at a constant rate in all ACPI C-states?
6328                  */
6329                 regs.cp_eax = 0x80000007;
6330                 (void) __cpuid_insn(&regs);
6331                 return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE);
6332 
6333         default:
6334                 return (0);
6335         }
6336 }
6337 
6338 #endif  /* !__xpv */
6339 
6340 void
6341 post_startup_cpu_fixups(void)
6342 {
6343 #ifndef __xpv
6344         /*
6345          * Some AMD processors support C1E state. Entering this state will
6346          * cause the local APIC timer to stop, which we can't deal with at
6347          * this time.
6348          */
6349         if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) {
6350                 on_trap_data_t otd;
6351                 uint64_t reg;
6352 
6353                 if (!on_trap(&otd, OT_DATA_ACCESS)) {
6354                         reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT);
6355                         /* Disable C1E state if it is enabled by BIOS */
6356                         if ((reg >> AMD_ACTONCMPHALT_SHIFT) &
6357                             AMD_ACTONCMPHALT_MASK) {
6358                                 reg &= ~(AMD_ACTONCMPHALT_MASK <<
6359                                     AMD_ACTONCMPHALT_SHIFT);
6360                                 wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg);
6361                         }
6362                 }
6363                 no_trap();
6364         }
6365 #endif  /* !__xpv */
6366 }
6367 
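/*
 * Enable Process Context Identifiers (PCID) on this processor if the
 * feature is present, latching the decision into x86_use_pcid and
 * x86_use_invpcid the first time through.
 */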
6368 void
6369 enable_pcid(void)
6370 {
6371         if (x86_use_pcid == -1)
6372                 x86_use_pcid = is_x86_feature(x86_featureset, X86FSET_PCID);
6373 
6374         if (x86_use_invpcid == -1) {
6375                 x86_use_invpcid = is_x86_feature(x86_featureset,
6376                     X86FSET_INVPCID);
6377         }
6378 
6379         if (!x86_use_pcid)
6380                 return;
6381 
        /*
         * Intel says that upon setting PCIDE, the processor immediately
         * starts using the PCID bits; better make sure there's nothing
         * there.
         */
6386         ASSERT((getcr3() & MMU_PAGEOFFSET) == PCID_NONE);
6387 
6388         setcr4(getcr4() | CR4_PCIDE);
6389 }
6390 
/*
 * Set up the registers necessary to enable the XSAVE feature on this
 * processor. This function must be called early enough that no xsave/xrstor
 * instructions execute on the processor before the registers are properly
 * set up.
 *
 * The current implementation makes the following assumptions:
 * - cpuid_pass1() is done, so that the X86 features are known.
 * - fpu_probe() is done, so that fp_save_mech is chosen.
 */
6400 void
6401 xsave_setup_msr(cpu_t *cpu)
6402 {
6403         ASSERT(fp_save_mech == FP_XSAVE);
6404         ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
6405 
6406         /* Enable OSXSAVE in CR4. */
6407         setcr4(getcr4() | CR4_OSXSAVE);
6408         /*
6409          * Update SW copy of ECX, so that /dev/cpu/self/cpuid will report
6410          * correct value.
6411          */
6412         cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_ecx |= CPUID_INTC_ECX_OSXSAVE;
6413         setup_xfem();
6414 }
6415 
/*
 * Starting with the Westmere processor, the local APIC timer continues
 * running in all C-states, including the deepest ones.
 */
6421 int
6422 cpuid_arat_supported(void)
6423 {
6424         struct cpuid_info *cpi;
6425         struct cpuid_regs regs;
6426 
6427         ASSERT(cpuid_checkpass(CPU, 1));
6428         ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
6429 
6430         cpi = CPU->cpu_m.mcpu_cpi;
6431 
6432         switch (cpi->cpi_vendor) {
6433         case X86_VENDOR_Intel:
                /*
                 * The always-running local APIC timer (ARAT) is
                 * indicated by CPUID.6.EAX[2].
                 */
6438                 if (cpi->cpi_maxeax >= 6) {
6439                         regs.cp_eax = 6;
6440                         (void) cpuid_insn(NULL, &regs);
6441                         return (regs.cp_eax & CPUID_CSTATE_ARAT);
6442                 } else {
6443                         return (0);
6444                 }
6445         default:
6446                 return (0);
6447         }
6448 }
6449 
6450 /*
6451  * Check support for Intel ENERGY_PERF_BIAS feature
6452  */
6453 int
6454 cpuid_iepb_supported(struct cpu *cp)
6455 {
6456         struct cpuid_info *cpi = cp->cpu_m.mcpu_cpi;
6457         struct cpuid_regs regs;
6458 
6459         ASSERT(cpuid_checkpass(cp, 1));
6460 
6461         if (!(is_x86_feature(x86_featureset, X86FSET_CPUID)) ||
6462             !(is_x86_feature(x86_featureset, X86FSET_MSR))) {
6463                 return (0);
6464         }
6465 
        /*
         * The Intel ENERGY_PERF_BIAS MSR is indicated by capability bit
         * CPUID.6.ECX[3].
         */
6470         if ((cpi->cpi_vendor != X86_VENDOR_Intel) || (cpi->cpi_maxeax < 6))
6471                 return (0);
6472 
6473         regs.cp_eax = 0x6;
6474         (void) cpuid_insn(NULL, &regs);
6475         return (regs.cp_ecx & CPUID_EPB_SUPPORT);
6476 }
6477 
/*
 * Check support for the TSC deadline timer.
 *
 * The TSC deadline timer provides a superior software programming model
 * over the local APIC timer and eliminates "time drift": instead of
 * specifying a relative time, software specifies an absolute time as the
 * target at which the processor should generate a timer event.
 */
6487 int
6488 cpuid_deadline_tsc_supported(void)
6489 {
6490         struct cpuid_info *cpi = CPU->cpu_m.mcpu_cpi;
6491         struct cpuid_regs regs;
6492 
6493         ASSERT(cpuid_checkpass(CPU, 1));
6494         ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
6495 
6496         switch (cpi->cpi_vendor) {
6497         case X86_VENDOR_Intel:
6498                 if (cpi->cpi_maxeax >= 1) {
6499                         regs.cp_eax = 1;
6500                         (void) cpuid_insn(NULL, &regs);
6501                         return (regs.cp_ecx & CPUID_DEADLINE_TSC);
6502                 } else {
6503                         return (0);
6504                 }
6505         default:
6506                 return (0);
6507         }
6508 }
6509 
6510 #if defined(__amd64) && !defined(__xpv)
/*
 * Patch in versions of bcopy for high-performance Intel Nehalem (and
 * later) processors.
 */
6515 void
6516 patch_memops(uint_t vendor)
6517 {
6518         size_t cnt, i;
6519         caddr_t to, from;
6520 
6521         if ((vendor == X86_VENDOR_Intel) &&
6522             is_x86_feature(x86_featureset, X86FSET_SSE4_2)) {
6523                 cnt = &bcopy_patch_end - &bcopy_patch_start;
6524                 to = &bcopy_ck_size;
6525                 from = &bcopy_patch_start;
6526                 for (i = 0; i < cnt; i++) {
6527                         *to++ = *from++;
6528                 }
6529         }
6530 }
6531 #endif  /* __amd64 && !__xpv */
6532 
/*
 * We're being asked to tell the system how many bits are required to
 * represent the various core and strand IDs. While it's tempting to derive
 * this from the values in cpi_ncore_per_chip and cpi_ncpu_per_chip, that
 * isn't quite correct. Instead, this needs to be based on the number of
 * bits that the APIC allows for these different configurations. We only
 * update these to a larger value if we find one.
 */
6541 void
6542 cpuid_get_ext_topo(cpu_t *cpu, uint_t *core_nbits, uint_t *strand_nbits)
6543 {
6544         struct cpuid_info *cpi;
6545 
        VERIFY(cpuid_checkpass(cpu, 1));
6547         cpi = cpu->cpu_m.mcpu_cpi;
6548 
6549         if (cpi->cpi_ncore_bits > *core_nbits) {
6550                 *core_nbits = cpi->cpi_ncore_bits;
6551         }
6552 
6553         if (cpi->cpi_nthread_bits > *strand_nbits) {
6554                 *strand_nbits = cpi->cpi_nthread_bits;
6555         }
6556 }
6557 
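/*
 * Rescan the security-relevant CPUID leaves on this CPU (leaf 7 for Intel,
 * extended leaf 8 for AMD) and record the resulting feature bits in fset.
 * This is used after a microcode update, which may change what the
 * processor advertises.
 */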
6558 void
6559 cpuid_pass_ucode(cpu_t *cpu, uchar_t *fset)
6560 {
6561         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
6562         struct cpuid_regs cp;
6563 
6564         /*
6565          * Reread the CPUID portions that we need for various security
6566          * information.
6567          */
6568         if (cpi->cpi_vendor == X86_VENDOR_Intel) {
6569                 /*
6570                  * Check if we now have leaf 7 available to us.
6571                  */
6572                 if (cpi->cpi_maxeax < 7) {
6573                         bzero(&cp, sizeof (cp));
6574                         cp.cp_eax = 0;
6575                         cpi->cpi_maxeax = __cpuid_insn(&cp);
6576                         if (cpi->cpi_maxeax < 7)
6577                                 return;
6578                 }
6579 
6580                 bzero(&cp, sizeof (cp));
6581                 cp.cp_eax = 7;
6582                 cp.cp_ecx = 0;
6583                 (void) __cpuid_insn(&cp);
6584                 cpi->cpi_std[7] = cp;
6585         } else if (cpi->cpi_vendor == X86_VENDOR_AMD) {
                /* No extended cpuid (xcpuid) support before K5 model 1 */
6587                 if (cpi->cpi_family < 5 ||
6588                     (cpi->cpi_family == 5 && cpi->cpi_model < 1))
6589                         return;
6590 
6591                 if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8) {
6592                         bzero(&cp, sizeof (cp));
6593                         cp.cp_eax = CPUID_LEAF_EXT_0;
6594                         cpi->cpi_xmaxeax = __cpuid_insn(&cp);
6595                         if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8) {
6596                                 return;
6597                         }
6598                 }
6599 
6600                 bzero(&cp, sizeof (cp));
6601                 cp.cp_eax = CPUID_LEAF_EXT_8;
6602                 (void) __cpuid_insn(&cp);
6603                 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8, &cp);
6604                 cpi->cpi_extd[8] = cp;
6605         } else {
6606                 /*
6607                  * Nothing to do here. Return an empty set which has already
6608                  * been zeroed for us.
6609                  */
6610                 return;
6611         }
6612         cpuid_scan_security(cpu, fset);
6613 }
6614 
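/*
 * Cross-call handler for cpuid_post_ucodeadm(): rescan this CPU's security
 * feature bits into its slot of the feature set array passed in arg0.
 */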
6615 /* ARGSUSED */
6616 static int
6617 cpuid_post_ucodeadm_xc(xc_arg_t arg0, xc_arg_t arg1, xc_arg_t arg2)
6618 {
6619         uchar_t *fset;
6620 
6621         fset = (uchar_t *)(arg0 + sizeof (x86_featureset) * CPU->cpu_id);
6622         cpuid_pass_ucode(CPU, fset);
6623 
6624         return (0);
6625 }
6626 
/*
 * After a microcode update where the version has changed, we need to
 * rescan CPUID. To do this we check every CPU to make sure that they all
 * have the same microcode revision, then perform a cross call to all such
 * CPUs. It's the caller's job to make sure that no one else can end up
 * doing an update while this is going on.
 *
 * We assume that the system is microcode capable if we're called.
 */
6636 void
6637 cpuid_post_ucodeadm(void)
6638 {
6639         uint32_t rev;
6640         int i;
6641         struct cpu *cpu;
6642         cpuset_t cpuset;
6643         void *argdata;
6644         uchar_t *f0;
6645 
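        /*
         * Reserve one x86_featureset-sized slot per possible CPU; each CPU
         * fills in its own slot from the cross call below.
         */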
6646         argdata = kmem_zalloc(sizeof (x86_featureset) * NCPU, KM_SLEEP);
6647 
6648         mutex_enter(&cpu_lock);
6649         cpu = cpu_get(0);
6650         rev = cpu->cpu_m.mcpu_ucode_info->cui_rev;
6651         CPUSET_ONLY(cpuset, 0);
6652         for (i = 1; i < max_ncpus; i++) {
6653                 if ((cpu = cpu_get(i)) == NULL)
6654                         continue;
6655 
6656                 if (cpu->cpu_m.mcpu_ucode_info->cui_rev != rev) {
6657                         panic("post microcode update CPU %d has differing "
6658                             "microcode revision (%u) from CPU 0 (%u)",
6659                             i, cpu->cpu_m.mcpu_ucode_info->cui_rev, rev);
6660                 }
6661                 CPUSET_ADD(cpuset, i);
6662         }
6663 
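        /*
         * Cross call every CPU in the set so that each rescans its
         * security feature bits into its slot of argdata.
         */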
6664         kpreempt_disable();
6665         xc_sync((xc_arg_t)argdata, 0, 0, CPUSET2BV(cpuset),
6666             cpuid_post_ucodeadm_xc);
6667         kpreempt_enable();
6668 
6669         /*
6670          * OK, now look at each CPU and see if their feature sets are equal.
6671          */
6672         f0 = argdata;
6673         for (i = 1; i < max_ncpus; i++) {
6674                 uchar_t *fset;
6675                 if (!CPU_IN_SET(cpuset, i))
6676                         continue;
6677 
6678                 fset = (uchar_t *)((uintptr_t)argdata +
6679                     sizeof (x86_featureset) * i);
6680 
6681                 if (!compare_x86_featureset(f0, fset)) {
6682                         panic("Post microcode update CPU %d has "
6683                             "differing security feature (%p) set from CPU 0 "
6684                             "(%p), not appending to feature set", i,
6685                             (void *)fset, (void *)f0);
6686                 }
6687         }
6688 
6689         mutex_exit(&cpu_lock);
6690 
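        /*
         * Merge any feature bits that the rescan turned up into the global
         * feature set, announcing each addition on a verbose boot.
         */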
        for (i = 0; i < NUM_X86_FEATURES; i++) {
                if (is_x86_feature(f0, i)) {
                        cmn_err(CE_CONT, "?post-ucode x86_feature: %s\n",
                            x86_feature_names[i]);
                        add_x86_feature(x86_featureset, i);
                }
        }
6698         kmem_free(argdata, sizeof (x86_featureset) * NCPU);
6699 }