1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
24 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
25 * Copyright 2014 Josef "Jeff" Sipek <jeffpc@josefsipek.net>
26 */
27 /*
28 * Copyright (c) 2010, Intel Corporation.
29 * All rights reserved.
30 */
31 /*
32 * Portions Copyright 2009 Advanced Micro Devices, Inc.
33 */
34 /*
35 * Copyright 2019, Joyent, Inc.
36 */
37
38 /*
39 * CPU Identification logic
40 *
41 * The purpose of this file and its companion, cpuid_subr.c, is to help deal
42 * with the identification of CPUs, their features, and their topologies. More
43 * specifically, this file helps drive the following:
44 *
45 * 1. Enumeration of features of the processor which are used by the kernel to
46 * determine what features to enable or disable. These may be instruction set
47 * enhancements or features that we use.
48 *
49 * 2. Enumeration of instruction set architecture (ISA) additions that userland
50 * will be told about through the auxiliary vector.
51 *
52 * 3. Understanding the physical topology of the CPU such as the number of
53 * caches, how many cores it has, whether or not it supports simultaneous
54 * multi-threading (SMT), etc.
55 *
56 * ------------------------
57 * CPUID History and Basics
58 * ------------------------
59 *
60 * The cpuid instruction was added by Intel roughly around the time that the
61 * original Pentium was introduced. The purpose of cpuid was to provide, in a
62 * programmatic fashion, information about the CPU that previously was guessed
63 * at. For example, an important part of cpuid is that we can know what
64 * extensions to the ISA exist. Using an unsupported opcode would simply raise a
65 * #UD, so cpuid allows a program (whether a user program or the kernel)
66 * to determine what exists without crashing or getting a SIGILL. Of course,
67 * this was also during the era of the clones and the AMD Am5x86. The vendor
68 * name shows up first in cpuid for a reason.
69 *
70 * cpuid information is broken down into ranges called a 'leaf'. Each leaf puts
71 * unique values into the registers %eax, %ebx, %ecx, and %edx and each leaf has
72 * its own meaning. The different leaves are broken down into different regions:
73 *
74 * [ 0, 7fffffff ] This region is called the 'basic'
75 * region. This region is generally defined
76 * by Intel, though some of the original
77 * portions have different meanings based
78 * on the manufacturer. These days, Intel
79 * adds most new features to this region.
80 * AMD adds non-Intel compatible
81 * information in the third, extended
82 * region. Intel uses this for everything
83 * including ISA extensions, CPU
84 * features, cache information, topology,
85 * and more.
86 *
87 * There is a hole carved out of this
88 * region which is reserved for
89 * hypervisors.
90 *
91 * [ 40000000, 4fffffff ] This region, which is found in the
92 * middle of the previous region, is
93 * explicitly promised to never be used by
94 * CPUs. Instead, it is used by hypervisors
95 * to communicate information about
96 * themselves to the operating system. The
97 * values and details are unique for each
98 * hypervisor.
99 *
100 * [ 80000000, ffffffff ] This region is called the 'extended'
101 * region. Some of the low leaves mirror
102 * parts of the basic leaves. This region
103 * has generally been used by AMD for
104 * various extensions. For example, AMD-
105 * specific information about caches,
106 * features, and topology are found in this
107 * region.
108 *
109 * To request a given leaf, you place the desired leaf into %eax, zero %ebx, %ecx,
110 * and %edx, and then issue the cpuid instruction. At the first leaf in each of
111 * the ranges, one of the primary things returned is the maximum valid leaf in
112 * that range. This allows for discovery of what range of CPUID is valid.
113 *
114 * The CPUs have potentially surprising behavior when using an invalid leaf or
115 * unimplemented leaf. If the requested leaf is within the valid basic or
116 * extended range, but is unimplemented, then %eax, %ebx, %ecx, and %edx will be
117 * set to zero. However, if you specify a leaf that is outside of a valid range,
118 * then instead it will be filled with the last valid _basic_ leaf. For example,
119 * if the maximum basic value is on leaf 0x3, then issuing a cpuid for leaf 4 or
120 * an invalid extended leaf will return the information for leaf 3.
121 *
122 * Some leaves are broken down into sub-leaves. This means that the value
123 * depends on both the leaf asked for in %eax and a secondary register. For
124 * example, Intel uses the value in %ecx on leaf 7 to indicate a sub-leaf to get
125 * additional information. Or when getting topology information in leaf 0xb, the
126 * initial value in %ecx changes which level of the topology that you are
127 * getting information about.
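 *
 * As a concrete illustration, here is a minimal sketch of issuing cpuid from
 * the kernel, using the struct cpuid_regs type and the __cpuid_insn() helper
 * that are used elsewhere in this file (error handling elided):
 *
 *	struct cpuid_regs regs = { 0 };
 *
 *	regs.cp_eax = 0;			leaf 0
 *	(void) __cpuid_insn(&regs);
 *	max_basic_leaf = regs.cp_eax;
 *
 *	regs.cp_eax = 7;			leaf 7, sub-leaf 0
 *	regs.cp_ecx = 0;
 *	(void) __cpuid_insn(&regs);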
128 *
129 * cpuid values are always kept to 32 bits regardless of whether or not the
130 * program is in 64-bit mode. When executing in 64-bit mode, the upper
131 * 32 bits of the register are always set to zero so that the values are the
132 * same regardless of execution mode.
133 *
134 * ----------------------
135 * Identifying Processors
136 * ----------------------
137 *
138 * We can identify a processor in two steps. The first step looks at cpuid leaf
139 * 0. Leaf 0 contains the processor's vendor information. This is done by
140 * putting a 12 character string in %ebx, %edx, and %ecx. On AMD, it is
141 * 'AuthenticAMD' and on Intel it is 'GenuineIntel'.
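 *
 * For illustration, a hedged sketch of assembling the vendor string (note the
 * register order: %ebx, then %edx, then %ecx), continuing with the regs
 * variable from the sketch above; the first cpuid pass in this file performs
 * an equivalent copy into cpi_vendorstr:
 *
 *	char vendor[13];
 *
 *	regs.cp_eax = 0;
 *	(void) __cpuid_insn(&regs);
 *	bcopy(&regs.cp_ebx, &vendor[0], 4);
 *	bcopy(&regs.cp_edx, &vendor[4], 4);
 *	bcopy(&regs.cp_ecx, &vendor[8], 4);
 *	vendor[12] = '\0';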
142 *
143 * From there, a processor is identified by a combination of three different
144 * values:
145 *
146 * 1. Family
147 * 2. Model
148 * 3. Stepping
149 *
150 * Each vendor uses the family and model to uniquely identify a processor. The
151 * way that family and model are changed depends on the vendor. For example,
152 * Intel has been using family 0x6 for almost all of their processors since
153 * Pentium Pro/Pentium II era, often called the P6. The model is used to
154 * identify the exact processor. Different models are often used for the client
155 * (consumer) and server parts. Even though each processor often has major
156 * architectural differences, they still are considered the same family by
157 * Intel.
158 *
159 * On the other hand, each major AMD architecture generally has its own family.
160 * For example, the K8 is family 0xf, Bulldozer is 0x15, and Zen is 0x17. Within
161 * a family, the model number is used to help identify specific processors.
162 *
163 * The stepping is used to refer to a revision of a specific microprocessor. The
164 * term comes from equipment used to produce masks that are used to create
165 * integrated circuits.
166 *
167 * The information is present in leaf 1, %eax. In technical documentation you
168 * will see the terms extended model and extended family. The original family,
169 * model, and stepping fields were each 4 bits wide. If the base family value is
170 * 0xf, then the extended family field, which occupies previously reserved bits,
171 * is added to it; similarly, the extended model field widens the model for the
172 * families that define it (0x6 and 0xf on Intel, 0xf on AMD).
173 *
174 * When we process this information, we store the full family, model, and
175 * stepping in the struct cpuid_info members cpi_family, cpi_model, and
176 * cpi_step, respectively. Whenever you are performing comparisons with the
177 * family, model, and stepping, you should use these members and not the raw
178 * values from cpuid. If you must use the raw values from cpuid directly, you
179 * must make sure that you add the extended model and family to the base model
180 * and family.
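 *
 * As a rough sketch of the arithmetic involved (illustrative; the CPI_FAMILY,
 * CPI_MODEL, CPI_FAMILY_XTD, and CPI_MODEL_XTD macros below extract the raw
 * fields, and the exact rule for when the extended model applies differs
 * slightly by vendor):
 *
 *	family = CPI_FAMILY(cpi);		leaf 1 %eax, bits 11:8
 *	model = CPI_MODEL(cpi);			leaf 1 %eax, bits 7:4
 *	if (family == 0xf)
 *		family += CPI_FAMILY_XTD(cpi);	bits 27:20
 *	if (family == 0x6 || family >= 0xf)
 *		model += CPI_MODEL_XTD(cpi) << 4;	bits 19:16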
181 *
182 * In general, we do not use information about the family, model, and stepping
183 * to determine whether or not a feature is present; that is generally driven by
184 * specific leaves. However, when something we care about on the processor is
185 * not considered 'architectural', meaning that it is specific to a set of
186 * processors and not promised in the architecture model to be consistent from
187 * generation to generation, then we will fall back on this information. The
188 * most common cases where this comes up is when we have to workaround errata in
189 * the processor, are dealing with processor-specific features such as CPU
190 * performance counters, or we want to provide additional information for things
191 * such as fault management.
192 *
193 * While processors also do have a brand string, which is the name that people
194 * are familiar with when buying the processor, they are not meant for
195 * programmatic consumption. That is what the family, model, and stepping are
196 * for.
197 *
198 * ------------
199 * CPUID Passes
200 * ------------
201 *
202 * As part of performing feature detection, we break this into several different
203 * passes. The passes are as follows:
204 *
205 * Pass 0 This is a primordial pass done in locore.s to deal with
206 * Cyrix CPUs that don't support cpuid. The reality is that
207 * we likely don't run on them any more, but there is still
208 * logic for handling them.
209 *
210 * Pass 1 This is the primary pass and is responsible for doing a
211 * large number of different things:
212 *
213 * 1. Determining which vendor manufactured the CPU and
214 * the family, model, and stepping information.
215 *
216 * 2. Gathering a large number of feature flags to
217 * determine which features the CPU supports and which
218 * require other work in the OS before they can be
219 * enabled. Features detected this way are added to the
220 * x86_featureset which can be queried to
221 * determine what we should do. This includes processing
222 * all of the basic and extended CPU features that we care
223 * about.
224 *
225 * 3. Determining the CPU's topology. This includes
226 * information about how many cores and threads are present
227 * in the package. It also is responsible for figuring out
228 * which logical CPUs are potentially part of the same core
229 * and what other resources they might share. For more
230 * information see the 'Topology' section.
231 *
232 * 4. Determining the set of CPU security-specific features
233 * that we need to worry about and determining the
234 * appropriate set of workarounds.
235 *
236 * Pass 1 on the boot CPU occurs before KMDB is started.
237 *
238 * Pass 2 The second pass is done after startup(). Here, we check
239 * other miscellaneous features. Most of this is gathering
240 * additional basic and extended features that we'll use in
241 * later passes or for debugging support.
242 *
243 * Pass 3 The third pass occurs after the kernel memory allocator
244 * has been fully initialized. This gathers information
245 * where we might need dynamic memory available for our
246 * uses. This includes several varying width leaves that
247 * have cache information and the processor's brand string.
248 *
249 * Pass 4 The fourth and final normal pass is performed after the
250 * kernel has brought most everything online. This is
251 * invoked from post_startup(). In this pass, we go through
252 * the set of features that we have enabled and turn that
253 * into the hardware auxiliary vector features that
254 * userland receives. This is used by userland, primarily
255 * by the run-time link-editor (RTLD), though userland
256 * software could also refer to it directly.
257 *
258 * Microcode After a microcode update, we do a selective rescan of
259 * the cpuid leaves to determine what features have
260 * changed. Microcode updates can provide more details
261 * about security related features to deal with issues like
262 * Spectre and L1TF. On occasion, vendors have violated
263 * their contract and removed bits. However, we don't try
264 * to detect that because that puts us in a situation that
265 * we really can't deal with. As such, the only things we
266 * rescan today are security related features. See
267 * cpuid_pass_ucode().
268 *
269 * All of the passes (except pass 0) are run on all CPUs. However, for the most
270 * part we only care about what the boot CPU says about this information and use
271 * the other CPUs as a rough guide to sanity check that we have the same feature
272 * set.
273 *
274 * We do not support running multiple logical CPUs with disjoint, let alone
275 * different, feature sets.
276 *
277 * ------------------
278 * Processor Topology
279 * ------------------
280 *
281 * One of the important things that we need to do is to understand the topology
282 * of the underlying processor. When we say topology in this case, we're trying
283 * to understand the relationship between the logical CPUs that the operating
284 * system sees and the underlying physical layout. Different logical CPUs may
285 * share different resources which can have important consequences for the
286 * performance of the system. For example, they may share caches, execution
287 * units, and more.
288 *
289 * The topology of the processor changes from generation to generation and
290 * vendor to vendor. Along with that, different vendors use different
291 * terminology, and the operating system itself uses occasionally overlapping
292 * terminology. It's important to understand what this topology looks like so
293 * one can understand the different things that we try to calculate and
294 * determine.
295 *
296 * To get started, let's talk about a little bit of terminology that we've used
297 * so far, is used throughout this file, and is fairly generic across multiple
298 * vendors:
299 *
300 * CPU
301 * A central processing unit (CPU) refers to a logical and/or virtual
302 * entity that the operating system can execute instructions on. The
303 * underlying resources for this CPU may be shared between multiple
304 * entities; however, to the operating system it is a discrete unit.
305 *
306 * PROCESSOR and PACKAGE
307 *
308 * Generally, when we use the term 'processor' on its own, we are referring
309 * to the physical entity that one buys and plugs into a board. However,
310 * because 'processor' has been overloaded and one might see it used to mean
311 * multiple different levels, we will instead use the term 'package' for
312 * the rest of this file. The term package comes from the electrical
313 * engineering side and refers to the physical entity that encloses the
314 * electronics inside. Strictly speaking the package can contain more than
315 * just the CPU, for example, on many processors it may also have what's
316 * called an 'integrated graphical processing unit (GPU)'. Because the
317 * package can encapsulate multiple units, it is the largest physical unit
318 * that we refer to.
319 *
320 * SOCKET
321 *
322 * A socket refers to a unit on a system board (generally the motherboard)
323 * that can receive a package. A single package, or processor, is plugged
324 * into a single socket. A system may have multiple sockets. Often times,
325 * the term socket is used interchangeably with package and refers to the
326 * electrical component that has been plugged in, and not the receptacle itself.
327 *
328 * CORE
329 *
330 * A core refers to the physical instantiation of a CPU, generally, with a
331 * full set of hardware resources available to it. A package may contain
332 * multiple cores inside of it or it may just have a single one. A
333 * processor with more than one core is often referred to as 'multi-core'.
334 * In illumos, we will use the feature X86FSET_CMP to refer to a system
335 * that has 'multi-core' processors.
336 *
337 * A core may expose a single logical CPU to the operating system, or it
338 * may expose multiple CPUs, which we call threads, defined below.
339 *
340 * Some resources may still be shared by cores in the same package. For
341 * example, many processors will share the level 3 cache between cores.
342 * Some AMD generations share hardware resources between cores. For more
343 * information on that see the section 'AMD Topology'.
344 *
345 * THREAD and STRAND
346 *
347 * In this file, generally a thread refers to a hardware resource and not
348 * the operating system's logical abstraction. A thread is always exposed
349 * as an independent logical CPU to the operating system. A thread belongs
350 * to a specific core. A core may have more than one thread. When that is
351 * the case, the threads that are part of the same core are often referred
352 * to as 'siblings'.
353 *
354 * When multiple threads exist, this is generally referred to as
355 * simultaneous multi-threading (SMT). When Intel introduced this in their
356 * processors they called it hyper-threading (HT). When multiple threads
357 * are active in a core, they split the resources of the core. For example,
358 * two threads may share the same set of hardware execution units.
359 *
360 * The operating system often uses the term 'strand' to refer to a thread.
361 * This helps disambiguate it from the software concept.
362 *
363 * CHIP
364 *
365 * Unfortunately, the term 'chip' is dramatically overloaded. In its most
366 * basic sense, it is used to refer to a single integrated circuit, which
367 * may or may not be the only thing in the package. In illumos, when you
368 * see the term 'chip' it is almost always referring to the same thing as
369 * the 'package'. However, many vendors may use chip to refer to one of
370 * many integrated circuits that have been placed in the package. As an
371 * example, see the subsequent definition.
372 *
373 * To try and keep things consistent, we will only use chip when referring
374 * to the entire integrated circuit package, with the exception of the
375 * definition of multi-chip module (because it is in the name) and use the
376 * term 'die' when we want the more general, potential sub-component
377 * definition.
378 *
379 * DIE
380 *
381 * A die refers to an integrated circuit. Inside of the package there may
382 * be a single die or multiple dies. This is sometimes called a 'chip' in
383 * vendor's parlance, but in this file, we use the term die to refer to a
384 * subcomponent.
385 *
386 * MULTI-CHIP MODULE
387 *
388 * A multi-chip module (MCM) refers to putting multiple distinct chips that
389 * are connected together in the same package. When a multi-chip design is
390 * used, generally each chip is manufactured independently and then joined
391 * together in the package. For example, on AMD's Zen microarchitecture
392 * (family 0x17), the package contains several dies (the second meaning of
393 * chip from above) that are connected together.
394 *
395 * CACHE
396 *
397 * A cache is a part of the processor that maintains copies of recently
398 * accessed memory. Caches are split into levels and then into types.
399 * Commonly there are one to three levels, called level one, two, and
400 * three. The lower the level, the smaller it is, the closer it is to the
401 * execution units of the CPU, and the faster it is to access. The layout
402 * and design of the cache come in many different flavors, consult other
403 * resources for a discussion of those.
404 *
405 * Caches are generally split into two types, the instruction and data
406 * cache. The caches contain what their names suggest, the instruction
407 * cache has executable program text, while the data cache has all other
408 * memory that the processor accesses. As of this writing, data is kept
409 * coherent between all of the caches on x86, so if one modifies program
410 * text before it is executed, that will be in the data cache, and the
411 * instruction cache will be synchronized with that change when the
412 * processor actually executes those instructions. This coherency also
413 * covers the fact that data could show up in multiple caches.
414 *
415 * Generally, the lowest level caches are specific to a core. However, the
416 * last-level cache is shared between some number of cores. The number of
417 * CPUs sharing this last level cache is important. This has implications
418 * for the choices that the scheduler makes, as accessing memory that might
419 * be in a remote cache after thread migration can be quite expensive.
420 *
421 * Sometimes, the word cache is abbreviated with a '$', because in US
422 * English the word cache is pronounced the same as cash. So L1D$ refers to
423 * the L1 data cache, and L2$ would be the L2 cache. This will not be used
424 * in the rest of this theory statement for clarity.
425 *
426 * MEMORY CONTROLLER
427 *
428 * The memory controller is a component that provides access to DRAM. Each
429 * memory controller can access a set number of DRAM channels. Each channel
430 * can have a number of DIMMs (sticks of memory) associated with it. A
431 * given package may have more than one memory controller. The association
432 * of the memory controller to a group of cores is important as it is
433 * cheaper to access memory on the controller that you are associated with.
434 *
435 * NUMA
436 *
437 * NUMA or non-uniform memory access, describes a way that systems are
438 * built. On x86, any processor core can address all of the memory in the
439 * system. However, when using multiple sockets or possibly within a
440 * multi-chip module, some of that memory is physically closer and some of
441 * it is further. Memory that is further away is more expensive to access.
442 * Consider the following image of multiple sockets with memory:
443 *
444 * +--------+ +--------+
445 * | DIMM A | +----------+ +----------+ | DIMM D |
446 * +--------+-+ | | | | +-+------+-+
447 * | DIMM B |=======| Socket 0 |======| Socket 1 |=======| DIMM E |
448 * +--------+-+ | | | | +-+------+-+
449 * | DIMM C | +----------+ +----------+ | DIMM F |
450 * +--------+ +--------+
451 *
452 * In this example, Socket 0 is closer to DIMMs A-C while Socket 1 is
453 * closer to DIMMs D-F. This means that it is cheaper for socket 0 to
454 * access DIMMs A-C and more expensive to access D-F as it has to go
455 * through Socket 1 to get there. The inverse is true for Socket 1. DIMMs
456 * D-F are cheaper than A-C. While the socket form is the most common, when
457 * using multi-chip modules, this can also sometimes occur. For another
458 * example of this that's more involved, see the AMD topology section.
459 *
460 *
461 * Intel Topology
462 * --------------
463 *
464 * Most Intel processors since Nehalem (as of this writing the current gen
465 * is Skylake / Cannon Lake) follow a fairly similar pattern. The CPU portion of
466 * the package is a single monolithic die. MCMs currently aren't used. Most
467 * parts have three levels of caches, with the L3 cache being shared between
468 * all of the cores on the package. The L1/L2 cache is generally specific to
469 * an individual core. The following image shows at a simplified level what
470 * this looks like. The memory controller is commonly part of something called
471 * the 'Uncore', which used to be separate physical chips that were not a part of
472 * the package, but are now part of the same chip.
473 *
474 * +-----------------------------------------------------------------------+
475 * | Package |
476 * | +-------------------+ +-------------------+ +-------------------+ |
477 * | | Core | | Core | | Core | |
478 * | | +--------+ +---+ | | +--------+ +---+ | | +--------+ +---+ | |
479 * | | | Thread | | L | | | | Thread | | L | | | | Thread | | L | | |
480 * | | +--------+ | 1 | | | +--------+ | 1 | | | +--------+ | 1 | | |
481 * | | +--------+ | | | | +--------+ | | | | +--------+ | | | |
482 * | | | Thread | | | | | | Thread | | | | | | Thread | | | | |
483 * | | +--------+ +---+ | | +--------+ +---+ | | +--------+ +---+ | |
484 * | | +--------------+ | | +--------------+ | | +--------------+ | |
485 * | | | L2 Cache | | | | L2 Cache | | | | L2 Cache | | |
486 * | | +--------------+ | | +--------------+ | | +--------------+ | |
487 * | +-------------------+ +-------------------+ +-------------------+ |
488 * | +-------------------------------------------------------------------+ |
489 * | | Shared L3 Cache | |
490 * | +-------------------------------------------------------------------+ |
491 * | +-------------------------------------------------------------------+ |
492 * | | Memory Controller | |
493 * | +-------------------------------------------------------------------+ |
494 * +-----------------------------------------------------------------------+
495 *
496 * A side effect of this current architecture is that what we care about from a
497 * scheduling and topology perspective, is simplified. In general we care about
498 * understanding which logical CPUs are part of the same core and socket.
499 *
500 * To determine the relationship between threads and cores, Intel initially used
501 * the identifier in the advanced programmable interrupt controller (APIC). They
502 * also added cpuid leaf 4 to give additional information about the number of
503 * threads and CPUs in the processor. With the addition of x2apic (which
504 * increased the number of addressable logical CPUs from 8 bits to 32 bits), an
505 * additional cpuid topology leaf 0xB was added.
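 *
 * To give a flavor of how leaf 0xB is consumed (a simplified sketch, again
 * reusing the regs variable from the earlier sketch; the real decoding handles
 * more cases): each sub-leaf in %ecx describes one topology level, %ecx[15:8]
 * gives the level type (1 is SMT, 2 is core), and %eax[4:0] gives the number
 * of low APIC ID bits to shift away to derive the ID at the next level up:
 *
 *	for (i = 0; ; i++) {
 *		regs.cp_eax = 0xb;
 *		regs.cp_ecx = i;
 *		(void) __cpuid_insn(&regs);
 *		if (CPI_CPU_LEVEL_TYPE(&regs) == 0)
 *			break;
 *		shift = BITX(regs.cp_eax, 4, 0);
 *		x2apic_id = regs.cp_edx;
 *	}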
506 *
507 * AMD Topology
508 * ------------
509 *
510 * When discussing AMD topology, we want to break this into three distinct
511 * generations of topology. There's the basic topology that has been used in
512 * family 0xf+ (Opteron, Athlon64), there's the topology that was introduced
513 * with family 0x15 (Bulldozer), and there's the topology that was introduced
514 * with family 0x17 (Zen). AMD also has some additional terminology that's worth
515 * talking about.
516 *
517 * Until the introduction of family 0x17 (Zen), AMD did not implement something
518 * that they considered SMT. Whether or not the AMD processors have SMT
519 * influences many things including scheduling and reliability, availability,
520 * and serviceability (RAS) features.
521 *
522 * NODE
523 *
524 * AMD uses the term node to refer to a die that contains a number of cores
525 * and I/O resources. Depending on the processor family and model, more
526 * than one node can be present in the package. When there is more than one
527 * node, this indicates a multi-chip module. Usually each node has its own
528 * access to memory and I/O devices. This is important and generally
529 * different from the corresponding Intel Nehalem-Skylake+ processors. As a
530 * result, we track this relationship in the operating system.
531 *
532 * In processors with an L3 cache, the L3 cache is generally shared across
533 * the entire node, though the way this is carved up varies from generation
534 * to generation.
535 *
536 * BULLDOZER
537 *
538 * Starting with the Bulldozer family (0x15) and continuing until the
539 * introduction of the Zen microarchitecture, AMD introduced the idea of a
540 * compute unit. In a compute unit, two traditional cores share a number of
541 * hardware resources. Critically, they share the FPU, L1 instruction
542 * cache, and the L2 cache. Several compute units were then combined inside
543 * of a single node. Because the integer execution units, L1 data cache,
544 * and some other resources were not shared between the cores, AMD never
545 * considered this to be SMT.
546 *
547 * ZEN
548 *
549 * The Zen family (0x17) uses a multi-chip module (MCM) design; the module
550 * is called Zeppelin. These modules are similar to the idea of nodes used
551 * previously. Each of these nodes has two DRAM channels which all of the
552 * cores in the node can access uniformly. These nodes are linked together
553 * in the package, creating a NUMA environment.
554 *
555 * The Zeppelin die itself contains two different 'core complexes'. Each
556 * core complex consists of four cores which each have two threads, for a
557 * total of 8 logical CPUs per complex. Unlike other generations,
558 * where all the logical CPUs in a given node share the L3 cache, here each
559 * core complex has its own shared L3 cache.
560 *
561 * A further thing that we need to consider is that in some configurations,
562 * particularly with the Threadripper line of processors, not every die
563 * actually has its memory controllers wired up to actual memory channels.
564 * This means that some dies have memory attached to them and others
565 * don't.
566 *
567 * To put Zen in perspective, consider the following images:
568 *
569 * +--------------------------------------------------------+
570 * | Core Complex |
571 * | +-------------------+ +-------------------+ +---+ |
572 * | | Core +----+ | | Core +----+ | | | |
573 * | | +--------+ | L2 | | | +--------+ | L2 | | | | |
574 * | | | Thread | +----+ | | | Thread | +----+ | | | |
575 * | | +--------+-+ +--+ | | +--------+-+ +--+ | | L | |
576 * | | | Thread | |L1| | | | Thread | |L1| | | 3 | |
577 * | | +--------+ +--+ | | +--------+ +--+ | | | |
578 * | +-------------------+ +-------------------+ | C | |
579 * | +-------------------+ +-------------------+ | a | |
580 * | | Core +----+ | | Core +----+ | | c | |
581 * | | +--------+ | L2 | | | +--------+ | L2 | | | h | |
582 * | | | Thread | +----+ | | | Thread | +----+ | | e | |
583 * | | +--------+-+ +--+ | | +--------+-+ +--+ | | | |
584 * | | | Thread | |L1| | | | Thread | |L1| | | | |
585 * | | +--------+ +--+ | | +--------+ +--+ | | | |
586 * | +-------------------+ +-------------------+ +---+ |
587 * | |
588 * +--------------------------------------------------------+
589 *
590 * This first image represents a single Zen core complex that consists of four
591 * cores.
592 *
593 *
594 * +--------------------------------------------------------+
595 * | Zeppelin Die |
596 * | +--------------------------------------------------+ |
597 * | | I/O Units (PCIe, SATA, USB, etc.) | |
598 * | +--------------------------------------------------+ |
599 * | HH |
600 * | +-----------+ HH +-----------+ |
601 * | | | HH | | |
602 * | | Core |==========| Core | |
603 * | | Complex |==========| Complex | |
604 * | | | HH | | |
605 * | +-----------+ HH +-----------+ |
606 * | HH |
607 * | +--------------------------------------------------+ |
608 * | | Memory Controller | |
609 * | +--------------------------------------------------+ |
610 * | |
611 * +--------------------------------------------------------+
612 *
613 * This image represents a single Zeppelin Die. Note how both core complexes are
614 * connected to the same memory controller and I/O units. While each core
615 * complex has its own L3 cache as seen in the first image, they both have
616 * uniform access to memory.
617 *
618 *
619 * PP PP
620 * PP PP
621 * +----------PP---------------------PP---------+
622 * | PP PP |
623 * | +-----------+ +-----------+ |
624 * | | | | | |
625 * MMMMMMMMM| Zeppelin |==========| Zeppelin |MMMMMMMMM
626 * MMMMMMMMM| Die |==========| Die |MMMMMMMMM
627 * | | | | | |
628 * | +-----------+ooo ...+-----------+ |
629 * | HH ooo ... HH |
630 * | HH oo.. HH |
631 * | HH ..oo HH |
632 * | HH ... ooo HH |
633 * | +-----------+... ooo+-----------+ |
634 * | | | | | |
635 * MMMMMMMMM| Zeppelin |==========| Zeppelin |MMMMMMMMM
636 * MMMMMMMMM| Die |==========| Die |MMMMMMMMM
637 * | | | | | |
638 * | +-----------+ +-----------+ |
639 * | PP PP |
640 * +----------PP---------------------PP---------+
641 * PP PP
642 * PP PP
643 *
644 * This image represents a single Zen package. In this example, it has four
645 * Zeppelin dies, though some configurations only have a single one. In this
646 * example, each die is directly connected to the next. Also, each die is
647 * represented as being connected to memory by the 'M' character and connected
648 * to PCIe devices and other I/O, by the 'P' character. Because each Zeppelin
649 * die is made up of two core complexes, we have multiple different NUMA
650 * domains that we care about for these systems.
651 *
652 * CPUID LEAVES
653 *
654 * There are a few different CPUID leaves that we can use to try and understand
655 * the actual state of the world. As part of the introduction of family 0xf, AMD
656 * added CPUID leaf 0x80000008. This leaf tells us the number of logical
657 * processors that are in the system. Because families before Zen didn't have
658 * SMT, this was always the number of cores that were in the system. However, it
659 * should always be thought of as the number of logical threads to be consistent
660 * between generations. In addition we also get the size of the APIC ID that is
661 * used to represent the number of logical processors. This is important for
662 * deriving topology information.
663 *
664 * In the Bulldozer family, AMD added leaf 0x8000001E. The information varies a
665 * bit between Bulldozer and later families, but it is quite useful in
666 * determining the topology information. Because this information has changed
667 * across family generations, it's worth calling out what these mean
668 * explicitly. The registers have the following meanings:
669 *
670 * %eax The APIC ID. The entire register is defined to have a 32-bit
671 * APIC ID, even though on systems without x2apic support, it will
672 * be limited to 8 bits.
673 *
674 * %ebx On Bulldozer-era systems this contains information about the
675 * number of cores that are in a compute unit (cores that share
676 * resources). It also contains a per-package compute unit ID that
677 * identifies which compute unit the logical CPU is a part of.
678 *
679 * On Zen-era systems this instead contains the number of threads
680 * per core and the ID of the core that the logical CPU is a part
681 * of. Note, this ID is unique only to the package; it is not
682 * globally unique across the entire system.
683 *
684 * %ecx This contains the number of nodes that exist in the package. It
685 * also contains an ID that identifies which node the logical CPU
686 * is a part of.
687 *
688 * Finally, we also use cpuid leaf 0x8000001D to determine information about the
689 * cache layout to determine which logical CPUs are sharing which caches.
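 *
 * To make this concrete, a hedged sketch of how the Zen-era (family 0x17)
 * fields of leaf 0x8000001E are commonly decoded; the bit positions here are
 * taken from AMD's documentation and should be verified against the relevant
 * PPR rather than treated as authoritative:
 *
 *	regs.cp_eax = 0x8000001e;
 *	(void) __cpuid_insn(&regs);
 *	apicid = regs.cp_eax;
 *	threads_per_core = BITX(regs.cp_ebx, 15, 8) + 1;
 *	coreid = BITX(regs.cp_ebx, 7, 0);		package-local
 *	nodes_per_pkg = BITX(regs.cp_ecx, 10, 8) + 1;
 *	nodeid = BITX(regs.cp_ecx, 7, 0);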
690 *
691 * illumos Topology
692 * ----------------
693 *
694 * Based on the above we synthesize the information into several different
695 * variables that we store in the 'struct cpuid_info'. We'll go into the details
696 * of what each member is supposed to represent and their uniqueness. In
697 * general, there are two levels of uniqueness that we care about. We care about
698 * an ID that is globally unique. That means that it will be unique across all
699 * entities in the system. For example, the default logical CPU ID is globally
700 * unique. On the other hand, there is some information that we only care about
701 * being unique within the context of a single package / socket. Here are the
702 * variables that we keep track of and their meaning.
703 *
704 * Several of the values that represent an identifier, with the exception
705 * of cpi_apicid, are allowed to be synthetic.
706 *
707 *
708 * cpi_apicid
709 *
710 * This is the value of the CPU's APIC id. This should be the full 32-bit
711 * ID if the CPU is using the x2apic. Otherwise, it should be the 8-bit
712 * APIC ID. This value is globally unique between all logical CPUs across
713 * all packages. This is usually required by the APIC.
714 *
715 * cpi_chipid
716 *
717 * This value indicates the ID of the package that the logical CPU is a
718 * part of. This value is allowed to be synthetic. It is usually derived by
719 * taking the CPU's APIC ID and determining how many bits are used to
720 * represent CPU cores in the package. All logical CPUs that are part of
721 * the same package must have the same value.
722 *
723 * cpi_coreid
724 *
725 * This represents the ID of a CPU core. Two logical CPUs should only have
726 * the same cpi_coreid value if they are part of the same core. These
727 * values may be synthetic. On systems that support SMT, this value is
728 * usually derived from the APIC ID, otherwise it is often synthetic and
729 * just set to the value of the cpu_id in the cpu_t.
730 *
731 * cpi_pkgcoreid
732 *
733 * This is similar to the cpi_coreid in that logical CPUs that are part of
734 * the same core should have the same ID. The main difference is that these
735 * values are only required to be unique to a given socket.
736 *
737 * cpi_clogid
738 *
739 * This represents the logical ID of a logical CPU. This value should be
740 * unique within a given socket for each logical CPU. This is allowed to be
741 * synthetic, though it is usually based on the CPU's APIC ID. The
742 * broader system expects that logical CPUs that are part of the same
743 * core have contiguous numbers. For example, if there were two threads per
744 * core, then the two IDs divided by two should be the same and, modulo two,
745 * the first should be zero and the second one. For example, IDs 4 and 5
746 * indicate two logical CPUs that are part of the same core. But IDs 5 and
747 * 6 represent two logical CPUs that are part of different cores.
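 *
 * Put differently, with two threads per core the expectation is roughly the
 * following (illustrative only; the real values are derived from the APIC ID):
 *
 *	core_part = cpi_clogid >> 1;		IDs 4 and 5 both yield 2
 *	thread_part = cpi_clogid & 1;		4 yields 0, 5 yields 1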
748 *
749 * While it is common for the cpi_coreid and the cpi_clogid to be derived
750 * from the same source, strictly speaking, they don't have to be and the
751 * two values should be considered logically independent. One should not
752 * try to compare a logical CPU's cpi_coreid and cpi_clogid to determine
753 * some kind of relationship. While this is tempting, we've seen cases on
754 * AMD family 0xf where the system's cpu id is not related to its APIC ID.
755 *
756 * cpi_ncpu_per_chip
757 *
758 * This value indicates the total number of logical CPUs that exist in the
759 * physical package. Critically, this is not the number of logical CPUs
760 * that exist for just the single core.
761 *
762 * This value should be the same for all logical CPUs in the same package.
763 *
764 * cpi_ncore_per_chip
765 *
766 * This value indicates the total number of physical CPU cores that exist
767 * in the package. The system compares this value with cpi_ncpu_per_chip to
768 * determine if simultaneous multi-threading (SMT) is enabled. When
769 * cpi_ncpu_per_chip equals cpi_ncore_per_chip, then there is no SMT and
770 * the X86FSET_HTT feature is not set. If this value is greater than one,
771 * then we consider the processor to have the feature X86FSET_CMP, to
772 * indicate that there is support for more than one core.
773 *
774 * This value should be the same for all logical CPUs in the same package.
775 *
776 * cpi_procnodes_per_pkg
777 *
778 * This value indicates the number of 'nodes' that exist in the package.
779 * When the processor is actually a multi-chip module, this represents the
780 * number of such modules that exist in the package. Currently, on Intel
781 * based systems this member is always set to 1.
782 *
783 * This value should be the same for all logical CPUs in the same package.
784 *
785 * cpi_procnodeid
786 *
787 * This value indicates the ID of the node that the logical CPU is a part
788 * of. All logical CPUs that are in the same node must have the same value
789 * here. This value must be unique across all of the packages in the
790 * system. On Intel based systems, this is currently set to the value in
791 * cpi_chipid because there is only one node.
792 *
793 * cpi_cores_per_compunit
794 *
795 * This value indicates the number of cores that are part of a compute
796 * unit. See the AMD topology section for this. This member only has real
797 * meaning currently for AMD Bulldozer family processors. For all other
798 * processors, this should currently be set to 1.
799 *
800 * cpi_compunitid
801 *
802 * This indicates the compute unit that the logical CPU belongs to. For
803 * processors without AMD Bulldozer-style compute units this should be set
804 * to the value of cpi_coreid.
805 *
806 * cpi_ncpu_shr_last_cache
807 *
808 * This indicates the number of logical CPUs that are sharing the same last
809 * level cache. This value should be the same for all CPUs that are sharing
810 * that cache. The last cache refers to the cache that is closest to memory
811 * and furthest away from the CPU.
812 *
813 * cpi_last_lvl_cacheid
814 *
815 * This indicates the ID of the last cache that the logical CPU uses. This
816 * cache is often shared between multiple logical CPUs and is the cache
817 * that is closest to memory and furthest away from the CPU. This value
818 * should be the same for a group of logical CPUs only if they actually
819 * share the same last level cache. IDs should not overlap between
820 * packages.
821 *
822 * cpi_ncore_bits
823 *
824 * This indicates the number of bits that are required to represent all of
825 * the cores in the system. As cores are derived based on their APIC IDs,
826 * we aren't guaranteed a run of APIC IDs starting from zero. It's OK for
827 * this value to be larger than the actual number of IDs that are present
828 * in the system. This is used to size tables by the CMI framework. It is
829 * only filled in for Intel and AMD CPUs.
830 *
831 * cpi_nthread_bits
832 *
833 * This indicates the number of bits required to represent all of the IDs
834 * that cover the logical CPUs that exist on a given core. It's OK for this
835 * value to be larger than the actual number of IDs that are present in the
836 * system. This is used to size tables by the CMI framework. It is
837 * only filled in for Intel and AMD CPUs.
838 *
839 * -----------
840 * Hypervisors
841 * -----------
842 *
843 * If trying to manage the differences between vendors wasn't bad enough, it can
844 * get worse thanks to our friend hardware virtualization. Hypervisors are given
845 * the ability to interpose on all cpuid instructions and change them to suit
846 * their purposes. In general, this is necessary as the hypervisor wants to be
847 * able to present a more uniform set of features or not necessarily give the
848 * guest operating system kernel knowledge of all features so it can be
849 * more easily migrated between systems.
850 *
851 * When it comes to trying to determine topology information, this can be a
852 * double edged sword. When a hypervisor doesn't actually implement a cpuid
853 * leaf, it'll often return all zeros. Because of that, you'll often see various
854 * checks scattered about that test whether fields are non-zero before we
855 * assume we can use them.
856 *
857 * When it comes to topology information, the hypervisor is often incentivized
858 * to lie to you about topology. This is because it doesn't always actually
859 * guarantee that topology at all. The topology path we take in the system
860 * depends on how the CPU advertises itself. If it advertises itself as an Intel
861 * or AMD CPU, then we basically do our normal path. However, when they don't
862 * use an actual vendor, then that usually turns into multiple one-core CPUs
863 * that we enumerate, often appearing to be on different sockets. The actual behavior
864 * depends greatly on what the hypervisor actually exposes to us.
865 *
866 * --------------------
867 * Exposing Information
868 * --------------------
869 *
870 * We expose CPUID information in three different forms in the system.
871 *
872 * The first is through the x86_featureset variable. This is used in conjunction
873 * with the is_x86_feature() function. This is queried by x86-specific functions
874 * to determine which features are or aren't present in the system and to make
875 * decisions based upon them. For example, users of this include everything from
876 * parts of the system dedicated to reliability, availability, and
877 * serviceability (RAS), to making decisions about how to handle security
878 * mitigations, to various x86-specific drivers. General purpose or
879 * architecture independent drivers should never be calling this function.
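 *
 * A typical in-kernel check looks like the following sketch, where
 * X86FSET_SSE2 stands in for whichever X86FSET_* constant is of interest:
 *
 *	if (!is_x86_feature(x86_featureset, X86FSET_SSE2))
 *		return (ENOTSUP);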
880 *
881 * The second means is through the auxiliary vector. The auxiliary vector is a
882 * series of tagged data that the kernel passes down to a user program when it
883 * begins executing. This information is used to indicate to programs what
884 * instruction set extensions are present. For example, information about the
885 * CPU supporting the machine check architecture (MCA) wouldn't be passed down
886 * since user programs cannot make use of it. However, things like the AVX
887 * instruction sets are. Programs use this information to make run-time
888 * decisions about what features they should use. As an example, the run-time
889 * link-editor (rtld) can relocate different functions depending on the hardware
890 * support available.
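 *
 * From userland the conventional consumer is getisax(3C) together with the
 * AV_386_* flags from <sys/auxv_386.h>; a minimal sketch (use_avx_path() is a
 * hypothetical stand-in for the caller's AVX-specific code):
 *
 *	uint32_t hw[1];
 *
 *	if (getisax(hw, 1) != 0 && (hw[0] & AV_386_AVX) != 0)
 *		use_avx_path();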
891 *
892 * The final form is through a series of accessor functions that all have the
893 * form cpuid_get*. This is used by a number of different subsystems in the
894 * kernel to determine more detailed information about what we're running on,
895 * topology information, etc. Some of these subsystems include processor groups
896 * (uts/common/os/pg.c), CPU Module Interface (uts/i86pc/os/cmi.c), ACPI,
897 * microcode, and performance monitoring. These functions all ASSERT that the
898 * CPU they're being called on has reached a certain cpuid pass. If the passes
899 * are rearranged, then this needs to be adjusted.
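 *
 * For example, a subsystem might use the accessors like this (illustrative;
 * zen_specific_setup() is a hypothetical consumer):
 *
 *	if (cpuid_getvendor(CPU) == X86_VENDOR_AMD &&
 *	    cpuid_getfamily(CPU) >= 0x17)
 *		zen_specific_setup();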
900 */
901
902 #include <sys/types.h>
903 #include <sys/archsystm.h>
904 #include <sys/x86_archext.h>
905 #include <sys/kmem.h>
906 #include <sys/systm.h>
907 #include <sys/cmn_err.h>
908 #include <sys/sunddi.h>
909 #include <sys/sunndi.h>
910 #include <sys/cpuvar.h>
911 #include <sys/processor.h>
912 #include <sys/sysmacros.h>
913 #include <sys/pg.h>
914 #include <sys/fp.h>
915 #include <sys/controlregs.h>
916 #include <sys/bitmap.h>
917 #include <sys/auxv_386.h>
918 #include <sys/memnode.h>
919 #include <sys/pci_cfgspace.h>
920 #include <sys/comm_page.h>
921 #include <sys/mach_mmu.h>
922 #include <sys/ucode.h>
923 #include <sys/tsc.h>
924
925 #ifdef __xpv
926 #include <sys/hypervisor.h>
927 #else
928 #include <sys/ontrap.h>
929 #endif
930
931 uint_t x86_vendor = X86_VENDOR_IntelClone;
932 uint_t x86_type = X86_TYPE_OTHER;
933 uint_t x86_clflush_size = 0;
934
935 #if defined(__xpv)
936 int x86_use_pcid = 0;
937 int x86_use_invpcid = 0;
938 #else
939 int x86_use_pcid = -1;
940 int x86_use_invpcid = -1;
941 #endif
942
943 uint_t pentiumpro_bug4046376;
944
945 uchar_t x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)];
946
947 static char *x86_feature_names[NUM_X86_FEATURES] = {
948 "lgpg",
949 "tsc",
950 "msr",
951 "mtrr",
952 "pge",
953 "de",
954 "cmov",
955 "mmx",
956 "mca",
957 "pae",
958 "cv8",
959 "pat",
960 "sep",
961 "sse",
962 "sse2",
963 "htt",
964 "asysc",
965 "nx",
966 "sse3",
967 "cx16",
968 "cmp",
969 "tscp",
970 "mwait",
971 "sse4a",
972 "cpuid",
973 "ssse3",
974 "sse4_1",
975 "sse4_2",
976 "1gpg",
977 "clfsh",
978 "64",
979 "aes",
980 "pclmulqdq",
981 "xsave",
982 "avx",
983 "vmx",
984 "svm",
985 "topoext",
986 "f16c",
987 "rdrand",
988 "x2apic",
989 "avx2",
990 "bmi1",
991 "bmi2",
992 "fma",
993 "smep",
994 "smap",
995 "adx",
996 "rdseed",
997 "mpx",
998 "avx512f",
999 "avx512dq",
1000 "avx512pf",
1001 "avx512er",
1002 "avx512cd",
1003 "avx512bw",
1004 "avx512vl",
1005 "avx512fma",
1006 "avx512vbmi",
1007 "avx512_vpopcntdq",
1008 "avx512_4vnniw",
1009 "avx512_4fmaps",
1010 "xsaveopt",
1011 "xsavec",
1012 "xsaves",
1013 "sha",
1014 "umip",
1015 "pku",
1016 "ospke",
1017 "pcid",
1018 "invpcid",
1019 "ibrs",
1020 "ibpb",
1021 "stibp",
1022 "ssbd",
1023 "ssbd_virt",
1024 "rdcl_no",
1025 "ibrs_all",
1026 "rsba",
1027 "ssb_no",
1028 "stibp_all",
1029 "flush_cmd",
1030 "l1d_vmentry_no",
1031 "fsgsbase",
1032 "clflushopt",
1033 "clwb",
1034 "monitorx",
1035 "clzero",
1036 "xop",
1037 "fma4",
1038 "tbm",
1039 "avx512_vnni",
1040 "amd_pcec"
1041 };
1042
1043 boolean_t
1044 is_x86_feature(void *featureset, uint_t feature)
1045 {
1046 ASSERT(feature < NUM_X86_FEATURES);
1047 return (BT_TEST((ulong_t *)featureset, feature));
1048 }
1049
1050 void
1051 add_x86_feature(void *featureset, uint_t feature)
1052 {
1053 ASSERT(feature < NUM_X86_FEATURES);
1054 BT_SET((ulong_t *)featureset, feature);
1055 }
1056
1057 void
1058 remove_x86_feature(void *featureset, uint_t feature)
1059 {
1060 ASSERT(feature < NUM_X86_FEATURES);
1061 BT_CLEAR((ulong_t *)featureset, feature);
1062 }
1063
1064 boolean_t
1065 compare_x86_featureset(void *setA, void *setB)
1066 {
1067 /*
1068 * We assume that the unused bits of the bitmap are always zero.
1069 */
1070 if (memcmp(setA, setB, BT_SIZEOFMAP(NUM_X86_FEATURES)) == 0) {
1071 return (B_TRUE);
1072 } else {
1073 return (B_FALSE);
1074 }
1075 }
1076
1077 void
1078 print_x86_featureset(void *featureset)
1079 {
1080 uint_t i;
1081
1082 for (i = 0; i < NUM_X86_FEATURES; i++) {
1083 if (is_x86_feature(featureset, i)) {
1084 cmn_err(CE_CONT, "?x86_feature: %s\n",
1085 x86_feature_names[i]);
1086 }
1087 }
1088 }
1089
1090 /* Note: This is the maximum size for the CPU, not the size of the structure. */
1091 static size_t xsave_state_size = 0;
1092 uint64_t xsave_bv_all = (XFEATURE_LEGACY_FP | XFEATURE_SSE);
1093 boolean_t xsave_force_disable = B_FALSE;
1094 extern int disable_smap;
1095
1096 /*
1097 * This is set to platform type we are running on.
1098 */
1099 static int platform_type = -1;
1100
1101 #if !defined(__xpv)
1102 /*
1103 * Variable to patch if hypervisor platform detection needs to be
1104 * disabled (e.g. platform_type will always be HW_NATIVE if this is 0).
1105 */
1106 int enable_platform_detection = 1;
1107 #endif
1108
1109 /*
1110 * monitor/mwait info.
1111 *
1112 * size_actual and buf_actual are the real address and size allocated to get
1113 * proper mwait_buf alignment. buf_actual and size_actual should be passed
1114 * to kmem_free(). Currently kmem_alloc() and mwait happen to both use
1115 * processor cache-line alignment, but this is not guaranteed in the future.
1116 */
1117 struct mwait_info {
1118 size_t mon_min; /* min size to avoid missed wakeups */
1119 size_t mon_max; /* size to avoid false wakeups */
1120 size_t size_actual; /* size actually allocated */
1121 void *buf_actual; /* memory actually allocated */
1122 uint32_t support; /* processor support of monitor/mwait */
1123 };
1124
1125 /*
1126 * xsave/xrestor info.
1127 *
1128 * This structure contains HW feature bits and the size of the xsave save area.
1129 * Note: the kernel declares a fixed size (AVX_XSAVE_SIZE) structure
1130 * (xsave_state) to describe the xsave layout. However, at runtime the
1131 * per-lwp xsave area is dynamically allocated based on xsav_max_size. The
1132 * xsave_state structure simply represents the legacy layout of the beginning
1133 * of the xsave area.
1134 */
1135 struct xsave_info {
1136 uint32_t xsav_hw_features_low; /* Supported HW features */
1137 uint32_t xsav_hw_features_high; /* Supported HW features */
1138 size_t xsav_max_size; /* max size save area for HW features */
1139 size_t ymm_size; /* AVX: size of ymm save area */
1140 size_t ymm_offset; /* AVX: offset for ymm save area */
1141 size_t bndregs_size; /* MPX: size of bndregs save area */
1142 size_t bndregs_offset; /* MPX: offset for bndregs save area */
1143 size_t bndcsr_size; /* MPX: size of bndcsr save area */
1144 size_t bndcsr_offset; /* MPX: offset for bndcsr save area */
1145 size_t opmask_size; /* AVX512: size of opmask save */
1146 size_t opmask_offset; /* AVX512: offset for opmask save */
1147 size_t zmmlo_size; /* AVX512: size of zmm 256 save */
1148 size_t zmmlo_offset; /* AVX512: offset for zmm 256 save */
1149 size_t zmmhi_size; /* AVX512: size of zmm hi reg save */
1150 size_t zmmhi_offset; /* AVX512: offset for zmm hi reg save */
1151 };
1152
1153
1154 /*
1155 * These constants determine how many of the elements of the
1156 * cpuid we cache in the cpuid_info data structure; the
1157 * remaining elements are accessible via the cpuid instruction.
1158 */
1159
1160 #define NMAX_CPI_STD 8 /* eax = 0 .. 7 */
1161 #define NMAX_CPI_EXTD 0x1f /* eax = 0x80000000 .. 0x8000001e */
1162
1163 /*
1164 * See the big theory statement for a more detailed explanation of what some of
1165 * these members mean.
1166 */
1167 struct cpuid_info {
1168 uint_t cpi_pass; /* last pass completed */
1169 /*
1170 * standard function information
1171 */
1172 uint_t cpi_maxeax; /* fn 0: %eax */
1173 char cpi_vendorstr[13]; /* fn 0: %ebx:%ecx:%edx */
1174 uint_t cpi_vendor; /* enum of cpi_vendorstr */
1175
1176 uint_t cpi_family; /* fn 1: extended family */
1177 uint_t cpi_model; /* fn 1: extended model */
1178 uint_t cpi_step; /* fn 1: stepping */
1179 chipid_t cpi_chipid; /* fn 1: %ebx: Intel: chip # */
1180 /* AMD: package/socket # */
1181 uint_t cpi_brandid; /* fn 1: %ebx: brand ID */
1182 int cpi_clogid; /* fn 1: %ebx: thread # */
1183 uint_t cpi_ncpu_per_chip; /* fn 1: %ebx: logical cpu count */
1184 uint8_t cpi_cacheinfo[16]; /* fn 2: intel-style cache desc */
1185 uint_t cpi_ncache; /* fn 2: number of elements */
1186 uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */
1187 id_t cpi_last_lvl_cacheid; /* fn 4: %eax: derived cache id */
1188 uint_t cpi_cache_leaf_size; /* Number of cache elements */
1189 /* Intel fn: 4, AMD fn: 8000001d */
1190 struct cpuid_regs **cpi_cache_leaves; /* Actual leaves from above */
1191 struct cpuid_regs cpi_std[NMAX_CPI_STD]; /* 0 .. 7 */
1192 /*
1193 * extended function information
1194 */
1195 uint_t cpi_xmaxeax; /* fn 0x80000000: %eax */
1196 char cpi_brandstr[49]; /* fn 0x8000000[234] */
1197 uint8_t cpi_pabits; /* fn 0x80000006: %eax */
1198 uint8_t cpi_vabits; /* fn 0x80000006: %eax */
1199 uint8_t cpi_fp_amd_save; /* AMD: FP error pointer save rqd. */
1200 struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x800000XX */
1201
1202 id_t cpi_coreid; /* same coreid => strands share core */
1203 int cpi_pkgcoreid; /* core number within single package */
1204 uint_t cpi_ncore_per_chip; /* AMD: fn 0x80000008: %ecx[7-0] */
1205 /* Intel: fn 4: %eax[31-26] */
1206
1207 /*
1208 * These values represent the number of bits that are required to store
1209 * information about the number of cores and threads.
1210 */
1211 uint_t cpi_ncore_bits;
1212 uint_t cpi_nthread_bits;
1213 /*
1214 * supported feature information
1215 */
1216 uint32_t cpi_support[6];
1217 #define STD_EDX_FEATURES 0
1218 #define AMD_EDX_FEATURES 1
1219 #define TM_EDX_FEATURES 2
1220 #define STD_ECX_FEATURES 3
1221 #define AMD_ECX_FEATURES 4
1222 #define STD_EBX_FEATURES 5
1223 /*
1224 * Synthesized information, where known.
1225 */
1226 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */
1227 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */
1228 uint32_t cpi_socket; /* Chip package/socket type */
1229
1230 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */
1231 uint32_t cpi_apicid;
1232 uint_t cpi_procnodeid; /* AMD: nodeID on HT, Intel: chipid */
1233 uint_t cpi_procnodes_per_pkg; /* AMD: # of nodes in the package */
1234 /* Intel: 1 */
1235 uint_t cpi_compunitid; /* AMD: ComputeUnit ID, Intel: coreid */
1236 uint_t cpi_cores_per_compunit; /* AMD: # of cores in the ComputeUnit */
1237
1238 struct xsave_info cpi_xsave; /* fn D: xsave/xrestor info */
1239 };
1240
1241
1242 static struct cpuid_info cpuid_info0;
1243
1244 /*
1245 * These bit fields are defined by the Intel Application Note AP-485
1246 * "Intel Processor Identification and the CPUID Instruction"
1247 */
1248 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20)
1249 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16)
1250 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12)
1251 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8)
1252 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0)
1253 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4)
1254
1255 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx)
1256 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx)
1257 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx)
1258 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx)
1259 #define CPI_FEATURES_7_0_EBX(cpi) ((cpi)->cpi_std[7].cp_ebx)
1260 #define CPI_FEATURES_7_0_ECX(cpi) ((cpi)->cpi_std[7].cp_ecx)
1261 #define CPI_FEATURES_7_0_EDX(cpi) ((cpi)->cpi_std[7].cp_edx)
1262
1263 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0)
1264 #define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 7)
1265 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16)
1266 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24)
1267
1268 #define CPI_MAXEAX_MAX 0x100 /* sanity control */
1269 #define CPI_XMAXEAX_MAX 0x80000100
1270 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */
1271 #define CPI_FNB_ECX_MAX 0x20 /* sanity: max fn B levels */
1272
1273 /*
1274 * Function 4 (Deterministic Cache Parameters) macros
1275 * Defined by Intel Application Note AP-485
1276 */
1277 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26)
1278 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14)
1279 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9)
1280 #define CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8)
1281 #define CPI_CACHE_LVL(regs) BITX((regs)->cp_eax, 7, 5)
1282 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0)
1283 #define CPI_CPU_LEVEL_TYPE(regs) BITX((regs)->cp_ecx, 15, 8)
1284
1285 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22)
1286 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12)
1287 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0)
1288
1289 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0)
1290
1291 #define CPI_PREFCH_STRIDE(regs) BITX((regs)->cp_edx, 9, 0)
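
/*
 * The ways, partitions, line size and sets fields above are all reported by
 * cpuid as (value - 1), so the size in bytes of a cache described by one of
 * these leaves works out to:
 *
 *	(CPI_CACHE_WAYS(regs) + 1) * (CPI_CACHE_PARTS(regs) + 1) *
 *	    (CPI_CACHE_COH_LN_SZ(regs) + 1) * (CPI_CACHE_SETS(regs) + 1)
 */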
1292
1293
1294 /*
1295 * A couple of shorthand macros to identify "later" P6-family chips
1296 * like the Pentium M and Core. First, the "older" P6-based stuff
1297 * (loosely defined as "pre-Pentium-4"):
1298 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon
1299 */
1300 #define IS_LEGACY_P6(cpi) ( \
1301 cpi->cpi_family == 6 && \
1302 (cpi->cpi_model == 1 || \
1303 cpi->cpi_model == 3 || \
1304 cpi->cpi_model == 5 || \
1305 cpi->cpi_model == 6 || \
1306 cpi->cpi_model == 7 || \
1307 cpi->cpi_model == 8 || \
1308 cpi->cpi_model == 0xA || \
1309 cpi->cpi_model == 0xB) \
1310 )
1311
1312 /* A "new F6" is everything with family 6 that's not the above */
1313 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi))
1314
1315 /* Extended family/model support */
1316 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \
1317 cpi->cpi_family >= 0xf)
1318
1319 /*
1320 * Info for monitor/mwait idle loop.
1321 *
1322 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's
1323 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November
1324 * 2006.
1325 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual
1326 * Documentation Updates" #33633, Rev 2.05, December 2006.
1327 */
1328 #define MWAIT_SUPPORT (0x00000001) /* mwait supported */
#define MWAIT_EXTENSIONS (0x00000002) /* extensions supported */
1330 #define MWAIT_ECX_INT_ENABLE (0x00000004) /* ecx 1 extension supported */
1331 #define MWAIT_SUPPORTED(cpi) ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON)
1332 #define MWAIT_INT_ENABLE(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x2)
1333 #define MWAIT_EXTENSION(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x1)
1334 #define MWAIT_SIZE_MIN(cpi) BITX((cpi)->cpi_std[5].cp_eax, 15, 0)
1335 #define MWAIT_SIZE_MAX(cpi) BITX((cpi)->cpi_std[5].cp_ebx, 15, 0)
1336 /*
1337 * Number of sub-cstates for a given c-state.
1338 */
1339 #define MWAIT_NUM_SUBC_STATES(cpi, c_state) \
1340 BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state)
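
/*
 * Note that the c_state argument is the bit offset of that C-state's nibble
 * in leaf 5 %edx (0 for C0, 4 for C1, 8 for C2, ...), not the C-state number
 * itself.
 */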
1341
1342 /*
1343 * XSAVE leaf 0xD enumeration
1344 */
1345 #define CPUID_LEAFD_2_YMM_OFFSET 576
1346 #define CPUID_LEAFD_2_YMM_SIZE 256
1347
1348 /*
1349 * Common extended leaf names to cut down on typos.
1350 */
1351 #define CPUID_LEAF_EXT_0 0x80000000
1352 #define CPUID_LEAF_EXT_8 0x80000008
1353 #define CPUID_LEAF_EXT_1d 0x8000001d
1354 #define CPUID_LEAF_EXT_1e 0x8000001e
1355
1356 /*
* Functions we consume from cpuid_subr.c; don't publish these in a header
1358 * file to try and keep people using the expected cpuid_* interfaces.
1359 */
1360 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t);
1361 extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t);
1362 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t);
1363 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t);
1364 extern uint_t _cpuid_vendorstr_to_vendorcode(char *);
1365
1366 /*
* Apply various platform-dependent restrictions where the underlying
* platform means the CPU can be marked as less capable than its cpuid
* instruction would imply.
1370 */
1371 #if defined(__xpv)
1372 static void
1373 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp)
1374 {
1375 switch (eax) {
1376 case 1: {
1377 uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ?
1378 0 : CPUID_INTC_EDX_MCA;
1379 cp->cp_edx &=
1380 ~(mcamask |
1381 CPUID_INTC_EDX_PSE |
1382 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
1383 CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR |
1384 CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT |
1385 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
1386 CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT);
1387 break;
1388 }
1389
1390 case 0x80000001:
1391 cp->cp_edx &=
1392 ~(CPUID_AMD_EDX_PSE |
1393 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
1394 CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE |
1395 CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 |
1396 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
1397 CPUID_AMD_EDX_TSCP);
1398 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY;
1399 break;
1400 default:
1401 break;
1402 }
1403
1404 switch (vendor) {
1405 case X86_VENDOR_Intel:
1406 switch (eax) {
1407 case 4:
1408 /*
1409 * Zero out the (ncores-per-chip - 1) field
1410 */
1411 cp->cp_eax &= 0x03fffffff;
1412 break;
1413 default:
1414 break;
1415 }
1416 break;
1417 case X86_VENDOR_AMD:
1418 switch (eax) {
1419
1420 case 0x80000001:
1421 cp->cp_ecx &= ~CPUID_AMD_ECX_CR8D;
1422 break;
1423
1424 case CPUID_LEAF_EXT_8:
1425 /*
1426 * Zero out the (ncores-per-chip - 1) field
1427 */
1428 cp->cp_ecx &= 0xffffff00;
1429 break;
1430 default:
1431 break;
1432 }
1433 break;
1434 default:
1435 break;
1436 }
1437 }
1438 #else
1439 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */
1440 #endif
1441
1442 /*
1443 * Some undocumented ways of patching the results of the cpuid
1444 * instruction to permit running Solaris 10 on future cpus that
1445 * we don't currently support. Could be set to non-zero values
1446 * via settings in eeprom.
1447 */
1448
1449 uint32_t cpuid_feature_ecx_include;
1450 uint32_t cpuid_feature_ecx_exclude;
1451 uint32_t cpuid_feature_edx_include;
1452 uint32_t cpuid_feature_edx_exclude;
1453
1454 /*
1455 * Allocate space for mcpu_cpi in the machcpu structure for all non-boot CPUs.
1456 */
1457 void
1458 cpuid_alloc_space(cpu_t *cpu)
1459 {
1460 /*
1461 * By convention, cpu0 is the boot cpu, which is set up
1462 * before memory allocation is available. All other cpus get
1463 * their cpuid_info struct allocated here.
1464 */
1465 ASSERT(cpu->cpu_id != 0);
1466 ASSERT(cpu->cpu_m.mcpu_cpi == NULL);
1467 cpu->cpu_m.mcpu_cpi =
1468 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP);
1469 }
1470
1471 void
1472 cpuid_free_space(cpu_t *cpu)
1473 {
1474 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1475 int i;
1476
1477 ASSERT(cpi != NULL);
1478 ASSERT(cpi != &cpuid_info0);
1479
1480 /*
1481 * Free up any cache leaf related dynamic storage. The first entry was
1482 * cached from the standard cpuid storage, so we should not free it.
1483 */
1484 for (i = 1; i < cpi->cpi_cache_leaf_size; i++)
1485 kmem_free(cpi->cpi_cache_leaves[i], sizeof (struct cpuid_regs));
1486 if (cpi->cpi_cache_leaf_size > 0)
1487 kmem_free(cpi->cpi_cache_leaves,
1488 cpi->cpi_cache_leaf_size * sizeof (struct cpuid_regs *));
1489
1490 kmem_free(cpi, sizeof (*cpi));
1491 cpu->cpu_m.mcpu_cpi = NULL;
1492 }
1493
1494 #if !defined(__xpv)
1495 /*
1496 * Determine the type of the underlying platform. This is used to customize
1497 * initialization of various subsystems (e.g. TSC). determine_platform() must
1498 * only ever be called once to prevent two processors from seeing different
1499 * values of platform_type. Must be called before cpuid_pass1(), the earliest
1500 * consumer to execute (uses _cpuid_chiprev --> synth_amd_info --> get_hwenv).
1501 */
1502 void
1503 determine_platform(void)
1504 {
1505 struct cpuid_regs cp;
1506 uint32_t base;
1507 uint32_t regs[4];
1508 char *hvstr = (char *)regs;
1509
1510 ASSERT(platform_type == -1);
1511
1512 platform_type = HW_NATIVE;
1513
1514 if (!enable_platform_detection)
1515 return;
1516
1517 /*
* If the Hypervisor CPUID bit is set, try to determine the hypervisor
* vendor signature, and set the platform type accordingly.
1520 *
1521 * References:
1522 * http://lkml.org/lkml/2008/10/1/246
1523 * http://kb.vmware.com/kb/1009458
1524 */
1525 cp.cp_eax = 0x1;
1526 (void) __cpuid_insn(&cp);
1527 if ((cp.cp_ecx & CPUID_INTC_ECX_HV) != 0) {
1528 cp.cp_eax = 0x40000000;
1529 (void) __cpuid_insn(&cp);
1530 regs[0] = cp.cp_ebx;
1531 regs[1] = cp.cp_ecx;
1532 regs[2] = cp.cp_edx;
1533 regs[3] = 0;
1534 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0) {
1535 platform_type = HW_XEN_HVM;
1536 return;
1537 }
1538 if (strcmp(hvstr, HVSIG_VMWARE) == 0) {
1539 platform_type = HW_VMWARE;
1540 return;
1541 }
1542 if (strcmp(hvstr, HVSIG_KVM) == 0) {
1543 platform_type = HW_KVM;
1544 return;
1545 }
1546 if (strcmp(hvstr, HVSIG_BHYVE) == 0) {
1547 platform_type = HW_BHYVE;
1548 return;
1549 }
1550 if (strcmp(hvstr, HVSIG_MICROSOFT) == 0)
1551 platform_type = HW_MICROSOFT;
1552 } else {
1553 /*
* Check older VMware hardware versions. The VMware hypervisor is
* detected by performing an IN operation on the VMware hypervisor
* port and checking that the value returned in %ebx is the VMware
* hypervisor magic value.
1558 *
1559 * References: http://kb.vmware.com/kb/1009458
1560 */
1561 vmware_port(VMWARE_HVCMD_GETVERSION, regs);
1562 if (regs[1] == VMWARE_HVMAGIC) {
1563 platform_type = HW_VMWARE;
1564 return;
1565 }
1566 }
1567
1568 /*
* Check for the Xen hypervisor. In a fully virtualized domain,
1570 * Xen's pseudo-cpuid function returns a string representing the
1571 * Xen signature in %ebx, %ecx, and %edx. %eax contains the maximum
1572 * supported cpuid function. We need at least a (base + 2) leaf value
1573 * to do what we want to do. Try different base values, since the
1574 * hypervisor might use a different one depending on whether Hyper-V
1575 * emulation is switched on by default or not.
1576 */
1577 for (base = 0x40000000; base < 0x40010000; base += 0x100) {
1578 cp.cp_eax = base;
1579 (void) __cpuid_insn(&cp);
1580 regs[0] = cp.cp_ebx;
1581 regs[1] = cp.cp_ecx;
1582 regs[2] = cp.cp_edx;
1583 regs[3] = 0;
1584 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0 &&
1585 cp.cp_eax >= (base + 2)) {
1586 platform_type &= ~HW_NATIVE;
1587 platform_type |= HW_XEN_HVM;
1588 return;
1589 }
1590 }
1591 }
1592
1593 int
1594 get_hwenv(void)
1595 {
1596 ASSERT(platform_type != -1);
1597 return (platform_type);
1598 }
1599
1600 int
1601 is_controldom(void)
1602 {
1603 return (0);
1604 }
1605
1606 #else
1607
1608 int
1609 get_hwenv(void)
1610 {
1611 return (HW_XEN_PV);
1612 }
1613
1614 int
1615 is_controldom(void)
1616 {
1617 return (DOMAIN_IS_INITDOMAIN(xen_info));
1618 }
1619
1620 #endif /* __xpv */
1621
1622 /*
1623 * Make sure that we have gathered all of the CPUID leaves that we might need to
1624 * determine topology. We assume that the standard leaf 1 has already been done
1625 * and that xmaxeax has already been calculated.
1626 */
1627 static void
1628 cpuid_gather_amd_topology_leaves(cpu_t *cpu)
1629 {
1630 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1631
1632 if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
1633 struct cpuid_regs *cp;
1634
1635 cp = &cpi->cpi_extd[8];
1636 cp->cp_eax = CPUID_LEAF_EXT_8;
1637 (void) __cpuid_insn(cp);
1638 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8, cp);
1639 }
1640
1641 if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1642 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1643 struct cpuid_regs *cp;
1644
1645 cp = &cpi->cpi_extd[0x1e];
1646 cp->cp_eax = CPUID_LEAF_EXT_1e;
1647 (void) __cpuid_insn(cp);
1648 }
1649 }
1650
1651 /*
1652 * Get the APIC ID for this processor. If Leaf B is present and valid, we prefer
1653 * it to everything else. If not, and we're on an AMD system where 8000001e is
* valid, then we use that. Otherwise, we fall back to the default value for the
1655 * APIC ID in leaf 1.
1656 */
1657 static uint32_t
1658 cpuid_gather_apicid(struct cpuid_info *cpi)
1659 {
1660 /*
* Leaf B changes based on the arguments to it. Because we don't cache
1662 * it, we need to gather it again.
1663 */
1664 if (cpi->cpi_maxeax >= 0xB) {
1665 struct cpuid_regs regs;
1666 struct cpuid_regs *cp;
1667
cp = &regs;
1669 cp->cp_eax = 0xB;
1670 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1671 (void) __cpuid_insn(cp);
1672
1673 if (cp->cp_ebx != 0) {
1674 return (cp->cp_edx);
1675 }
1676 }
1677
1678 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
1679 is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1680 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1681 return (cpi->cpi_extd[0x1e].cp_eax);
1682 }
1683
1684 return (CPI_APIC_ID(cpi));
1685 }
1686
1687 /*
1688 * For AMD processors, attempt to calculate the number of chips and cores that
1689 * exist. The way that we do this varies based on the generation, because the
1690 * generations themselves have changed dramatically.
1691 *
1692 * If cpuid leaf 0x80000008 exists, that generally tells us the number of cores.
1693 * However, with the advent of family 17h (Zen) it actually tells us the number
1694 * of threads, so we need to look at leaf 0x8000001e if available to determine
1695 * its value. Otherwise, for all prior families, the number of enabled cores is
1696 * the same as threads.
1697 *
1698 * If we do not have leaf 0x80000008, then we assume that this processor does
1699 * not have anything. AMD's older CPUID specification says there's no reason to
1700 * fall back to leaf 1.
1701 *
1702 * In some virtualization cases we will not have leaf 8000001e or it will be
1703 * zero. When that happens we assume the number of threads is one.
1704 */
1705 static void
1706 cpuid_amd_ncores(struct cpuid_info *cpi, uint_t *ncpus, uint_t *ncores)
1707 {
1708 uint_t nthreads, nthread_per_core;
1709
1710 nthreads = nthread_per_core = 1;
1711
1712 if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
1713 nthreads = BITX(cpi->cpi_extd[8].cp_ecx, 7, 0) + 1;
1714 } else if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) {
1715 nthreads = CPI_CPU_COUNT(cpi);
1716 }
1717
1718 /*
1719 * For us to have threads, and know about it, we have to be at least at
1720 * family 17h and have the cpuid bit that says we have extended
1721 * topology.
1722 */
1723 if (cpi->cpi_family >= 0x17 &&
1724 is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1725 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1726 nthread_per_core = BITX(cpi->cpi_extd[0x1e].cp_ebx, 15, 8) + 1;
1727 }
1728
1729 *ncpus = nthreads;
1730 *ncores = nthreads / nthread_per_core;
1731 }
1732
1733 /*
1734 * Seed the initial values for the cores and threads for an Intel based
1735 * processor. These values will be overwritten if we detect that the processor
1736 * supports CPUID leaf 0xb.
1737 */
1738 static void
1739 cpuid_intel_ncores(struct cpuid_info *cpi, uint_t *ncpus, uint_t *ncores)
1740 {
1741 /*
1742 * Only seed the number of physical cores from the first level leaf 4
* information. The number of threads there indicates how many share the
1744 * L1 cache, which may or may not have anything to do with the number of
1745 * logical CPUs per core.
1746 */
1747 if (cpi->cpi_maxeax >= 4) {
1748 *ncores = BITX(cpi->cpi_std[4].cp_eax, 31, 26) + 1;
1749 } else {
1750 *ncores = 1;
1751 }
1752
1753 if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) {
1754 *ncpus = CPI_CPU_COUNT(cpi);
1755 } else {
1756 *ncpus = *ncores;
1757 }
1758 }
1759
1760 static boolean_t
1761 cpuid_leafB_getids(cpu_t *cpu)
1762 {
1763 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1764 struct cpuid_regs regs;
1765 struct cpuid_regs *cp;
1766
1767 if (cpi->cpi_maxeax < 0xB)
1768 return (B_FALSE);
1769
cp = &regs;
1771 cp->cp_eax = 0xB;
1772 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1773
1774 (void) __cpuid_insn(cp);
1775
1776 /*
1777 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which
1778 * indicates that the extended topology enumeration leaf is
1779 * available.
1780 */
1781 if (cp->cp_ebx != 0) {
1782 uint32_t x2apic_id = 0;
1783 uint_t coreid_shift = 0;
1784 uint_t ncpu_per_core = 1;
1785 uint_t chipid_shift = 0;
1786 uint_t ncpu_per_chip = 1;
1787 uint_t i;
1788 uint_t level;
1789
1790 for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
1791 cp->cp_eax = 0xB;
1792 cp->cp_ecx = i;
1793
1794 (void) __cpuid_insn(cp);
1795 level = CPI_CPU_LEVEL_TYPE(cp);
1796
1797 if (level == 1) {
1798 x2apic_id = cp->cp_edx;
1799 coreid_shift = BITX(cp->cp_eax, 4, 0);
1800 ncpu_per_core = BITX(cp->cp_ebx, 15, 0);
1801 } else if (level == 2) {
1802 x2apic_id = cp->cp_edx;
1803 chipid_shift = BITX(cp->cp_eax, 4, 0);
1804 ncpu_per_chip = BITX(cp->cp_ebx, 15, 0);
1805 }
1806 }
1807
1808 /*
1809 * cpi_apicid is taken care of in cpuid_gather_apicid.
1810 */
1811 cpi->cpi_ncpu_per_chip = ncpu_per_chip;
1812 cpi->cpi_ncore_per_chip = ncpu_per_chip /
1813 ncpu_per_core;
1814 cpi->cpi_chipid = x2apic_id >> chipid_shift;
1815 cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1);
1816 cpi->cpi_coreid = x2apic_id >> coreid_shift;
1817 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
1818 cpi->cpi_procnodeid = cpi->cpi_chipid;
1819 cpi->cpi_compunitid = cpi->cpi_coreid;
1820
1821 if (coreid_shift > 0 && chipid_shift > coreid_shift) {
1822 cpi->cpi_nthread_bits = coreid_shift;
1823 cpi->cpi_ncore_bits = chipid_shift - coreid_shift;
1824 }
1825
1826 return (B_TRUE);
1827 } else {
1828 return (B_FALSE);
1829 }
1830 }
1831
1832 static void
1833 cpuid_intel_getids(cpu_t *cpu, void *feature)
1834 {
1835 uint_t i;
1836 uint_t chipid_shift = 0;
1837 uint_t coreid_shift = 0;
1838 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1839
1840 /*
1841 * There are no compute units or processor nodes currently on Intel.
1842 * Always set these to one.
1843 */
1844 cpi->cpi_procnodes_per_pkg = 1;
1845 cpi->cpi_cores_per_compunit = 1;
1846
1847 /*
1848 * If cpuid Leaf B is present, use that to try and get this information.
1849 * It will be the most accurate for Intel CPUs.
1850 */
1851 if (cpuid_leafB_getids(cpu))
1852 return;
1853
1854 /*
1855 * In this case, we have the leaf 1 and leaf 4 values for ncpu_per_chip
1856 * and ncore_per_chip. These represent the largest power of two values
1857 * that we need to cover all of the IDs in the system. Therefore, we use
1858 * those values to seed the number of bits needed to cover information
1859 * in the case when leaf B is not available. These values will probably
1860 * be larger than required, but that's OK.
1861 */
1862 cpi->cpi_nthread_bits = ddi_fls(cpi->cpi_ncpu_per_chip);
1863 cpi->cpi_ncore_bits = ddi_fls(cpi->cpi_ncore_per_chip);
1864
1865 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1)
1866 chipid_shift++;
1867
1868 cpi->cpi_chipid = cpi->cpi_apicid >> chipid_shift;
1869 cpi->cpi_clogid = cpi->cpi_apicid & ((1 << chipid_shift) - 1);
1870
1871 if (is_x86_feature(feature, X86FSET_CMP)) {
1872 /*
1873 * Multi-core (and possibly multi-threaded)
1874 * processors.
1875 */
1876 uint_t ncpu_per_core;
1877 if (cpi->cpi_ncore_per_chip == 1)
1878 ncpu_per_core = cpi->cpi_ncpu_per_chip;
1879 else if (cpi->cpi_ncore_per_chip > 1)
1880 ncpu_per_core = cpi->cpi_ncpu_per_chip /
1881 cpi->cpi_ncore_per_chip;
1882 /*
1883 * 8bit APIC IDs on dual core Pentiums
1884 * look like this:
1885 *
1886 * +-----------------------+------+------+
1887 * | Physical Package ID | MC | HT |
1888 * +-----------------------+------+------+
1889 * <------- chipid -------->
1890 * <------- coreid --------------->
1891 * <--- clogid -->
1892 * <------>
1893 * pkgcoreid
1894 *
1895 * Where the number of bits necessary to
1896 * represent MC and HT fields together equals
1897 * to the minimum number of bits necessary to
1898 * store the value of cpi->cpi_ncpu_per_chip.
1899 * Of those bits, the MC part uses the number
1900 * of bits necessary to store the value of
1901 * cpi->cpi_ncore_per_chip.
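*
* For example, on a hypothetical package with
* cpi_ncpu_per_chip == 4 and cpi_ncore_per_chip == 2 (two cores
* with two threads each), coreid_shift works out to 1 and
* chipid_shift to 2, so an APIC ID of 5 yields chipid 1,
* clogid 1, coreid 2 and pkgcoreid 0.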
1902 */
1903 for (i = 1; i < ncpu_per_core; i <<= 1)
1904 coreid_shift++;
1905 cpi->cpi_coreid = cpi->cpi_apicid >> coreid_shift;
1906 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
1907 } else if (is_x86_feature(feature, X86FSET_HTT)) {
1908 /*
1909 * Single-core multi-threaded processors.
1910 */
1911 cpi->cpi_coreid = cpi->cpi_chipid;
1912 cpi->cpi_pkgcoreid = 0;
1913 } else {
1914 /*
1915 * Single-core single-thread processors.
1916 */
1917 cpi->cpi_coreid = cpu->cpu_id;
1918 cpi->cpi_pkgcoreid = 0;
1919 }
1920 cpi->cpi_procnodeid = cpi->cpi_chipid;
1921 cpi->cpi_compunitid = cpi->cpi_coreid;
1922 }
1923
1924 /*
1925 * Historically, AMD has had CMP chips with only a single thread per core.
1926 * However, starting in family 17h (Zen), this has changed and they now have
1927 * multiple threads. Our internal core id needs to be a unique value.
1928 *
1929 * To determine the core id of an AMD system, if we're from a family before 17h,
1930 * then we just use the cpu id, as that gives us a good value that will be
1931 * unique for each core. If instead, we're on family 17h or later, then we need
* to do something more complicated. CPUID leaf 0x8000001e can tell us
* how many threads are in each core. Based on that, we'll shift the APIC ID.
1934 * We can't use the normal core id in that leaf as it's only unique within the
1935 * socket, which is perfect for cpi_pkgcoreid, but not us.
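*
* For example, with two threads per core the APIC ID is simply shifted right
* by one bit, so the two siblings that share a core collapse onto the same
* coreid.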
1936 */
1937 static id_t
1938 cpuid_amd_get_coreid(cpu_t *cpu)
1939 {
1940 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1941
1942 if (cpi->cpi_family >= 0x17 &&
1943 is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1944 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1945 uint_t nthreads = BITX(cpi->cpi_extd[0x1e].cp_ebx, 15, 8) + 1;
1946 if (nthreads > 1) {
1947 VERIFY3U(nthreads, ==, 2);
1948 return (cpi->cpi_apicid >> 1);
1949 }
1950 }
1951
1952 return (cpu->cpu_id);
1953 }
1954
1955 /*
* Calculating IDs on AMD is a more challenging task. This is notable
* because of the following two facts:
1958 *
1959 * 1. Before family 0x17 (Zen), there was no support for SMT and there was
1960 * also no way to get an actual unique core id from the system. As such, we
1961 * synthesize this case by using cpu->cpu_id. This scheme does not,
1962 * however, guarantee that sibling cores of a chip will have sequential
1963 * coreids starting at a multiple of the number of cores per chip - that is
1964 * usually the case, but if the ACPI MADT table is presented in a different
1965 * order then we need to perform a few more gymnastics for the pkgcoreid.
1966 *
* 2. In families 0x15 and 0x16 (Bulldozer and co.) the cores came in groups
1968 * called compute units. These compute units share the L1I cache, L2 cache,
1969 * and the FPU. To deal with this, a new topology leaf was added in
1970 * 0x8000001e. However, parts of this leaf have different meanings
1971 * once we get to family 0x17.
1972 */
1973
1974 static void
1975 cpuid_amd_getids(cpu_t *cpu, uchar_t *features)
1976 {
1977 int i, first_half, coreidsz;
1978 uint32_t nb_caps_reg;
1979 uint_t node2_1;
1980 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1981 struct cpuid_regs *cp;
1982
1983 /*
1984 * Calculate the core id (this comes from hardware in family 0x17 if it
1985 * hasn't been stripped by virtualization). We always set the compute
1986 * unit id to the same value. Also, initialize the default number of
1987 * cores per compute unit and nodes per package. This will be
1988 * overwritten when we know information about a particular family.
1989 */
1990 cpi->cpi_coreid = cpuid_amd_get_coreid(cpu);
1991 cpi->cpi_compunitid = cpi->cpi_coreid;
1992 cpi->cpi_cores_per_compunit = 1;
1993 cpi->cpi_procnodes_per_pkg = 1;
1994
1995 /*
1996 * To construct the logical ID, we need to determine how many APIC IDs
1997 * are dedicated to the cores and threads. This is provided for us in
1998 * 0x80000008. However, if it's not present (say due to virtualization),
1999 * then we assume it's one. This should be present on all 64-bit AMD
2000 * processors. It was added in family 0xf (Hammer).
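*
* For example, a hypothetical part reporting an ApicIdCoreIdSize of 4 in
* %ecx[15:12] of leaf 0x80000008 dedicates the low 4 bits of the APIC ID
* to identifying the logical processor within the package, so cpi_clogid
* below is simply (cpi_apicid & 0xf).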
2001 */
2002 if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
2003 coreidsz = BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12);
2004
2005 /*
2006 * In AMD parlance chip is really a node while illumos
2007 * uses chip as equivalent to socket/package.
2008 */
2009 if (coreidsz == 0) {
2010 /* Use legacy method */
2011 for (i = 1; i < cpi->cpi_ncore_per_chip; i <<= 1)
2012 coreidsz++;
2013 if (coreidsz == 0)
2014 coreidsz = 1;
2015 }
2016 } else {
2017 /* Assume single-core part */
2018 coreidsz = 1;
2019 }
2020 cpi->cpi_clogid = cpi->cpi_apicid & ((1 << coreidsz) - 1);
2021
2022 /*
2023 * The package core ID varies depending on the family. For family 17h,
2024 * we can get this directly from leaf CPUID_LEAF_EXT_1e. Otherwise, we
* can use the clogid as is. When family 17h is virtualized and we don't
* have valid data in the leaf, we won't think we have SMT, in which case
* the cpi_clogid is again sufficient.
2029 */
2030 if (cpi->cpi_family >= 0x17 &&
2031 is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
2032 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e &&
2033 cpi->cpi_extd[0x1e].cp_ebx != 0) {
2034 cpi->cpi_pkgcoreid = BITX(cpi->cpi_extd[0x1e].cp_ebx, 7, 0);
2035 } else {
2036 cpi->cpi_pkgcoreid = cpi->cpi_clogid;
2037 }
2038
2039 /*
2040 * Obtain the node ID and compute unit IDs. If we're on family 0x15
2041 * (bulldozer) or newer, then we can derive all of this from leaf
2042 * CPUID_LEAF_EXT_1e. Otherwise, the method varies by family.
2043 */
2044 if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
2045 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
2046 cp = &cpi->cpi_extd[0x1e];
2047
2048 cpi->cpi_procnodes_per_pkg = BITX(cp->cp_ecx, 10, 8) + 1;
2049 cpi->cpi_procnodeid = BITX(cp->cp_ecx, 7, 0);
2050
2051 /*
2052 * For Bulldozer-era CPUs, recalculate the compute unit
2053 * information.
2054 */
2055 if (cpi->cpi_family >= 0x15 && cpi->cpi_family < 0x17) {
2056 cpi->cpi_cores_per_compunit =
2057 BITX(cp->cp_ebx, 15, 8) + 1;
2058 cpi->cpi_compunitid = BITX(cp->cp_ebx, 7, 0) +
2059 (cpi->cpi_ncore_per_chip /
2060 cpi->cpi_cores_per_compunit) *
2061 (cpi->cpi_procnodeid /
2062 cpi->cpi_procnodes_per_pkg);
2063 }
2064 } else if (cpi->cpi_family == 0xf || cpi->cpi_family >= 0x11) {
2065 cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7;
2066 } else if (cpi->cpi_family == 0x10) {
2067 /*
2068 * See if we are a multi-node processor.
* All processors in the system have the same number of nodes.
2070 */
2071 nb_caps_reg = pci_getl_func(0, 24, 3, 0xe8);
2072 if ((cpi->cpi_model < 8) || BITX(nb_caps_reg, 29, 29) == 0) {
2073 /* Single-node */
2074 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5,
2075 coreidsz);
2076 } else {
2077
2078 /*
2079 * Multi-node revision D (2 nodes per package
2080 * are supported)
2081 */
2082 cpi->cpi_procnodes_per_pkg = 2;
2083
2084 first_half = (cpi->cpi_pkgcoreid <=
2085 (cpi->cpi_ncore_per_chip/2 - 1));
2086
2087 if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) {
2088 /* We are BSP */
2089 cpi->cpi_procnodeid = (first_half ? 0 : 1);
2090 } else {
2091
2092 /* We are AP */
2093 /* NodeId[2:1] bits to use for reading F3xe8 */
2094 node2_1 = BITX(cpi->cpi_apicid, 5, 4) << 1;
2095
2096 nb_caps_reg =
2097 pci_getl_func(0, 24 + node2_1, 3, 0xe8);
2098
2099 /*
2100 * Check IntNodeNum bit (31:30, but bit 31 is
2101 * always 0 on dual-node processors)
2102 */
2103 if (BITX(nb_caps_reg, 30, 30) == 0)
2104 cpi->cpi_procnodeid = node2_1 +
2105 !first_half;
2106 else
2107 cpi->cpi_procnodeid = node2_1 +
2108 first_half;
2109 }
2110 }
2111 } else {
2112 cpi->cpi_procnodeid = 0;
2113 }
2114
2115 cpi->cpi_chipid =
2116 cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg;
2117
2118 cpi->cpi_ncore_bits = coreidsz;
2119 cpi->cpi_nthread_bits = ddi_fls(cpi->cpi_ncpu_per_chip /
2120 cpi->cpi_ncore_per_chip);
2121 }
2122
2123 static void
2124 spec_l1d_flush_noop(void)
2125 {
2126 }
2127
2128 static void
2129 spec_l1d_flush_msr(void)
2130 {
2131 wrmsr(MSR_IA32_FLUSH_CMD, IA32_FLUSH_CMD_L1D);
2132 }
2133
2134 void (*spec_l1d_flush)(void) = spec_l1d_flush_noop;
2135
2136 static void
2137 cpuid_scan_security(cpu_t *cpu, uchar_t *featureset)
2138 {
2139 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2140
2141 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
2142 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
2143 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBPB)
2144 add_x86_feature(featureset, X86FSET_IBPB);
2145 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBRS)
2146 add_x86_feature(featureset, X86FSET_IBRS);
2147 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_STIBP)
2148 add_x86_feature(featureset, X86FSET_STIBP);
2149 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBRS_ALL)
2150 add_x86_feature(featureset, X86FSET_IBRS_ALL);
2151 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_STIBP_ALL)
2152 add_x86_feature(featureset, X86FSET_STIBP_ALL);
2153 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_PREFER_IBRS)
2154 add_x86_feature(featureset, X86FSET_RSBA);
2155 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_SSBD)
2156 add_x86_feature(featureset, X86FSET_SSBD);
2157 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_VIRT_SSBD)
2158 add_x86_feature(featureset, X86FSET_SSBD_VIRT);
2159 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_SSB_NO)
2160 add_x86_feature(featureset, X86FSET_SSB_NO);
2161 } else if (cpi->cpi_vendor == X86_VENDOR_Intel &&
2162 cpi->cpi_maxeax >= 7) {
2163 struct cpuid_regs *ecp;
2164 ecp = &cpi->cpi_std[7];
2165
2166 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_SPEC_CTRL) {
2167 add_x86_feature(featureset, X86FSET_IBRS);
2168 add_x86_feature(featureset, X86FSET_IBPB);
2169 }
2170
2171 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_STIBP) {
2172 add_x86_feature(featureset, X86FSET_STIBP);
2173 }
2174
2175 /*
2176 * Don't read the arch caps MSR on xpv where we lack the
2177 * on_trap().
2178 */
2179 #ifndef __xpv
2180 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_ARCH_CAPS) {
2181 on_trap_data_t otd;
2182
2183 /*
2184 * Be paranoid and assume we'll get a #GP.
2185 */
2186 if (!on_trap(&otd, OT_DATA_ACCESS)) {
2187 uint64_t reg;
2188
2189 reg = rdmsr(MSR_IA32_ARCH_CAPABILITIES);
2190 if (reg & IA32_ARCH_CAP_RDCL_NO) {
2191 add_x86_feature(featureset,
2192 X86FSET_RDCL_NO);
2193 }
2194 if (reg & IA32_ARCH_CAP_IBRS_ALL) {
2195 add_x86_feature(featureset,
2196 X86FSET_IBRS_ALL);
2197 }
2198 if (reg & IA32_ARCH_CAP_RSBA) {
2199 add_x86_feature(featureset,
2200 X86FSET_RSBA);
2201 }
2202 if (reg & IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY) {
2203 add_x86_feature(featureset,
2204 X86FSET_L1D_VM_NO);
2205 }
2206 if (reg & IA32_ARCH_CAP_SSB_NO) {
2207 add_x86_feature(featureset,
2208 X86FSET_SSB_NO);
2209 }
2210 }
2211 no_trap();
2212 }
2213 #endif /* !__xpv */
2214
2215 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_SSBD)
2216 add_x86_feature(featureset, X86FSET_SSBD);
2217
2218 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_FLUSH_CMD)
2219 add_x86_feature(featureset, X86FSET_FLUSH_CMD);
2220 }
2221
2222 if (cpu->cpu_id != 0)
2223 return;
2224
2225 /*
2226 * We're the boot CPU, so let's figure out our L1TF status.
2227 *
2228 * First, if this is a RDCL_NO CPU, then we are not vulnerable: we don't
2229 * need to exclude with ht_acquire(), and we don't need to flush.
2230 */
2231 if (is_x86_feature(featureset, X86FSET_RDCL_NO)) {
2232 extern int ht_exclusion;
2233 ht_exclusion = 0;
2234 spec_l1d_flush = spec_l1d_flush_noop;
2235 membar_producer();
2236 return;
2237 }
2238
2239 /*
2240 * If HT is enabled, we will need HT exclusion, as well as the flush on
2241 * VM entry. If HT isn't enabled, we still need at least the flush for
2242 * the L1TF sequential case.
2243 *
2244 * However, if X86FSET_L1D_VM_NO is set, we're most likely running
2245 * inside a VM ourselves, and we don't need the flush.
2246 *
2247 * If we don't have the FLUSH_CMD available at all, we'd better just
2248 * hope HT is disabled.
2249 */
2250 if (is_x86_feature(featureset, X86FSET_FLUSH_CMD) &&
2251 !is_x86_feature(featureset, X86FSET_L1D_VM_NO)) {
2252 spec_l1d_flush = spec_l1d_flush_msr;
2253 } else {
2254 spec_l1d_flush = spec_l1d_flush_noop;
2255 }
2256
2257 membar_producer();
2258 }
2259
2260 /*
2261 * Setup XFeature_Enabled_Mask register. Required by xsave feature.
2262 */
2263 void
2264 setup_xfem(void)
2265 {
2266 uint64_t flags = XFEATURE_LEGACY_FP;
2267
2268 ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
2269
2270 if (is_x86_feature(x86_featureset, X86FSET_SSE))
2271 flags |= XFEATURE_SSE;
2272
2273 if (is_x86_feature(x86_featureset, X86FSET_AVX))
2274 flags |= XFEATURE_AVX;
2275
2276 if (is_x86_feature(x86_featureset, X86FSET_AVX512F))
2277 flags |= XFEATURE_AVX512;
2278
2279 set_xcr(XFEATURE_ENABLED_MASK, flags);
2280
2281 xsave_bv_all = flags;
2282 }
2283
2284 static void
2285 cpuid_pass1_topology(cpu_t *cpu, uchar_t *featureset)
2286 {
2287 struct cpuid_info *cpi;
2288
2289 cpi = cpu->cpu_m.mcpu_cpi;
2290
2291 if (cpi->cpi_vendor == X86_VENDOR_AMD) {
2292 cpuid_gather_amd_topology_leaves(cpu);
2293 }
2294
2295 cpi->cpi_apicid = cpuid_gather_apicid(cpi);
2296
2297 /*
2298 * Before we can calculate the IDs that we should assign to this
2299 * processor, we need to understand how many cores and threads it has.
2300 */
2301 switch (cpi->cpi_vendor) {
2302 case X86_VENDOR_Intel:
2303 cpuid_intel_ncores(cpi, &cpi->cpi_ncpu_per_chip,
2304 &cpi->cpi_ncore_per_chip);
2305 break;
2306 case X86_VENDOR_AMD:
2307 cpuid_amd_ncores(cpi, &cpi->cpi_ncpu_per_chip,
2308 &cpi->cpi_ncore_per_chip);
2309 break;
2310 default:
2311 /*
* If we have some other x86 compatible chip, it's not clear how
* it would behave. The most common case is virtualization
2314 * today, though there are also 64-bit VIA chips. Assume that
2315 * all we can get is the basic Leaf 1 HTT information.
2316 */
2317 if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) {
2318 cpi->cpi_ncore_per_chip = 1;
2319 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi);
2320 }
2321 break;
2322 }
2323
2324 /*
2325 * Based on the calculated number of threads and cores, potentially
2326 * assign the HTT and CMT features.
2327 */
2328 if (cpi->cpi_ncore_per_chip > 1) {
2329 add_x86_feature(featureset, X86FSET_CMP);
2330 }
2331
2332 if (cpi->cpi_ncpu_per_chip > 1 &&
2333 cpi->cpi_ncpu_per_chip != cpi->cpi_ncore_per_chip) {
2334 add_x86_feature(featureset, X86FSET_HTT);
2335 }
2336
2337 /*
2338 * Now that has been set up, we need to go through and calculate all of
2339 * the rest of the parameters that exist. If we think the CPU doesn't
2340 * have either SMT (HTT) or CMP, then we basically go through and fake
2341 * up information in some way. The most likely case for this is
2342 * virtualization where we have a lot of partial topology information.
2343 */
2344 if (!is_x86_feature(featureset, X86FSET_HTT) &&
2345 !is_x86_feature(featureset, X86FSET_CMP)) {
2346 /*
2347 * This is a single core, single-threaded processor.
2348 */
2349 cpi->cpi_procnodes_per_pkg = 1;
2350 cpi->cpi_cores_per_compunit = 1;
2351 cpi->cpi_compunitid = 0;
2352 cpi->cpi_chipid = -1;
2353 cpi->cpi_clogid = 0;
2354 cpi->cpi_coreid = cpu->cpu_id;
2355 cpi->cpi_pkgcoreid = 0;
2356 if (cpi->cpi_vendor == X86_VENDOR_AMD) {
2357 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 3, 0);
2358 } else {
2359 cpi->cpi_procnodeid = cpi->cpi_chipid;
2360 }
2361 } else {
2362 switch (cpi->cpi_vendor) {
2363 case X86_VENDOR_Intel:
2364 cpuid_intel_getids(cpu, featureset);
2365 break;
2366 case X86_VENDOR_AMD:
2367 cpuid_amd_getids(cpu, featureset);
2368 break;
2369 default:
2370 /*
2371 * In this case, it's hard to say what we should do.
2372 * We're going to model them to the OS as single core
2373 * threads. We don't have a good identifier for them, so
2374 * we're just going to use the cpu id all on a single
2375 * chip.
2376 *
2377 * This case has historically been different from the
2378 * case above where we don't have HTT or CMP. While they
2379 * could be combined, we've opted to keep it separate to
2380 * minimize the risk of topology changes in weird cases.
2381 */
2382 cpi->cpi_procnodes_per_pkg = 1;
2383 cpi->cpi_cores_per_compunit = 1;
2384 cpi->cpi_chipid = 0;
2385 cpi->cpi_coreid = cpu->cpu_id;
2386 cpi->cpi_clogid = cpu->cpu_id;
2387 cpi->cpi_pkgcoreid = cpu->cpu_id;
2388 cpi->cpi_procnodeid = cpi->cpi_chipid;
2389 cpi->cpi_compunitid = cpi->cpi_coreid;
2390 break;
2391 }
2392 }
2393 }
2394
2395 void
2396 cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
2397 {
2398 uint32_t mask_ecx, mask_edx;
2399 struct cpuid_info *cpi;
2400 struct cpuid_regs *cp;
2401 int xcpuid;
2402 #if !defined(__xpv)
2403 extern int idle_cpu_prefer_mwait;
2404 #endif
2405
2406 /*
2407 * Space statically allocated for BSP, ensure pointer is set
2408 */
2409 if (cpu->cpu_id == 0) {
2410 if (cpu->cpu_m.mcpu_cpi == NULL)
2411 cpu->cpu_m.mcpu_cpi = &cpuid_info0;
2412 }
2413
2414 add_x86_feature(featureset, X86FSET_CPUID);
2415
2416 cpi = cpu->cpu_m.mcpu_cpi;
2417 ASSERT(cpi != NULL);
2418 cp = &cpi->cpi_std[0];
2419 cp->cp_eax = 0;
2420 cpi->cpi_maxeax = __cpuid_insn(cp);
2421 {
2422 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr;
2423 *iptr++ = cp->cp_ebx;
2424 *iptr++ = cp->cp_edx;
2425 *iptr++ = cp->cp_ecx;
2426 *(char *)&cpi->cpi_vendorstr[12] = '\0';
2427 }
2428
2429 cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr);
2430 x86_vendor = cpi->cpi_vendor; /* for compatibility */
2431
2432 /*
2433 * Limit the range in case of weird hardware
2434 */
2435 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX)
2436 cpi->cpi_maxeax = CPI_MAXEAX_MAX;
2437 if (cpi->cpi_maxeax < 1)
2438 goto pass1_done;
2439
2440 cp = &cpi->cpi_std[1];
2441 cp->cp_eax = 1;
2442 (void) __cpuid_insn(cp);
2443
2444 /*
2445 * Extract identifying constants for easy access.
2446 */
2447 cpi->cpi_model = CPI_MODEL(cpi);
2448 cpi->cpi_family = CPI_FAMILY(cpi);
2449
2450 if (cpi->cpi_family == 0xf)
2451 cpi->cpi_family += CPI_FAMILY_XTD(cpi);
2452
2453 /*
2454 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf.
2455 * Intel, and presumably everyone else, uses model == 0xf, as
2456 * one would expect (max value means possible overflow). Sigh.
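*
* For example, an Intel part with base family 0x6, extended model 0x9 and
* base model 0xe ends up with cpi_model = 0x9e after the adjustment below.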
2457 */
2458
2459 switch (cpi->cpi_vendor) {
2460 case X86_VENDOR_Intel:
2461 if (IS_EXTENDED_MODEL_INTEL(cpi))
2462 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
2463 break;
2464 case X86_VENDOR_AMD:
2465 if (CPI_FAMILY(cpi) == 0xf)
2466 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
2467 break;
2468 default:
2469 if (cpi->cpi_model == 0xf)
2470 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
2471 break;
2472 }
2473
2474 cpi->cpi_step = CPI_STEP(cpi);
2475 cpi->cpi_brandid = CPI_BRANDID(cpi);
2476
2477 /*
2478 * *default* assumptions:
2479 * - believe %edx feature word
2480 * - ignore %ecx feature word
2481 * - 32-bit virtual and physical addressing
2482 */
2483 mask_edx = 0xffffffff;
2484 mask_ecx = 0;
2485
2486 cpi->cpi_pabits = cpi->cpi_vabits = 32;
2487
2488 switch (cpi->cpi_vendor) {
2489 case X86_VENDOR_Intel:
2490 if (cpi->cpi_family == 5)
2491 x86_type = X86_TYPE_P5;
2492 else if (IS_LEGACY_P6(cpi)) {
2493 x86_type = X86_TYPE_P6;
2494 pentiumpro_bug4046376 = 1;
2495 /*
2496 * Clear the SEP bit when it was set erroneously
2497 */
2498 if (cpi->cpi_model < 3 && cpi->cpi_step < 3)
2499 cp->cp_edx &= ~CPUID_INTC_EDX_SEP;
2500 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) {
2501 x86_type = X86_TYPE_P4;
2502 /*
2503 * We don't currently depend on any of the %ecx
2504 * features until Prescott, so we'll only check
2505 * this from P4 onwards. We might want to revisit
2506 * that idea later.
2507 */
2508 mask_ecx = 0xffffffff;
2509 } else if (cpi->cpi_family > 0xf)
2510 mask_ecx = 0xffffffff;
2511 /*
2512 * We don't support MONITOR/MWAIT if leaf 5 is not available
2513 * to obtain the monitor linesize.
2514 */
2515 if (cpi->cpi_maxeax < 5)
2516 mask_ecx &= ~CPUID_INTC_ECX_MON;
2517 break;
2518 case X86_VENDOR_IntelClone:
2519 default:
2520 break;
2521 case X86_VENDOR_AMD:
2522 #if defined(OPTERON_ERRATUM_108)
2523 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) {
2524 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0;
2525 cpi->cpi_model = 0xc;
2526 } else
2527 #endif
2528 if (cpi->cpi_family == 5) {
2529 /*
2530 * AMD K5 and K6
2531 *
2532 * These CPUs have an incomplete implementation
2533 * of MCA/MCE which we mask away.
2534 */
2535 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA);
2536
2537 /*
2538 * Model 0 uses the wrong (APIC) bit
2539 * to indicate PGE. Fix it here.
2540 */
2541 if (cpi->cpi_model == 0) {
2542 if (cp->cp_edx & 0x200) {
2543 cp->cp_edx &= ~0x200;
2544 cp->cp_edx |= CPUID_INTC_EDX_PGE;
2545 }
2546 }
2547
2548 /*
2549 * Early models had problems w/ MMX; disable.
2550 */
2551 if (cpi->cpi_model < 6)
2552 mask_edx &= ~CPUID_INTC_EDX_MMX;
2553 }
2554
2555 /*
2556 * For newer families, SSE3 and CX16, at least, are valid;
* enable them all.
2558 */
2559 if (cpi->cpi_family >= 0xf)
2560 mask_ecx = 0xffffffff;
2561 /*
2562 * We don't support MONITOR/MWAIT if leaf 5 is not available
2563 * to obtain the monitor linesize.
2564 */
2565 if (cpi->cpi_maxeax < 5)
2566 mask_ecx &= ~CPUID_INTC_ECX_MON;
2567
2568 #if !defined(__xpv)
2569 /*
2570 * AMD has not historically used MWAIT in the CPU's idle loop.
2571 * Pre-family-10h Opterons do not have the MWAIT instruction. We
2572 * know for certain that in at least family 17h, per AMD, mwait
2573 * is preferred. Families in-between are less certain.
2574 */
2575 if (cpi->cpi_family < 0x17) {
2576 idle_cpu_prefer_mwait = 0;
2577 }
2578 #endif
2579
2580 break;
2581 case X86_VENDOR_TM:
2582 /*
* work around the NT workaround in CMS 4.1
2584 */
2585 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 &&
2586 (cpi->cpi_step == 2 || cpi->cpi_step == 3))
2587 cp->cp_edx |= CPUID_INTC_EDX_CX8;
2588 break;
2589 case X86_VENDOR_Centaur:
2590 /*
* work around the NT workarounds again
2592 */
2593 if (cpi->cpi_family == 6)
2594 cp->cp_edx |= CPUID_INTC_EDX_CX8;
2595 break;
2596 case X86_VENDOR_Cyrix:
2597 /*
2598 * We rely heavily on the probing in locore
2599 * to actually figure out what parts, if any,
2600 * of the Cyrix cpuid instruction to believe.
2601 */
2602 switch (x86_type) {
2603 case X86_TYPE_CYRIX_486:
2604 mask_edx = 0;
2605 break;
2606 case X86_TYPE_CYRIX_6x86:
2607 mask_edx = 0;
2608 break;
2609 case X86_TYPE_CYRIX_6x86L:
2610 mask_edx =
2611 CPUID_INTC_EDX_DE |
2612 CPUID_INTC_EDX_CX8;
2613 break;
2614 case X86_TYPE_CYRIX_6x86MX:
2615 mask_edx =
2616 CPUID_INTC_EDX_DE |
2617 CPUID_INTC_EDX_MSR |
2618 CPUID_INTC_EDX_CX8 |
2619 CPUID_INTC_EDX_PGE |
2620 CPUID_INTC_EDX_CMOV |
2621 CPUID_INTC_EDX_MMX;
2622 break;
2623 case X86_TYPE_CYRIX_GXm:
2624 mask_edx =
2625 CPUID_INTC_EDX_MSR |
2626 CPUID_INTC_EDX_CX8 |
2627 CPUID_INTC_EDX_CMOV |
2628 CPUID_INTC_EDX_MMX;
2629 break;
2630 case X86_TYPE_CYRIX_MediaGX:
2631 break;
2632 case X86_TYPE_CYRIX_MII:
2633 case X86_TYPE_VIA_CYRIX_III:
2634 mask_edx =
2635 CPUID_INTC_EDX_DE |
2636 CPUID_INTC_EDX_TSC |
2637 CPUID_INTC_EDX_MSR |
2638 CPUID_INTC_EDX_CX8 |
2639 CPUID_INTC_EDX_PGE |
2640 CPUID_INTC_EDX_CMOV |
2641 CPUID_INTC_EDX_MMX;
2642 break;
2643 default:
2644 break;
2645 }
2646 break;
2647 }
2648
2649 #if defined(__xpv)
2650 /*
2651 * Do not support MONITOR/MWAIT under a hypervisor
2652 */
2653 mask_ecx &= ~CPUID_INTC_ECX_MON;
2654 /*
2655 * Do not support XSAVE under a hypervisor for now
2656 */
2657 xsave_force_disable = B_TRUE;
2658
2659 #endif /* __xpv */
2660
2661 if (xsave_force_disable) {
2662 mask_ecx &= ~CPUID_INTC_ECX_XSAVE;
2663 mask_ecx &= ~CPUID_INTC_ECX_AVX;
2664 mask_ecx &= ~CPUID_INTC_ECX_F16C;
2665 mask_ecx &= ~CPUID_INTC_ECX_FMA;
2666 }
2667
2668 /*
2669 * Now we've figured out the masks that determine
2670 * which bits we choose to believe, apply the masks
2671 * to the feature words, then map the kernel's view
2672 * of these feature words into its feature word.
2673 */
2674 cp->cp_edx &= mask_edx;
2675 cp->cp_ecx &= mask_ecx;
2676
2677 /*
2678 * apply any platform restrictions (we don't call this
2679 * immediately after __cpuid_insn here, because we need the
2680 * workarounds applied above first)
2681 */
2682 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp);
2683
2684 /*
2685 * In addition to ecx and edx, Intel and AMD are storing a bunch of
2686 * instruction set extensions in leaf 7's ebx, ecx, and edx.
2687 */
2688 if (cpi->cpi_maxeax >= 7) {
2689 struct cpuid_regs *ecp;
2690 ecp = &cpi->cpi_std[7];
2691 ecp->cp_eax = 7;
2692 ecp->cp_ecx = 0;
2693 (void) __cpuid_insn(ecp);
2694
2695 /*
2696 * If XSAVE has been disabled, just ignore all of the
2697 * extended-save-area dependent flags here.
2698 */
2699 if (xsave_force_disable) {
2700 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_BMI1;
2701 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_BMI2;
2702 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_AVX2;
2703 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_MPX;
2704 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_ALL_AVX512;
2705 ecp->cp_ecx &= ~CPUID_INTC_ECX_7_0_ALL_AVX512;
2706 ecp->cp_edx &= ~CPUID_INTC_EDX_7_0_ALL_AVX512;
2707 }
2708
2709 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_SMEP)
2710 add_x86_feature(featureset, X86FSET_SMEP);
2711
2712 /*
2713 * We check disable_smap here in addition to in startup_smap()
2714 * to ensure CPUs that aren't the boot CPU don't accidentally
2715 * include it in the feature set and thus generate a mismatched
2716 * x86 feature set across CPUs.
2717 */
2718 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_SMAP &&
2719 disable_smap == 0)
2720 add_x86_feature(featureset, X86FSET_SMAP);
2721
2722 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_RDSEED)
2723 add_x86_feature(featureset, X86FSET_RDSEED);
2724
2725 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_ADX)
2726 add_x86_feature(featureset, X86FSET_ADX);
2727
2728 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_FSGSBASE)
2729 add_x86_feature(featureset, X86FSET_FSGSBASE);
2730
2731 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_CLFLUSHOPT)
2732 add_x86_feature(featureset, X86FSET_CLFLUSHOPT);
2733
2734 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
2735 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_INVPCID)
2736 add_x86_feature(featureset, X86FSET_INVPCID);
2737
2738 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_MPX)
2739 add_x86_feature(featureset, X86FSET_MPX);
2740
2741 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_CLWB)
2742 add_x86_feature(featureset, X86FSET_CLWB);
2743 }
2744 }
2745
2746 /*
2747 * fold in overrides from the "eeprom" mechanism
2748 */
2749 cp->cp_edx |= cpuid_feature_edx_include;
2750 cp->cp_edx &= ~cpuid_feature_edx_exclude;
2751
2752 cp->cp_ecx |= cpuid_feature_ecx_include;
2753 cp->cp_ecx &= ~cpuid_feature_ecx_exclude;
2754
2755 if (cp->cp_edx & CPUID_INTC_EDX_PSE) {
2756 add_x86_feature(featureset, X86FSET_LARGEPAGE);
2757 }
2758 if (cp->cp_edx & CPUID_INTC_EDX_TSC) {
2759 add_x86_feature(featureset, X86FSET_TSC);
2760 }
2761 if (cp->cp_edx & CPUID_INTC_EDX_MSR) {
2762 add_x86_feature(featureset, X86FSET_MSR);
2763 }
2764 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) {
2765 add_x86_feature(featureset, X86FSET_MTRR);
2766 }
2767 if (cp->cp_edx & CPUID_INTC_EDX_PGE) {
2768 add_x86_feature(featureset, X86FSET_PGE);
2769 }
2770 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) {
2771 add_x86_feature(featureset, X86FSET_CMOV);
2772 }
2773 if (cp->cp_edx & CPUID_INTC_EDX_MMX) {
2774 add_x86_feature(featureset, X86FSET_MMX);
2775 }
2776 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 &&
2777 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) {
2778 add_x86_feature(featureset, X86FSET_MCA);
2779 }
2780 if (cp->cp_edx & CPUID_INTC_EDX_PAE) {
2781 add_x86_feature(featureset, X86FSET_PAE);
2782 }
2783 if (cp->cp_edx & CPUID_INTC_EDX_CX8) {
2784 add_x86_feature(featureset, X86FSET_CX8);
2785 }
2786 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) {
2787 add_x86_feature(featureset, X86FSET_CX16);
2788 }
2789 if (cp->cp_edx & CPUID_INTC_EDX_PAT) {
2790 add_x86_feature(featureset, X86FSET_PAT);
2791 }
2792 if (cp->cp_edx & CPUID_INTC_EDX_SEP) {
2793 add_x86_feature(featureset, X86FSET_SEP);
2794 }
2795 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) {
2796 /*
2797 * In our implementation, fxsave/fxrstor
2798 * are prerequisites before we'll even
2799 * try and do SSE things.
2800 */
2801 if (cp->cp_edx & CPUID_INTC_EDX_SSE) {
2802 add_x86_feature(featureset, X86FSET_SSE);
2803 }
2804 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) {
2805 add_x86_feature(featureset, X86FSET_SSE2);
2806 }
2807 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) {
2808 add_x86_feature(featureset, X86FSET_SSE3);
2809 }
2810 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) {
2811 add_x86_feature(featureset, X86FSET_SSSE3);
2812 }
2813 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) {
2814 add_x86_feature(featureset, X86FSET_SSE4_1);
2815 }
2816 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) {
2817 add_x86_feature(featureset, X86FSET_SSE4_2);
2818 }
2819 if (cp->cp_ecx & CPUID_INTC_ECX_AES) {
2820 add_x86_feature(featureset, X86FSET_AES);
2821 }
2822 if (cp->cp_ecx & CPUID_INTC_ECX_PCLMULQDQ) {
2823 add_x86_feature(featureset, X86FSET_PCLMULQDQ);
2824 }
2825
2826 if (cpi->cpi_std[7].cp_ebx & CPUID_INTC_EBX_7_0_SHA)
2827 add_x86_feature(featureset, X86FSET_SHA);
2828
2829 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_UMIP)
2830 add_x86_feature(featureset, X86FSET_UMIP);
2831 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_PKU)
2832 add_x86_feature(featureset, X86FSET_PKU);
2833 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_OSPKE)
2834 add_x86_feature(featureset, X86FSET_OSPKE);
2835
2836 if (cp->cp_ecx & CPUID_INTC_ECX_XSAVE) {
2837 add_x86_feature(featureset, X86FSET_XSAVE);
2838
2839 /* We only test AVX & AVX512 when there is XSAVE */
2840
2841 if (cp->cp_ecx & CPUID_INTC_ECX_AVX) {
2842 add_x86_feature(featureset,
2843 X86FSET_AVX);
2844
2845 /*
2846 * Intel says we can't check these without also
2847 * checking AVX.
2848 */
2849 if (cp->cp_ecx & CPUID_INTC_ECX_F16C)
2850 add_x86_feature(featureset,
2851 X86FSET_F16C);
2852
2853 if (cp->cp_ecx & CPUID_INTC_ECX_FMA)
2854 add_x86_feature(featureset,
2855 X86FSET_FMA);
2856
2857 if (cpi->cpi_std[7].cp_ebx &
2858 CPUID_INTC_EBX_7_0_BMI1)
2859 add_x86_feature(featureset,
2860 X86FSET_BMI1);
2861
2862 if (cpi->cpi_std[7].cp_ebx &
2863 CPUID_INTC_EBX_7_0_BMI2)
2864 add_x86_feature(featureset,
2865 X86FSET_BMI2);
2866
2867 if (cpi->cpi_std[7].cp_ebx &
2868 CPUID_INTC_EBX_7_0_AVX2)
2869 add_x86_feature(featureset,
2870 X86FSET_AVX2);
2871 }
2872
2873 if (cpi->cpi_vendor == X86_VENDOR_Intel &&
2874 (cpi->cpi_std[7].cp_ebx &
2875 CPUID_INTC_EBX_7_0_AVX512F) != 0) {
2876 add_x86_feature(featureset, X86FSET_AVX512F);
2877
2878 if (cpi->cpi_std[7].cp_ebx &
2879 CPUID_INTC_EBX_7_0_AVX512DQ)
2880 add_x86_feature(featureset,
2881 X86FSET_AVX512DQ);
2882 if (cpi->cpi_std[7].cp_ebx &
2883 CPUID_INTC_EBX_7_0_AVX512IFMA)
2884 add_x86_feature(featureset,
2885 X86FSET_AVX512FMA);
2886 if (cpi->cpi_std[7].cp_ebx &
2887 CPUID_INTC_EBX_7_0_AVX512PF)
2888 add_x86_feature(featureset,
2889 X86FSET_AVX512PF);
2890 if (cpi->cpi_std[7].cp_ebx &
2891 CPUID_INTC_EBX_7_0_AVX512ER)
2892 add_x86_feature(featureset,
2893 X86FSET_AVX512ER);
2894 if (cpi->cpi_std[7].cp_ebx &
2895 CPUID_INTC_EBX_7_0_AVX512CD)
2896 add_x86_feature(featureset,
2897 X86FSET_AVX512CD);
2898 if (cpi->cpi_std[7].cp_ebx &
2899 CPUID_INTC_EBX_7_0_AVX512BW)
2900 add_x86_feature(featureset,
2901 X86FSET_AVX512BW);
2902 if (cpi->cpi_std[7].cp_ebx &
2903 CPUID_INTC_EBX_7_0_AVX512VL)
2904 add_x86_feature(featureset,
2905 X86FSET_AVX512VL);
2906
2907 if (cpi->cpi_std[7].cp_ecx &
2908 CPUID_INTC_ECX_7_0_AVX512VBMI)
2909 add_x86_feature(featureset,
2910 X86FSET_AVX512VBMI);
2911 if (cpi->cpi_std[7].cp_ecx &
2912 CPUID_INTC_ECX_7_0_AVX512VNNI)
2913 add_x86_feature(featureset,
2914 X86FSET_AVX512VNNI);
2915 if (cpi->cpi_std[7].cp_ecx &
2916 CPUID_INTC_ECX_7_0_AVX512VPOPCDQ)
2917 add_x86_feature(featureset,
2918 X86FSET_AVX512VPOPCDQ);
2919
2920 if (cpi->cpi_std[7].cp_edx &
2921 CPUID_INTC_EDX_7_0_AVX5124NNIW)
2922 add_x86_feature(featureset,
2923 X86FSET_AVX512NNIW);
2924 if (cpi->cpi_std[7].cp_edx &
2925 CPUID_INTC_EDX_7_0_AVX5124FMAPS)
2926 add_x86_feature(featureset,
2927 X86FSET_AVX512FMAPS);
2928 }
2929 }
2930 }
2931
2932 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
2933 if (cp->cp_ecx & CPUID_INTC_ECX_PCID) {
2934 add_x86_feature(featureset, X86FSET_PCID);
2935 }
2936 }
2937
2938 if (cp->cp_ecx & CPUID_INTC_ECX_X2APIC) {
2939 add_x86_feature(featureset, X86FSET_X2APIC);
2940 }
2941 if (cp->cp_edx & CPUID_INTC_EDX_DE) {
2942 add_x86_feature(featureset, X86FSET_DE);
2943 }
2944 #if !defined(__xpv)
2945 if (cp->cp_ecx & CPUID_INTC_ECX_MON) {
2946
2947 /*
* We require the CLFLUSH instruction for the erratum workaround
2949 * to use MONITOR/MWAIT.
2950 */
2951 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
2952 cpi->cpi_mwait.support |= MWAIT_SUPPORT;
2953 add_x86_feature(featureset, X86FSET_MWAIT);
2954 } else {
2955 extern int idle_cpu_assert_cflush_monitor;
2956
2957 /*
2958 * All processors we are aware of which have
2959 * MONITOR/MWAIT also have CLFLUSH.
2960 */
2961 if (idle_cpu_assert_cflush_monitor) {
2962 ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) &&
2963 (cp->cp_edx & CPUID_INTC_EDX_CLFSH));
2964 }
2965 }
2966 }
2967 #endif /* __xpv */
2968
2969 if (cp->cp_ecx & CPUID_INTC_ECX_VMX) {
2970 add_x86_feature(featureset, X86FSET_VMX);
2971 }
2972
2973 if (cp->cp_ecx & CPUID_INTC_ECX_RDRAND)
2974 add_x86_feature(featureset, X86FSET_RDRAND);
2975
2976 /*
* Only needed the first time; the rest of the CPUs will follow suit.
* We only capture this for the boot CPU.
2979 */
2980 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
2981 add_x86_feature(featureset, X86FSET_CLFSH);
2982 x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8);
2983 }
2984 if (is_x86_feature(featureset, X86FSET_PAE))
2985 cpi->cpi_pabits = 36;
2986
2987 if (cpi->cpi_maxeax >= 0xD && !xsave_force_disable) {
2988 struct cpuid_regs r, *ecp;
2989
2990 ecp = &r;
2991 ecp->cp_eax = 0xD;
2992 ecp->cp_ecx = 1;
2993 ecp->cp_edx = ecp->cp_ebx = 0;
2994 (void) __cpuid_insn(ecp);
2995
2996 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVEOPT)
2997 add_x86_feature(featureset, X86FSET_XSAVEOPT);
2998 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVEC)
2999 add_x86_feature(featureset, X86FSET_XSAVEC);
3000 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVES)
3001 add_x86_feature(featureset, X86FSET_XSAVES);
3002 }
3003
3004 /*
3005 * Work on the "extended" feature information, doing
3006 * some basic initialization for cpuid_pass2()
3007 */
3008 xcpuid = 0;
3009 switch (cpi->cpi_vendor) {
3010 case X86_VENDOR_Intel:
3011 /*
3012 * On KVM we know we will have proper support for extended
3013 * cpuid.
3014 */
3015 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf ||
3016 (get_hwenv() == HW_KVM && cpi->cpi_family == 6 &&
3017 (cpi->cpi_model == 6 || cpi->cpi_model == 2)))
3018 xcpuid++;
3019 break;
3020 case X86_VENDOR_AMD:
3021 if (cpi->cpi_family > 5 ||
3022 (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
3023 xcpuid++;
3024 break;
3025 case X86_VENDOR_Cyrix:
3026 /*
3027 * Only these Cyrix CPUs are -known- to support
3028 * extended cpuid operations.
3029 */
3030 if (x86_type == X86_TYPE_VIA_CYRIX_III ||
3031 x86_type == X86_TYPE_CYRIX_GXm)
3032 xcpuid++;
3033 break;
3034 case X86_VENDOR_Centaur:
3035 case X86_VENDOR_TM:
3036 default:
3037 xcpuid++;
3038 break;
3039 }
3040
3041 if (xcpuid) {
3042 cp = &cpi->cpi_extd[0];
3043 cp->cp_eax = CPUID_LEAF_EXT_0;
3044 cpi->cpi_xmaxeax = __cpuid_insn(cp);
3045 }
3046
3047 if (cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) {
3048
3049 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX)
3050 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX;
3051
3052 switch (cpi->cpi_vendor) {
3053 case X86_VENDOR_Intel:
3054 case X86_VENDOR_AMD:
3055 if (cpi->cpi_xmaxeax < 0x80000001)
3056 break;
3057 cp = &cpi->cpi_extd[1];
3058 cp->cp_eax = 0x80000001;
3059 (void) __cpuid_insn(cp);
3060
3061 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
3062 cpi->cpi_family == 5 &&
3063 cpi->cpi_model == 6 &&
3064 cpi->cpi_step == 6) {
3065 /*
* K6 model 6 uses bit 10 to indicate SYSC.
3067 * Later models use bit 11. Fix it here.
3068 */
3069 if (cp->cp_edx & 0x400) {
3070 cp->cp_edx &= ~0x400;
3071 cp->cp_edx |= CPUID_AMD_EDX_SYSC;
3072 }
3073 }
3074
3075 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp);
3076
3077 /*
3078 * Compute the additions to the kernel's feature word.
3079 */
3080 if (cp->cp_edx & CPUID_AMD_EDX_NX) {
3081 add_x86_feature(featureset, X86FSET_NX);
3082 }
3083
3084 /*
* Regardless of whether or not we boot 64-bit,
3086 * we should have a way to identify whether
3087 * the CPU is capable of running 64-bit.
3088 */
3089 if (cp->cp_edx & CPUID_AMD_EDX_LM) {
3090 add_x86_feature(featureset, X86FSET_64);
3091 }
3092
3093 /* 1 GB large page - enable only for 64 bit kernel */
3094 if (cp->cp_edx & CPUID_AMD_EDX_1GPG) {
3095 add_x86_feature(featureset, X86FSET_1GPG);
3096 }
3097
3098 if ((cpi->cpi_vendor == X86_VENDOR_AMD) &&
3099 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) &&
3100 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) {
3101 add_x86_feature(featureset, X86FSET_SSE4A);
3102 }
3103
3104 /*
3105 * It's really tricky to support syscall/sysret in
3106 * the i386 kernel; we rely on sysenter/sysexit
3107 * instead. In the amd64 kernel, things are -way-
3108 * better.
3109 */
3110 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) {
3111 add_x86_feature(featureset, X86FSET_ASYSC);
3112 }
3113
3114 /*
3115 * While we're thinking about system calls, note
3116 * that AMD processors don't support sysenter
3117 * in long mode at all, so don't try to program them.
3118 */
3119 if (x86_vendor == X86_VENDOR_AMD) {
3120 remove_x86_feature(featureset, X86FSET_SEP);
3121 }
3122
3123 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) {
3124 add_x86_feature(featureset, X86FSET_TSCP);
3125 }
3126
3127 if (cp->cp_ecx & CPUID_AMD_ECX_SVM) {
3128 add_x86_feature(featureset, X86FSET_SVM);
3129 }
3130
3131 if (cp->cp_ecx & CPUID_AMD_ECX_TOPOEXT) {
3132 add_x86_feature(featureset, X86FSET_TOPOEXT);
3133 }
3134
3135 if (cp->cp_ecx & CPUID_AMD_ECX_PCEC) {
3136 add_x86_feature(featureset, X86FSET_AMD_PCEC);
3137 }
3138
3139 if (cp->cp_ecx & CPUID_AMD_ECX_XOP) {
3140 add_x86_feature(featureset, X86FSET_XOP);
3141 }
3142
3143 if (cp->cp_ecx & CPUID_AMD_ECX_FMA4) {
3144 add_x86_feature(featureset, X86FSET_FMA4);
3145 }
3146
3147 if (cp->cp_ecx & CPUID_AMD_ECX_TBM) {
3148 add_x86_feature(featureset, X86FSET_TBM);
3149 }
3150
3151 if (cp->cp_ecx & CPUID_AMD_ECX_MONITORX) {
3152 add_x86_feature(featureset, X86FSET_MONITORX);
3153 }
3154 break;
3155 default:
3156 break;
3157 }
3158
3159 /*
3160 * Get CPUID data about processor cores and hyperthreads.
3161 */
3162 switch (cpi->cpi_vendor) {
3163 case X86_VENDOR_Intel:
3164 if (cpi->cpi_maxeax >= 4) {
3165 cp = &cpi->cpi_std[4];
3166 cp->cp_eax = 4;
3167 cp->cp_ecx = 0;
3168 (void) __cpuid_insn(cp);
3169 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp);
3170 }
3171 /*FALLTHROUGH*/
3172 case X86_VENDOR_AMD:
3173 if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8)
3174 break;
3175 cp = &cpi->cpi_extd[8];
3176 cp->cp_eax = CPUID_LEAF_EXT_8;
3177 (void) __cpuid_insn(cp);
3178 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8,
3179 cp);
3180
3181 /*
3182 * AMD uses ebx for some extended functions.
3183 */
3184 if (cpi->cpi_vendor == X86_VENDOR_AMD) {
3185 /*
3186 * While we're here, check for the AMD "Error
3187 * Pointer Zero/Restore" feature. This can be
3188 * used to set up the FP save handlers
3189 * appropriately.
3190 */
3191 if (cp->cp_ebx & CPUID_AMD_EBX_ERR_PTR_ZERO) {
3192 cpi->cpi_fp_amd_save = 0;
3193 } else {
3194 cpi->cpi_fp_amd_save = 1;
3195 }
3196
3197 if (cp->cp_ebx & CPUID_AMD_EBX_CLZERO) {
3198 add_x86_feature(featureset,
3199 X86FSET_CLZERO);
3200 }
3201 }
3202
3203 /*
3204 * Virtual and physical address limits from
3205 * cpuid override previously guessed values.
3206 */
3207 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0);
3208 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8);
3209 break;
3210 default:
3211 break;
3212 }
3213
3214 /*
3215 * Get CPUID data about TSC Invariance in Deep C-State.
3216 */
3217 switch (cpi->cpi_vendor) {
3218 case X86_VENDOR_Intel:
3219 case X86_VENDOR_AMD:
3220 if (cpi->cpi_maxeax >= 7) {
3221 cp = &cpi->cpi_extd[7];
3222 cp->cp_eax = 0x80000007;
3223 cp->cp_ecx = 0;
3224 (void) __cpuid_insn(cp);
3225 }
3226 break;
3227 default:
3228 break;
3229 }
3230 }
3231
3232 cpuid_pass1_topology(cpu, featureset);
3233
3234 /*
3235 * Synthesize chip "revision" and socket type
3236 */
3237 cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family,
3238 cpi->cpi_model, cpi->cpi_step);
3239 cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor,
3240 cpi->cpi_family, cpi->cpi_model, cpi->cpi_step);
3241 cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family,
3242 cpi->cpi_model, cpi->cpi_step);
3243
3244 if (cpi->cpi_vendor == X86_VENDOR_AMD) {
3245 if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8 &&
3246 cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_ERR_PTR_ZERO) {
3247 /* Special handling for AMD FP not necessary. */
3248 cpi->cpi_fp_amd_save = 0;
3249 } else {
3250 cpi->cpi_fp_amd_save = 1;
3251 }
3252 }
3253
3254 /*
3255 * Check the processor leaves that are used for security features.
3256 */
3257 cpuid_scan_security(cpu, featureset);
3258
3259 pass1_done:
3260 cpi->cpi_pass = 1;
3261 }
3262
3263 /*
3264 * Make copies of the cpuid table entries we depend on, in
3265 * part for ease of parsing now, in part so that we have only
3266 * one place to correct any of it, in part for ease of
3267 * later export to userland, and in part so we can look at
3268 * this stuff in a crash dump.
3269 */
3270
3271 /*ARGSUSED*/
3272 void
3273 cpuid_pass2(cpu_t *cpu)
3274 {
3275 uint_t n, nmax;
3276 int i;
3277 struct cpuid_regs *cp;
3278 uint8_t *dp;
3279 uint32_t *iptr;
3280 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
3281
3282 ASSERT(cpi->cpi_pass == 1);
3283
3284 if (cpi->cpi_maxeax < 1)
3285 goto pass2_done;
3286
3287 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD)
3288 nmax = NMAX_CPI_STD;
3289 /*
3290 * (We already handled n == 0 and n == 1 in pass 1)
3291 */
3292 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) {
3293 cp->cp_eax = n;
3294
3295 /*
3296 * n == 7 was handled in pass 1
3297 */
3298 if (n == 7)
3299 continue;
3300
3301 /*
3302 * CPUID function 4 expects %ecx to be initialized
3303 * with an index which indicates which cache to return
3304 * information about. The OS is expected to call function 4
3305 * with %ecx set to 0, 1, 2, ... until it returns with
3306 * EAX[4:0] set to 0, which indicates there are no more
3307 * caches.
3308 *
3309 * Here, populate cpi_std[4] with the information returned by
3310 * function 4 when %ecx == 0, and do the rest in cpuid_pass3()
3311 * when dynamic memory allocation becomes available.
3312 *
3313 * Note: we need to explicitly initialize %ecx here, since
3314 * function 4 may have been previously invoked.
3315 */
3316 if (n == 4)
3317 cp->cp_ecx = 0;
3318
3319 (void) __cpuid_insn(cp);
3320 platform_cpuid_mangle(cpi->cpi_vendor, n, cp);
3321 switch (n) {
3322 case 2:
3323 /*
3324 * "the lower 8 bits of the %eax register
3325 * contain a value that identifies the number
3326 * of times the cpuid [instruction] has to be
3327 * executed to obtain a complete image of the
3328 * processor's caching systems."
3329 *
3330 * How *do* they make this stuff up?
3331 */
3332 cpi->cpi_ncache = sizeof (*cp) *
3333 BITX(cp->cp_eax, 7, 0);
3334 if (cpi->cpi_ncache == 0)
3335 break;
3336 cpi->cpi_ncache--; /* skip count byte */
3337
3338 /*
3339 * Well, for now, rather than attempt to implement
3340 * this slightly dubious algorithm, we just look
3341 * at the first 15 ..
3342 */
3343 if (cpi->cpi_ncache > (sizeof (*cp) - 1))
3344 cpi->cpi_ncache = sizeof (*cp) - 1;
3345
3346 dp = cpi->cpi_cacheinfo;
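/*
 * Each register whose high bit is clear holds valid one-byte
 * cache/TLB descriptors; the low byte of %eax is the iteration
 * count handled above and is skipped.
 */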
3347 if (BITX(cp->cp_eax, 31, 31) == 0) {
3348 uint8_t *p = (void *)&cp->cp_eax;
3349 for (i = 1; i < 4; i++)
3350 if (p[i] != 0)
3351 *dp++ = p[i];
3352 }
3353 if (BITX(cp->cp_ebx, 31, 31) == 0) {
3354 uint8_t *p = (void *)&cp->cp_ebx;
3355 for (i = 0; i < 4; i++)
3356 if (p[i] != 0)
3357 *dp++ = p[i];
3358 }
3359 if (BITX(cp->cp_ecx, 31, 31) == 0) {
3360 uint8_t *p = (void *)&cp->cp_ecx;
3361 for (i = 0; i < 4; i++)
3362 if (p[i] != 0)
3363 *dp++ = p[i];
3364 }
3365 if (BITX(cp->cp_edx, 31, 31) == 0) {
3366 uint8_t *p = (void *)&cp->cp_edx;
3367 for (i = 0; i < 4; i++)
3368 if (p[i] != 0)
3369 *dp++ = p[i];
3370 }
3371 break;
3372
3373 case 3: /* Processor serial number, if PSN supported */
3374 break;
3375
3376 case 4: /* Deterministic cache parameters */
3377 break;
3378
3379 case 5: /* Monitor/Mwait parameters */
3380 {
3381 size_t mwait_size;
3382
3383 /*
3384 * Check cpi_mwait.support, which was set in cpuid_pass1.
3385 */
3386 if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT))
3387 break;
3388
3389 /*
3390 * Protect ourselves from an insane mwait line size.
3391 * Workaround for incomplete hardware emulator(s).
3392 */
3393 mwait_size = (size_t)MWAIT_SIZE_MAX(cpi);
3394 if (mwait_size < sizeof (uint32_t) ||
3395 !ISP2(mwait_size)) {
3396 #if DEBUG
3397 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait "
3398 "size %ld", cpu->cpu_id, (long)mwait_size);
3399 #endif
3400 break;
3401 }
3402
3403 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi);
3404 cpi->cpi_mwait.mon_max = mwait_size;
3405 if (MWAIT_EXTENSION(cpi)) {
3406 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS;
3407 if (MWAIT_INT_ENABLE(cpi))
3408 cpi->cpi_mwait.support |=
3409 MWAIT_ECX_INT_ENABLE;
3410 }
3411 break;
3412 }
3413 default:
3414 break;
3415 }
3416 }
3417
3418 /*
3419 * XSAVE enumeration
3420 */
3421 if (cpi->cpi_maxeax >= 0xD) {
3422 struct cpuid_regs regs;
3423 boolean_t cpuid_d_valid = B_TRUE;
3424
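/*
 * Leaf 0xD, subleaf 0: %edx:%eax is the bitmap of supported XSAVE
 * state components, %ebx the save area size for the currently
 * enabled components, and %ecx the size needed if every supported
 * component were enabled.
 */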
3425 cp = &regs;
3426 cp->cp_eax = 0xD;
3427 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
3428
3429 (void) __cpuid_insn(cp);
3430
3431 /*
3432 * Sanity checks for debug
3433 */
3434 if ((cp->cp_eax & XFEATURE_LEGACY_FP) == 0 ||
3435 (cp->cp_eax & XFEATURE_SSE) == 0) {
3436 cpuid_d_valid = B_FALSE;
3437 }
3438
3439 cpi->cpi_xsave.xsav_hw_features_low = cp->cp_eax;
3440 cpi->cpi_xsave.xsav_hw_features_high = cp->cp_edx;
3441 cpi->cpi_xsave.xsav_max_size = cp->cp_ecx;
3442
3443 /*
3444 * If the hw supports AVX, get the size and offset in the save
3445 * area for the ymm state.
3446 */
3447 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX) {
3448 cp->cp_eax = 0xD;
3449 cp->cp_ecx = 2;
3450 cp->cp_edx = cp->cp_ebx = 0;
3451
3452 (void) __cpuid_insn(cp);
3453
3454 if (cp->cp_ebx != CPUID_LEAFD_2_YMM_OFFSET ||
3455 cp->cp_eax != CPUID_LEAFD_2_YMM_SIZE) {
3456 cpuid_d_valid = B_FALSE;
3457 }
3458
3459 cpi->cpi_xsave.ymm_size = cp->cp_eax;
3460 cpi->cpi_xsave.ymm_offset = cp->cp_ebx;
3461 }
3462
3463 /*
3464 * If the hw supports MPX, get the size and offset in the
3465 * save area for BNDREGS and BNDCSR.
3466 */
3467 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_MPX) {
3468 cp->cp_eax = 0xD;
3469 cp->cp_ecx = 3;
3470 cp->cp_edx = cp->cp_ebx = 0;
3471
3472 (void) __cpuid_insn(cp);
3473
3474 cpi->cpi_xsave.bndregs_size = cp->cp_eax;
3475 cpi->cpi_xsave.bndregs_offset = cp->cp_ebx;
3476
3477 cp->cp_eax = 0xD;
3478 cp->cp_ecx = 4;
3479 cp->cp_edx = cp->cp_ebx = 0;
3480
3481 (void) __cpuid_insn(cp);
3482
3483 cpi->cpi_xsave.bndcsr_size = cp->cp_eax;
3484 cpi->cpi_xsave.bndcsr_offset = cp->cp_ebx;
3485 }
3486
3487 /*
3488 * If the hw supports AVX512, get the size and offset in the
3489 * save area for the opmask registers and zmm state.
3490 */
3491 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX512) {
3492 cp->cp_eax = 0xD;
3493 cp->cp_ecx = 5;
3494 cp->cp_edx = cp->cp_ebx = 0;
3495
3496 (void) __cpuid_insn(cp);
3497
3498 cpi->cpi_xsave.opmask_size = cp->cp_eax;
3499 cpi->cpi_xsave.opmask_offset = cp->cp_ebx;
3500
3501 cp->cp_eax = 0xD;
3502 cp->cp_ecx = 6;
3503 cp->cp_edx = cp->cp_ebx = 0;
3504
3505 (void) __cpuid_insn(cp);
3506
3507 cpi->cpi_xsave.zmmlo_size = cp->cp_eax;
3508 cpi->cpi_xsave.zmmlo_offset = cp->cp_ebx;
3509
3510 cp->cp_eax = 0xD;
3511 cp->cp_ecx = 7;
3512 cp->cp_edx = cp->cp_ebx = 0;
3513
3514 (void) __cpuid_insn(cp);
3515
3516 cpi->cpi_xsave.zmmhi_size = cp->cp_eax;
3517 cpi->cpi_xsave.zmmhi_offset = cp->cp_ebx;
3518 }
3519
3520 if (!is_x86_feature(x86_featureset, X86FSET_XSAVE)) {
3521 xsave_state_size = 0;
3522 } else if (cpuid_d_valid) {
3523 xsave_state_size = cpi->cpi_xsave.xsav_max_size;
3524 } else {
3525 /* Broken CPUID 0xD, probably in HVM */
3526 cmn_err(CE_WARN, "cpu%d: CPUID.0xD returns invalid "
3527 "value: hw_low = %d, hw_high = %d, xsave_size = %d"
3528 ", ymm_size = %d, ymm_offset = %d\n",
3529 cpu->cpu_id, cpi->cpi_xsave.xsav_hw_features_low,
3530 cpi->cpi_xsave.xsav_hw_features_high,
3531 (int)cpi->cpi_xsave.xsav_max_size,
3532 (int)cpi->cpi_xsave.ymm_size,
3533 (int)cpi->cpi_xsave.ymm_offset);
3534
3535 if (xsave_state_size != 0) {
3536 /*
3537 * This must be a non-boot CPU. We cannot
3538 * continue, because the boot CPU has already
3539 * enabled XSAVE.
3540 */
3541 ASSERT(cpu->cpu_id != 0);
3542 cmn_err(CE_PANIC, "cpu%d: we have already "
3543 "enabled XSAVE on boot cpu, cannot "
3544 "continue.", cpu->cpu_id);
3545 } else {
3546 /*
3547 * If we reached here on the boot CPU, it's also
3548 * almost certain that we'll reach here on the
3549 * non-boot CPUs. When we're here on the boot CPU
3550 * we should disable the feature; on a non-boot
3551 * CPU we need to confirm that we already have.
3552 */
3553 if (cpu->cpu_id == 0) {
3554 remove_x86_feature(x86_featureset,
3555 X86FSET_XSAVE);
3556 remove_x86_feature(x86_featureset,
3557 X86FSET_AVX);
3558 remove_x86_feature(x86_featureset,
3559 X86FSET_F16C);
3560 remove_x86_feature(x86_featureset,
3561 X86FSET_BMI1);
3562 remove_x86_feature(x86_featureset,
3563 X86FSET_BMI2);
3564 remove_x86_feature(x86_featureset,
3565 X86FSET_FMA);
3566 remove_x86_feature(x86_featureset,
3567 X86FSET_AVX2);
3568 remove_x86_feature(x86_featureset,
3569 X86FSET_MPX);
3570 remove_x86_feature(x86_featureset,
3571 X86FSET_AVX512F);
3572 remove_x86_feature(x86_featureset,
3573 X86FSET_AVX512DQ);
3574 remove_x86_feature(x86_featureset,
3575 X86FSET_AVX512PF);
3576 remove_x86_feature(x86_featureset,
3577 X86FSET_AVX512ER);
3578 remove_x86_feature(x86_featureset,
3579 X86FSET_AVX512CD);
3580 remove_x86_feature(x86_featureset,
3581 X86FSET_AVX512BW);
3582 remove_x86_feature(x86_featureset,
3583 X86FSET_AVX512VL);
3584 remove_x86_feature(x86_featureset,
3585 X86FSET_AVX512FMA);
3586 remove_x86_feature(x86_featureset,
3587 X86FSET_AVX512VBMI);
3588 remove_x86_feature(x86_featureset,
3589 X86FSET_AVX512VNNI);
3590 remove_x86_feature(x86_featureset,
3591 X86FSET_AVX512VPOPCDQ);
3592 remove_x86_feature(x86_featureset,
3593 X86FSET_AVX512NNIW);
3594 remove_x86_feature(x86_featureset,
3595 X86FSET_AVX512FMAPS);
3596
3597 CPI_FEATURES_ECX(cpi) &=
3598 ~CPUID_INTC_ECX_XSAVE;
3599 CPI_FEATURES_ECX(cpi) &=
3600 ~CPUID_INTC_ECX_AVX;
3601 CPI_FEATURES_ECX(cpi) &=
3602 ~CPUID_INTC_ECX_F16C;
3603 CPI_FEATURES_ECX(cpi) &=
3604 ~CPUID_INTC_ECX_FMA;
3605 CPI_FEATURES_7_0_EBX(cpi) &=
3606 ~CPUID_INTC_EBX_7_0_BMI1;
3607 CPI_FEATURES_7_0_EBX(cpi) &=
3608 ~CPUID_INTC_EBX_7_0_BMI2;
3609 CPI_FEATURES_7_0_EBX(cpi) &=
3610 ~CPUID_INTC_EBX_7_0_AVX2;
3611 CPI_FEATURES_7_0_EBX(cpi) &=
3612 ~CPUID_INTC_EBX_7_0_MPX;
3613 CPI_FEATURES_7_0_EBX(cpi) &=
3614 ~CPUID_INTC_EBX_7_0_ALL_AVX512;
3615
3616 CPI_FEATURES_7_0_ECX(cpi) &=
3617 ~CPUID_INTC_ECX_7_0_ALL_AVX512;
3618
3619 CPI_FEATURES_7_0_EDX(cpi) &=
3620 ~CPUID_INTC_EDX_7_0_ALL_AVX512;
3621
3622 xsave_force_disable = B_TRUE;
3623 } else {
3624 VERIFY(is_x86_feature(x86_featureset,
3625 X86FSET_XSAVE) == B_FALSE);
3626 }
3627 }
3628 }
3629 }
3630
3631
3632 if ((cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) == 0)
3633 goto pass2_done;
3634
3635 if ((nmax = cpi->cpi_xmaxeax - CPUID_LEAF_EXT_0 + 1) > NMAX_CPI_EXTD)
3636 nmax = NMAX_CPI_EXTD;
3637 /*
3638 * Copy the extended properties, fixing them as we go.
3639 * (We already handled n == 0 and n == 1 in pass 1)
3640 */
3641 iptr = (void *)cpi->cpi_brandstr;
3642 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) {
3643 cp->cp_eax = CPUID_LEAF_EXT_0 + n;
3644 (void) __cpuid_insn(cp);
3645 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_0 + n,
3646 cp);
3647 switch (n) {
3648 case 2:
3649 case 3:
3650 case 4:
3651 /*
3652 * Extract the brand string
3653 */
3654 *iptr++ = cp->cp_eax;
3655 *iptr++ = cp->cp_ebx;
3656 *iptr++ = cp->cp_ecx;
3657 *iptr++ = cp->cp_edx;
3658 break;
3659 case 5:
3660 switch (cpi->cpi_vendor) {
3661 case X86_VENDOR_AMD:
3662 /*
3663 * The Athlon and Duron were the first
3664 * parts to report the sizes of the
3665 * TLB for large pages. Before then,
3666 * we don't trust the data.
3667 */
3668 if (cpi->cpi_family < 6 ||
3669 (cpi->cpi_family == 6 &&
3670 cpi->cpi_model < 1))
3671 cp->cp_eax = 0;
3672 break;
3673 default:
3674 break;
3675 }
3676 break;
3677 case 6:
3678 switch (cpi->cpi_vendor) {
3679 case X86_VENDOR_AMD:
3680 /*
3681 * The Athlon and Duron were the first
3682 * AMD parts with L2 TLB's.
3683 * Before then, don't trust the data.
3684 */
3685 if (cpi->cpi_family < 6 ||
3686 cpi->cpi_family == 6 &&
3687 cpi->cpi_model < 1)
3688 cp->cp_eax = cp->cp_ebx = 0;
3689 /*
3690 * AMD Duron rev A0 reports L2
3691 * cache size incorrectly as 1K
3692 * when it is really 64K
3693 */
3694 if (cpi->cpi_family == 6 &&
3695 cpi->cpi_model == 3 &&
3696 cpi->cpi_step == 0) {
3697 cp->cp_ecx &= 0xffff;
3698 cp->cp_ecx |= 0x400000;
3699 }
3700 break;
3701 case X86_VENDOR_Cyrix: /* VIA C3 */
3702 /*
3703 * VIA C3 processors are a bit messed
3704 * up w.r.t. encoding cache sizes in %ecx
3705 */
3706 if (cpi->cpi_family != 6)
3707 break;
3708 /*
3709 * model 7 and 8 were incorrectly encoded
3710 *
3711 * xxx is model 8 really broken?
3712 */
3713 if (cpi->cpi_model == 7 ||
3714 cpi->cpi_model == 8)
3715 cp->cp_ecx =
3716 BITX(cp->cp_ecx, 31, 24) << 16 |
3717 BITX(cp->cp_ecx, 23, 16) << 12 |
3718 BITX(cp->cp_ecx, 15, 8) << 8 |
3719 BITX(cp->cp_ecx, 7, 0);
3720 /*
3721 * model 9 stepping 1 has wrong associativity
3722 */
3723 if (cpi->cpi_model == 9 && cpi->cpi_step == 1)
3724 cp->cp_ecx |= 8 << 12;
3725 break;
3726 case X86_VENDOR_Intel:
3727 /*
3728 * Extended L2 Cache features function.
3729 * First appeared on Prescott.
3730 */
3731 default:
3732 break;
3733 }
3734 break;
3735 default:
3736 break;
3737 }
3738 }
3739
3740 pass2_done:
3741 cpi->cpi_pass = 2;
3742 }
3743
3744 static const char *
3745 intel_cpubrand(const struct cpuid_info *cpi)
3746 {
3747 int i;
3748
3749 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
3750 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
3751 return ("i486");
3752
3753 switch (cpi->cpi_family) {
3754 case 5:
3755 return ("Intel Pentium(r)");
3756 case 6:
3757 switch (cpi->cpi_model) {
3758 uint_t celeron, xeon;
3759 const struct cpuid_regs *cp;
3760 case 0:
3761 case 1:
3762 case 2:
3763 return ("Intel Pentium(r) Pro");
3764 case 3:
3765 case 4:
3766 return ("Intel Pentium(r) II");
3767 case 6:
3768 return ("Intel Celeron(r)");
3769 case 5:
3770 case 7:
3771 celeron = xeon = 0;
3772 cp = &cpi->cpi_std[2]; /* cache info */
3773
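/*
 * Leaf 2 descriptor 0x40 indicates no L2 cache (Celeron-class);
 * descriptors 0x44 and 0x45 are the 1MB and 2MB L2 caches found on
 * Xeon-class parts.
 */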
3774 for (i = 1; i < 4; i++) {
3775 uint_t tmp;
3776
3777 tmp = (cp->cp_eax >> (8 * i)) & 0xff;
3778 if (tmp == 0x40)
3779 celeron++;
3780 if (tmp >= 0x44 && tmp <= 0x45)
3781 xeon++;
3782 }
3783
3784 for (i = 0; i < 2; i++) {
3785 uint_t tmp;
3786
3787 tmp = (cp->cp_ebx >> (8 * i)) & 0xff;
3788 if (tmp == 0x40)
3789 celeron++;
3790 else if (tmp >= 0x44 && tmp <= 0x45)
3791 xeon++;
3792 }
3793
3794 for (i = 0; i < 4; i++) {
3795 uint_t tmp;
3796
3797 tmp = (cp->cp_ecx >> (8 * i)) & 0xff;
3798 if (tmp == 0x40)
3799 celeron++;
3800 else if (tmp >= 0x44 && tmp <= 0x45)
3801 xeon++;
3802 }
3803
3804 for (i = 0; i < 4; i++) {
3805 uint_t tmp;
3806
3807 tmp = (cp->cp_edx >> (8 * i)) & 0xff;
3808 if (tmp == 0x40)
3809 celeron++;
3810 else if (tmp >= 0x44 && tmp <= 0x45)
3811 xeon++;
3812 }
3813
3814 if (celeron)
3815 return ("Intel Celeron(r)");
3816 if (xeon)
3817 return (cpi->cpi_model == 5 ?
3818 "Intel Pentium(r) II Xeon(tm)" :
3819 "Intel Pentium(r) III Xeon(tm)");
3820 return (cpi->cpi_model == 5 ?
3821 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" :
3822 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)");
3823 default:
3824 break;
3825 }
3826 default:
3827 break;
3828 }
3829
3830 /* BrandID is present if the field is nonzero */
3831 if (cpi->cpi_brandid != 0) {
3832 static const struct {
3833 uint_t bt_bid;
3834 const char *bt_str;
3835 } brand_tbl[] = {
3836 { 0x1, "Intel(r) Celeron(r)" },
3837 { 0x2, "Intel(r) Pentium(r) III" },
3838 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" },
3839 { 0x4, "Intel(r) Pentium(r) III" },
3840 { 0x6, "Mobile Intel(r) Pentium(r) III" },
3841 { 0x7, "Mobile Intel(r) Celeron(r)" },
3842 { 0x8, "Intel(r) Pentium(r) 4" },
3843 { 0x9, "Intel(r) Pentium(r) 4" },
3844 { 0xa, "Intel(r) Celeron(r)" },
3845 { 0xb, "Intel(r) Xeon(tm)" },
3846 { 0xc, "Intel(r) Xeon(tm) MP" },
3847 { 0xe, "Mobile Intel(r) Pentium(r) 4" },
3848 { 0xf, "Mobile Intel(r) Celeron(r)" },
3849 { 0x11, "Mobile Genuine Intel(r)" },
3850 { 0x12, "Intel(r) Celeron(r) M" },
3851 { 0x13, "Mobile Intel(r) Celeron(r)" },
3852 { 0x14, "Intel(r) Celeron(r)" },
3853 { 0x15, "Mobile Genuine Intel(r)" },
3854 { 0x16, "Intel(r) Pentium(r) M" },
3855 { 0x17, "Mobile Intel(r) Celeron(r)" }
3856 };
3857 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]);
3858 uint_t sgn;
3859
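/*
 * Pack family/model/stepping into a single signature; a few brand
 * IDs are overloaded and need the signature to disambiguate (see
 * the special cases below).
 */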
3860 sgn = (cpi->cpi_family << 8) |
3861 (cpi->cpi_model << 4) | cpi->cpi_step;
3862
3863 for (i = 0; i < btblmax; i++)
3864 if (brand_tbl[i].bt_bid == cpi->cpi_brandid)
3865 break;
3866 if (i < btblmax) {
3867 if (sgn == 0x6b1 && cpi->cpi_brandid == 3)
3868 return ("Intel(r) Celeron(r)");
3869 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb)
3870 return ("Intel(r) Xeon(tm) MP");
3871 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe)
3872 return ("Intel(r) Xeon(tm)");
3873 return (brand_tbl[i].bt_str);
3874 }
3875 }
3876
3877 return (NULL);
3878 }
3879
3880 static const char *
3881 amd_cpubrand(const struct cpuid_info *cpi)
3882 {
3883 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
3884 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
3885 return ("i486 compatible");
3886
3887 switch (cpi->cpi_family) {
3888 case 5:
3889 switch (cpi->cpi_model) {
3890 case 0:
3891 case 1:
3892 case 2:
3893 case 3:
3894 case 4:
3895 case 5:
3896 return ("AMD-K5(r)");
3897 case 6:
3898 case 7:
3899 return ("AMD-K6(r)");
3900 case 8:
3901 return ("AMD-K6(r)-2");
3902 case 9:
3903 return ("AMD-K6(r)-III");
3904 default:
3905 return ("AMD (family 5)");
3906 }
3907 case 6:
3908 switch (cpi->cpi_model) {
3909 case 1:
3910 return ("AMD-K7(tm)");
3911 case 0:
3912 case 2:
3913 case 4:
3914 return ("AMD Athlon(tm)");
3915 case 3:
3916 case 7:
3917 return ("AMD Duron(tm)");
3918 case 6:
3919 case 8:
3920 case 10:
3921 /*
3922 * Use the L2 cache size to distinguish
3923 */
3924 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ?
3925 "AMD Athlon(tm)" : "AMD Duron(tm)");
3926 default:
3927 return ("AMD (family 6)");
3928 }
3929 default:
3930 break;
3931 }
3932
3933 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 &&
3934 cpi->cpi_brandid != 0) {
3935 switch (BITX(cpi->cpi_brandid, 7, 5)) {
3936 case 3:
3937 return ("AMD Opteron(tm) UP 1xx");
3938 case 4:
3939 return ("AMD Opteron(tm) DP 2xx");
3940 case 5:
3941 return ("AMD Opteron(tm) MP 8xx");
3942 default:
3943 return ("AMD Opteron(tm)");
3944 }
3945 }
3946
3947 return (NULL);
3948 }
3949
3950 static const char *
3951 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type)
3952 {
3953 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
3954 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 ||
3955 type == X86_TYPE_CYRIX_486)
3956 return ("i486 compatible");
3957
3958 switch (type) {
3959 case X86_TYPE_CYRIX_6x86:
3960 return ("Cyrix 6x86");
3961 case X86_TYPE_CYRIX_6x86L:
3962 return ("Cyrix 6x86L");
3963 case X86_TYPE_CYRIX_6x86MX:
3964 return ("Cyrix 6x86MX");
3965 case X86_TYPE_CYRIX_GXm:
3966 return ("Cyrix GXm");
3967 case X86_TYPE_CYRIX_MediaGX:
3968 return ("Cyrix MediaGX");
3969 case X86_TYPE_CYRIX_MII:
3970 return ("Cyrix M2");
3971 case X86_TYPE_VIA_CYRIX_III:
3972 return ("VIA Cyrix M3");
3973 default:
3974 /*
3975 * Have another wild guess ..
3976 */
3977 if (cpi->cpi_family == 4 && cpi->cpi_model == 9)
3978 return ("Cyrix 5x86");
3979 else if (cpi->cpi_family == 5) {
3980 switch (cpi->cpi_model) {
3981 case 2:
3982 return ("Cyrix 6x86"); /* Cyrix M1 */
3983 case 4:
3984 return ("Cyrix MediaGX");
3985 default:
3986 break;
3987 }
3988 } else if (cpi->cpi_family == 6) {
3989 switch (cpi->cpi_model) {
3990 case 0:
3991 return ("Cyrix 6x86MX"); /* Cyrix M2? */
3992 case 5:
3993 case 6:
3994 case 7:
3995 case 8:
3996 case 9:
3997 return ("VIA C3");
3998 default:
3999 break;
4000 }
4001 }
4002 break;
4003 }
4004 return (NULL);
4005 }
4006
4007 /*
4008 * This only gets called in the case that the CPU extended
4009 * feature brand string leaves (0x80000002, 0x80000003, 0x80000004)
4010 * aren't available, or contain null bytes for some reason.
4011 */
4012 static void
4013 fabricate_brandstr(struct cpuid_info *cpi)
4014 {
4015 const char *brand = NULL;
4016
4017 switch (cpi->cpi_vendor) {
4018 case X86_VENDOR_Intel:
4019 brand = intel_cpubrand(cpi);
4020 break;
4021 case X86_VENDOR_AMD:
4022 brand = amd_cpubrand(cpi);
4023 break;
4024 case X86_VENDOR_Cyrix:
4025 brand = cyrix_cpubrand(cpi, x86_type);
4026 break;
4027 case X86_VENDOR_NexGen:
4028 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
4029 brand = "NexGen Nx586";
4030 break;
4031 case X86_VENDOR_Centaur:
4032 if (cpi->cpi_family == 5)
4033 switch (cpi->cpi_model) {
4034 case 4:
4035 brand = "Centaur C6";
4036 break;
4037 case 8:
4038 brand = "Centaur C2";
4039 break;
4040 case 9:
4041 brand = "Centaur C3";
4042 break;
4043 default:
4044 break;
4045 }
4046 break;
4047 case X86_VENDOR_Rise:
4048 if (cpi->cpi_family == 5 &&
4049 (cpi->cpi_model == 0 || cpi->cpi_model == 2))
4050 brand = "Rise mP6";
4051 break;
4052 case X86_VENDOR_SiS:
4053 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
4054 brand = "SiS 55x";
4055 break;
4056 case X86_VENDOR_TM:
4057 if (cpi->cpi_family == 5 && cpi->cpi_model == 4)
4058 brand = "Transmeta Crusoe TM3x00 or TM5x00";
4059 break;
4060 case X86_VENDOR_NSC:
4061 case X86_VENDOR_UMC:
4062 default:
4063 break;
4064 }
4065 if (brand) {
4066 (void) strcpy((char *)cpi->cpi_brandstr, brand);
4067 return;
4068 }
4069
4070 /*
4071 * If all else fails ...
4072 */
4073 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr),
4074 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family,
4075 cpi->cpi_model, cpi->cpi_step);
4076 }
4077
4078 /*
4079 * This routine is called just after kernel memory allocation
4080 * becomes available on cpu0, and as part of mp_startup() on
4081 * the other cpus.
4082 *
4083 * Fixup the brand string, and collect any information from cpuid
4084 * that requires dynamically allocated storage to represent.
4085 */
4086 /*ARGSUSED*/
4087 void
4088 cpuid_pass3(cpu_t *cpu)
4089 {
4090 int i, max, shft, level, size;
4091 struct cpuid_regs regs;
4092 struct cpuid_regs *cp;
4093 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4094
4095 ASSERT(cpi->cpi_pass == 2);
4096
4097 /*
4098 * Deterministic cache parameters
4099 *
4100 * Intel uses leaf 0x4 for this, while AMD uses leaf 0x8000001d. The
4101 * values that are present are currently defined to be the same. This
4102 * means we can use the same logic to parse it as long as we use the
4103 * appropriate leaf to get the data. If you're updating this, make sure
4104 * you're careful about which vendor supports which aspect.
4105 *
4106 * Take this opportunity to detect the number of threads sharing the
4107 * last level cache, and construct a corresponding cache id. The
4108 * respective cpuid_info members are initialized to the default case of
4109 * "no last level cache sharing".
4110 */
4111 cpi->cpi_ncpu_shr_last_cache = 1;
4112 cpi->cpi_last_lvl_cacheid = cpu->cpu_id;
4113
4114 if ((cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) ||
4115 (cpi->cpi_vendor == X86_VENDOR_AMD &&
4116 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1d &&
4117 is_x86_feature(x86_featureset, X86FSET_TOPOEXT))) {
4118 uint32_t leaf;
4119
4120 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
4121 leaf = 4;
4122 } else {
4123 leaf = CPUID_LEAF_EXT_1d;
4124 }
4125
4126 /*
4127 * Find the # of elements (size) returned by the leaf and along
4128 * the way detect last level cache sharing details.
4129 */
4130 bzero(&regs, sizeof (regs));
4131 cp = &regs;
4132 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) {
4133 cp->cp_eax = leaf;
4134 cp->cp_ecx = i;
4135
4136 (void) __cpuid_insn(cp);
4137
4138 if (CPI_CACHE_TYPE(cp) == 0)
4139 break;
4140 level = CPI_CACHE_LVL(cp);
4141 if (level > max) {
4142 max = level;
4143 cpi->cpi_ncpu_shr_last_cache =
4144 CPI_NTHR_SHR_CACHE(cp) + 1;
4145 }
4146 }
4147 cpi->cpi_cache_leaf_size = size = i;
4148
4149 /*
4150 * Allocate the cpi_cache_leaves array. The first element
4151 * references the regs for the corresponding leaf with %ecx set
4152 * to 0. This was gathered in cpuid_pass2().
4153 */
4154 if (size > 0) {
4155 cpi->cpi_cache_leaves =
4156 kmem_alloc(size * sizeof (cp), KM_SLEEP);
4157 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
4158 cpi->cpi_cache_leaves[0] = &cpi->cpi_std[4];
4159 } else {
4160 cpi->cpi_cache_leaves[0] = &cpi->cpi_extd[0x1d];
4161 }
4162
4163 /*
4164 * Allocate storage to hold the additional regs
4165 * for the leaf, %ecx == 1 .. cpi_cache_leaf_size.
4166 *
4167 * The regs for the leaf, %ecx == 0 has already
4168 * been allocated as indicated above.
4169 */
4170 for (i = 1; i < size; i++) {
4171 cp = cpi->cpi_cache_leaves[i] =
4172 kmem_zalloc(sizeof (regs), KM_SLEEP);
4173 cp->cp_eax = leaf;
4174 cp->cp_ecx = i;
4175
4176 (void) __cpuid_insn(cp);
4177 }
4178 }
4179 /*
4180 * Determine the number of bits needed to represent
4181 * the number of CPUs sharing the last level cache.
4182 *
4183 * Shift off that number of bits from the APIC id to
4184 * derive the cache id.
4185 */
4186 shft = 0;
4187 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1)
4188 shft++;
4189 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft;
4190 }
4191
4192 /*
4193 * Now fixup the brand string
4194 */
4195 if ((cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) == 0) {
4196 fabricate_brandstr(cpi);
4197 } else {
4198
4199 /*
4200 * If we successfully extracted a brand string from the cpuid
4201 * instruction, clean it up by removing leading spaces and
4202 * similar junk.
4203 */
4204 if (cpi->cpi_brandstr[0]) {
4205 size_t maxlen = sizeof (cpi->cpi_brandstr);
4206 char *src, *dst;
4207
4208 dst = src = (char *)cpi->cpi_brandstr;
4209 src[maxlen - 1] = '\0';
4210 /*
4211 * strip leading spaces
4212 */
4213 while (*src == ' ')
4214 src++;
4215 /*
4216 * Remove any 'Genuine' or "Authentic" prefixes
4217 */
4218 if (strncmp(src, "Genuine ", 8) == 0)
4219 src += 8;
4220 if (strncmp(src, "Authentic ", 10) == 0)
4221 src += 10;
4222
4223 /*
4224 * Now do an in-place copy.
4225 * Map (R) to (r) and (TM) to (tm).
4226 * The era of teletypes is long gone, and there's
4227 * -really- no need to shout.
4228 */
4229 while (*src != '\0') {
4230 if (src[0] == '(') {
4231 if (strncmp(src + 1, "R)", 2) == 0) {
4232 (void) strncpy(dst, "(r)", 3);
4233 src += 3;
4234 dst += 3;
4235 continue;
4236 }
4237 if (strncmp(src + 1, "TM)", 3) == 0) {
4238 (void) strncpy(dst, "(tm)", 4);
4239 src += 4;
4240 dst += 4;
4241 continue;
4242 }
4243 }
4244 *dst++ = *src++;
4245 }
4246 *dst = '\0';
4247
4248 /*
4249 * Finally, remove any trailing spaces
4250 */
4251 while (--dst > cpi->cpi_brandstr)
4252 if (*dst == ' ')
4253 *dst = '\0';
4254 else
4255 break;
4256 } else
4257 fabricate_brandstr(cpi);
4258 }
4259 cpi->cpi_pass = 3;
4260 }
4261
4262 /*
4263 * This routine is called out of bind_hwcap() much later in the life
4264 * of the kernel (post_startup()). The job of this routine is to resolve
4265 * the hardware feature support and kernel support for those features into
4266 * what we're actually going to tell applications via the aux vector.
4267 */
4268 void
4269 cpuid_pass4(cpu_t *cpu, uint_t *hwcap_out)
4270 {
4271 struct cpuid_info *cpi;
4272 uint_t hwcap_flags = 0, hwcap_flags_2 = 0;
4273
4274 if (cpu == NULL)
4275 cpu = CPU;
4276 cpi = cpu->cpu_m.mcpu_cpi;
4277
4278 ASSERT(cpi->cpi_pass == 3);
4279
4280 if (cpi->cpi_maxeax >= 1) {
4281 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES];
4282 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES];
4283 uint32_t *ebx = &cpi->cpi_support[STD_EBX_FEATURES];
4284
4285 *edx = CPI_FEATURES_EDX(cpi);
4286 *ecx = CPI_FEATURES_ECX(cpi);
4287 *ebx = CPI_FEATURES_7_0_EBX(cpi);
4288
4289 /*
4290 * [these require explicit kernel support]
4291 */
4292 if (!is_x86_feature(x86_featureset, X86FSET_SEP))
4293 *edx &= ~CPUID_INTC_EDX_SEP;
4294
4295 if (!is_x86_feature(x86_featureset, X86FSET_SSE))
4296 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE);
4297 if (!is_x86_feature(x86_featureset, X86FSET_SSE2))
4298 *edx &= ~CPUID_INTC_EDX_SSE2;
4299
4300 if (!is_x86_feature(x86_featureset, X86FSET_HTT))
4301 *edx &= ~CPUID_INTC_EDX_HTT;
4302
4303 if (!is_x86_feature(x86_featureset, X86FSET_SSE3))
4304 *ecx &= ~CPUID_INTC_ECX_SSE3;
4305
4306 if (!is_x86_feature(x86_featureset, X86FSET_SSSE3))
4307 *ecx &= ~CPUID_INTC_ECX_SSSE3;
4308 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_1))
4309 *ecx &= ~CPUID_INTC_ECX_SSE4_1;
4310 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_2))
4311 *ecx &= ~CPUID_INTC_ECX_SSE4_2;
4312 if (!is_x86_feature(x86_featureset, X86FSET_AES))
4313 *ecx &= ~CPUID_INTC_ECX_AES;
4314 if (!is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ))
4315 *ecx &= ~CPUID_INTC_ECX_PCLMULQDQ;
4316 if (!is_x86_feature(x86_featureset, X86FSET_XSAVE))
4317 *ecx &= ~(CPUID_INTC_ECX_XSAVE |
4318 CPUID_INTC_ECX_OSXSAVE);
4319 if (!is_x86_feature(x86_featureset, X86FSET_AVX))
4320 *ecx &= ~CPUID_INTC_ECX_AVX;
4321 if (!is_x86_feature(x86_featureset, X86FSET_F16C))
4322 *ecx &= ~CPUID_INTC_ECX_F16C;
4323 if (!is_x86_feature(x86_featureset, X86FSET_FMA))
4324 *ecx &= ~CPUID_INTC_ECX_FMA;
4325 if (!is_x86_feature(x86_featureset, X86FSET_BMI1))
4326 *ebx &= ~CPUID_INTC_EBX_7_0_BMI1;
4327 if (!is_x86_feature(x86_featureset, X86FSET_BMI2))
4328 *ebx &= ~CPUID_INTC_EBX_7_0_BMI2;
4329 if (!is_x86_feature(x86_featureset, X86FSET_AVX2))
4330 *ebx &= ~CPUID_INTC_EBX_7_0_AVX2;
4331 if (!is_x86_feature(x86_featureset, X86FSET_RDSEED))
4332 *ebx &= ~CPUID_INTC_EBX_7_0_RDSEED;
4333 if (!is_x86_feature(x86_featureset, X86FSET_ADX))
4334 *ebx &= ~CPUID_INTC_EBX_7_0_ADX;
4335
4336 /*
4337 * [no explicit support required beyond x87 fp context]
4338 */
4339 if (!fpu_exists)
4340 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX);
4341
4342 /*
4343 * Now map the supported feature vector to things that we
4344 * think userland will care about.
4345 */
4346 if (*edx & CPUID_INTC_EDX_SEP)
4347 hwcap_flags |= AV_386_SEP;
4348 if (*edx & CPUID_INTC_EDX_SSE)
4349 hwcap_flags |= AV_386_FXSR | AV_386_SSE;
4350 if (*edx & CPUID_INTC_EDX_SSE2)
4351 hwcap_flags |= AV_386_SSE2;
4352 if (*ecx & CPUID_INTC_ECX_SSE3)
4353 hwcap_flags |= AV_386_SSE3;
4354 if (*ecx & CPUID_INTC_ECX_SSSE3)
4355 hwcap_flags |= AV_386_SSSE3;
4356 if (*ecx & CPUID_INTC_ECX_SSE4_1)
4357 hwcap_flags |= AV_386_SSE4_1;
4358 if (*ecx & CPUID_INTC_ECX_SSE4_2)
4359 hwcap_flags |= AV_386_SSE4_2;
4360 if (*ecx & CPUID_INTC_ECX_MOVBE)
4361 hwcap_flags |= AV_386_MOVBE;
4362 if (*ecx & CPUID_INTC_ECX_AES)
4363 hwcap_flags |= AV_386_AES;
4364 if (*ecx & CPUID_INTC_ECX_PCLMULQDQ)
4365 hwcap_flags |= AV_386_PCLMULQDQ;
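/*
 * Only advertise the XSAVE-dependent features when both the hardware
 * (XSAVE) and the OS (OSXSAVE, i.e. CR4.OSXSAVE) bits are set.
 */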
4366 if ((*ecx & CPUID_INTC_ECX_XSAVE) &&
4367 (*ecx & CPUID_INTC_ECX_OSXSAVE)) {
4368 hwcap_flags |= AV_386_XSAVE;
4369
4370 if (*ecx & CPUID_INTC_ECX_AVX) {
4371 uint32_t *ecx_7 = &CPI_FEATURES_7_0_ECX(cpi);
4372 uint32_t *edx_7 = &CPI_FEATURES_7_0_EDX(cpi);
4373
4374 hwcap_flags |= AV_386_AVX;
4375 if (*ecx & CPUID_INTC_ECX_F16C)
4376 hwcap_flags_2 |= AV_386_2_F16C;
4377 if (*ecx & CPUID_INTC_ECX_FMA)
4378 hwcap_flags_2 |= AV_386_2_FMA;
4379
4380 if (*ebx & CPUID_INTC_EBX_7_0_BMI1)
4381 hwcap_flags_2 |= AV_386_2_BMI1;
4382 if (*ebx & CPUID_INTC_EBX_7_0_BMI2)
4383 hwcap_flags_2 |= AV_386_2_BMI2;
4384 if (*ebx & CPUID_INTC_EBX_7_0_AVX2)
4385 hwcap_flags_2 |= AV_386_2_AVX2;
4386 if (*ebx & CPUID_INTC_EBX_7_0_AVX512F)
4387 hwcap_flags_2 |= AV_386_2_AVX512F;
4388 if (*ebx & CPUID_INTC_EBX_7_0_AVX512DQ)
4389 hwcap_flags_2 |= AV_386_2_AVX512DQ;
4390 if (*ebx & CPUID_INTC_EBX_7_0_AVX512IFMA)
4391 hwcap_flags_2 |= AV_386_2_AVX512IFMA;
4392 if (*ebx & CPUID_INTC_EBX_7_0_AVX512PF)
4393 hwcap_flags_2 |= AV_386_2_AVX512PF;
4394 if (*ebx & CPUID_INTC_EBX_7_0_AVX512ER)
4395 hwcap_flags_2 |= AV_386_2_AVX512ER;
4396 if (*ebx & CPUID_INTC_EBX_7_0_AVX512CD)
4397 hwcap_flags_2 |= AV_386_2_AVX512CD;
4398 if (*ebx & CPUID_INTC_EBX_7_0_AVX512BW)
4399 hwcap_flags_2 |= AV_386_2_AVX512BW;
4400 if (*ebx & CPUID_INTC_EBX_7_0_AVX512VL)
4401 hwcap_flags_2 |= AV_386_2_AVX512VL;
4402
4403 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VBMI)
4404 hwcap_flags_2 |= AV_386_2_AVX512VBMI;
4405 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VNNI)
4406 hwcap_flags_2 |= AV_386_2_AVX512_VNNI;
4407 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VPOPCDQ)
4408 hwcap_flags_2 |= AV_386_2_AVX512VPOPCDQ;
4409
4410 if (*edx_7 & CPUID_INTC_EDX_7_0_AVX5124NNIW)
4411 hwcap_flags_2 |= AV_386_2_AVX512_4NNIW;
4412 if (*edx_7 & CPUID_INTC_EDX_7_0_AVX5124FMAPS)
4413 hwcap_flags_2 |= AV_386_2_AVX512_4FMAPS;
4414 }
4415 }
4416 if (*ecx & CPUID_INTC_ECX_VMX)
4417 hwcap_flags |= AV_386_VMX;
4418 if (*ecx & CPUID_INTC_ECX_POPCNT)
4419 hwcap_flags |= AV_386_POPCNT;
4420 if (*edx & CPUID_INTC_EDX_FPU)
4421 hwcap_flags |= AV_386_FPU;
4422 if (*edx & CPUID_INTC_EDX_MMX)
4423 hwcap_flags |= AV_386_MMX;
4424
4425 if (*edx & CPUID_INTC_EDX_TSC)
4426 hwcap_flags |= AV_386_TSC;
4427 if (*edx & CPUID_INTC_EDX_CX8)
4428 hwcap_flags |= AV_386_CX8;
4429 if (*edx & CPUID_INTC_EDX_CMOV)
4430 hwcap_flags |= AV_386_CMOV;
4431 if (*ecx & CPUID_INTC_ECX_CX16)
4432 hwcap_flags |= AV_386_CX16;
4433
4434 if (*ecx & CPUID_INTC_ECX_RDRAND)
4435 hwcap_flags_2 |= AV_386_2_RDRAND;
4436 if (*ebx & CPUID_INTC_EBX_7_0_ADX)
4437 hwcap_flags_2 |= AV_386_2_ADX;
4438 if (*ebx & CPUID_INTC_EBX_7_0_RDSEED)
4439 hwcap_flags_2 |= AV_386_2_RDSEED;
4440 if (*ebx & CPUID_INTC_EBX_7_0_SHA)
4441 hwcap_flags_2 |= AV_386_2_SHA;
4442 if (*ebx & CPUID_INTC_EBX_7_0_FSGSBASE)
4443 hwcap_flags_2 |= AV_386_2_FSGSBASE;
4444 if (*ebx & CPUID_INTC_EBX_7_0_CLWB)
4445 hwcap_flags_2 |= AV_386_2_CLWB;
4446 if (*ebx & CPUID_INTC_EBX_7_0_CLFLUSHOPT)
4447 hwcap_flags_2 |= AV_386_2_CLFLUSHOPT;
4448
4449 }
4450 /*
4451 * Check a few miscellaneous features.
4452 */
4453 if (is_x86_feature(x86_featureset, X86FSET_CLZERO))
4454 hwcap_flags_2 |= AV_386_2_CLZERO;
4455
4456 if (cpi->cpi_xmaxeax < 0x80000001)
4457 goto pass4_done;
4458
4459 switch (cpi->cpi_vendor) {
4460 struct cpuid_regs cp;
4461 uint32_t *edx, *ecx;
4462
4463 case X86_VENDOR_Intel:
4464 /*
4465 * Seems like Intel duplicated what was necessary
4466 * here to make the initial crop of 64-bit OS's work.
4467 * Hopefully, those are the only "extended" bits
4468 * they'll add.
4469 */
4470 /*FALLTHROUGH*/
4471
4472 case X86_VENDOR_AMD:
4473 edx = &cpi->cpi_support[AMD_EDX_FEATURES];
4474 ecx = &cpi->cpi_support[AMD_ECX_FEATURES];
4475
4476 *edx = CPI_FEATURES_XTD_EDX(cpi);
4477 *ecx = CPI_FEATURES_XTD_ECX(cpi);
4478
4479 /*
4480 * [these features require explicit kernel support]
4481 */
4482 switch (cpi->cpi_vendor) {
4483 case X86_VENDOR_Intel:
4484 if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
4485 *edx &= ~CPUID_AMD_EDX_TSCP;
4486 break;
4487
4488 case X86_VENDOR_AMD:
4489 if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
4490 *edx &= ~CPUID_AMD_EDX_TSCP;
4491 if (!is_x86_feature(x86_featureset, X86FSET_SSE4A))
4492 *ecx &= ~CPUID_AMD_ECX_SSE4A;
4493 break;
4494
4495 default:
4496 break;
4497 }
4498
4499 /*
4500 * [no explicit support required beyond
4501 * x87 fp context and exception handlers]
4502 */
4503 if (!fpu_exists)
4504 *edx &= ~(CPUID_AMD_EDX_MMXamd |
4505 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx);
4506
4507 if (!is_x86_feature(x86_featureset, X86FSET_NX))
4508 *edx &= ~CPUID_AMD_EDX_NX;
4509 #if !defined(__amd64)
4510 *edx &= ~CPUID_AMD_EDX_LM;
4511 #endif
4512 /*
4513 * Now map the supported feature vector to
4514 * things that we think userland will care about.
4515 */
4516 #if defined(__amd64)
4517 if (*edx & CPUID_AMD_EDX_SYSC)
4518 hwcap_flags |= AV_386_AMD_SYSC;
4519 #endif
4520 if (*edx & CPUID_AMD_EDX_MMXamd)
4521 hwcap_flags |= AV_386_AMD_MMX;
4522 if (*edx & CPUID_AMD_EDX_3DNow)
4523 hwcap_flags |= AV_386_AMD_3DNow;
4524 if (*edx & CPUID_AMD_EDX_3DNowx)
4525 hwcap_flags |= AV_386_AMD_3DNowx;
4526 if (*ecx & CPUID_AMD_ECX_SVM)
4527 hwcap_flags |= AV_386_AMD_SVM;
4528
4529 switch (cpi->cpi_vendor) {
4530 case X86_VENDOR_AMD:
4531 if (*edx & CPUID_AMD_EDX_TSCP)
4532 hwcap_flags |= AV_386_TSCP;
4533 if (*ecx & CPUID_AMD_ECX_AHF64)
4534 hwcap_flags |= AV_386_AHF;
4535 if (*ecx & CPUID_AMD_ECX_SSE4A)
4536 hwcap_flags |= AV_386_AMD_SSE4A;
4537 if (*ecx & CPUID_AMD_ECX_LZCNT)
4538 hwcap_flags |= AV_386_AMD_LZCNT;
4539 if (*ecx & CPUID_AMD_ECX_MONITORX)
4540 hwcap_flags_2 |= AV_386_2_MONITORX;
4541 break;
4542
4543 case X86_VENDOR_Intel:
4544 if (*edx & CPUID_AMD_EDX_TSCP)
4545 hwcap_flags |= AV_386_TSCP;
4546 if (*ecx & CPUID_AMD_ECX_LZCNT)
4547 hwcap_flags |= AV_386_AMD_LZCNT;
4548 /*
4549 * Aarrgh.
4550 * Intel uses a different bit in the same word.
4551 */
4552 if (*ecx & CPUID_INTC_ECX_AHF64)
4553 hwcap_flags |= AV_386_AHF;
4554 break;
4555
4556 default:
4557 break;
4558 }
4559 break;
4560
4561 case X86_VENDOR_TM:
4562 cp.cp_eax = 0x80860001;
4563 (void) __cpuid_insn(&cp);
4564 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx;
4565 break;
4566
4567 default:
4568 break;
4569 }
4570
4571 pass4_done:
4572 cpi->cpi_pass = 4;
4573 if (hwcap_out != NULL) {
4574 hwcap_out[0] = hwcap_flags;
4575 hwcap_out[1] = hwcap_flags_2;
4576 }
4577 }
4578
4579
4580 /*
4581 * Simulate the cpuid instruction using the data we previously
4582 * captured about this CPU. We try our best to return the truth
4583 * about the hardware, independently of kernel support.
4584 */
4585 uint32_t
4586 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp)
4587 {
4588 struct cpuid_info *cpi;
4589 struct cpuid_regs *xcp;
4590
4591 if (cpu == NULL)
4592 cpu = CPU;
4593 cpi = cpu->cpu_m.mcpu_cpi;
4594
4595 ASSERT(cpuid_checkpass(cpu, 3));
4596
4597 /*
4598 * CPUID data is cached in two separate places: cpi_std for standard
4599 * CPUID leaves, and cpi_extd for extended CPUID leaves.
4600 */
4601 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD) {
4602 xcp = &cpi->cpi_std[cp->cp_eax];
4603 } else if (cp->cp_eax >= CPUID_LEAF_EXT_0 &&
4604 cp->cp_eax <= cpi->cpi_xmaxeax &&
4605 cp->cp_eax < CPUID_LEAF_EXT_0 + NMAX_CPI_EXTD) {
4606 xcp = &cpi->cpi_extd[cp->cp_eax - CPUID_LEAF_EXT_0];
4607 } else {
4608 /*
4609 * The caller is asking for data from an input parameter which
4610 * the kernel has not cached. In this case we go fetch from
4611 * the hardware and return the data directly to the user.
4612 */
4613 return (__cpuid_insn(cp));
4614 }
4615
4616 cp->cp_eax = xcp->cp_eax;
4617 cp->cp_ebx = xcp->cp_ebx;
4618 cp->cp_ecx = xcp->cp_ecx;
4619 cp->cp_edx = xcp->cp_edx;
4620 return (cp->cp_eax);
4621 }
4622
4623 int
4624 cpuid_checkpass(cpu_t *cpu, int pass)
4625 {
4626 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL &&
4627 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass);
4628 }
4629
4630 int
4631 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n)
4632 {
4633 ASSERT(cpuid_checkpass(cpu, 3));
4634
4635 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr));
4636 }
4637
4638 int
4639 cpuid_is_cmt(cpu_t *cpu)
4640 {
4641 if (cpu == NULL)
4642 cpu = CPU;
4643
4644 ASSERT(cpuid_checkpass(cpu, 1));
4645
4646 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0);
4647 }
4648
4649 /*
4650 * AMD and Intel both implement the 64-bit variant of the syscall
4651 * instruction (syscallq), so if there's -any- support for syscall,
4652 * cpuid currently says "yes, we support this".
4653 *
4654 * However, Intel decided to -not- implement the 32-bit variant of the
4655 * syscall instruction, so we provide a predicate to allow our caller
4656 * to test that subtlety here.
4657 *
4658 * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor,
4659 * even in the case where the hardware would in fact support it.
4660 */
4661 /*ARGSUSED*/
4662 int
4663 cpuid_syscall32_insn(cpu_t *cpu)
4664 {
4665 ASSERT(cpuid_checkpass((cpu == NULL ? CPU : cpu), 1));
4666
4667 #if !defined(__xpv)
4668 if (cpu == NULL)
4669 cpu = CPU;
4670
4671 /*CSTYLED*/
4672 {
4673 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4674
4675 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
4676 cpi->cpi_xmaxeax >= 0x80000001 &&
4677 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC))
4678 return (1);
4679 }
4680 #endif
4681 return (0);
4682 }
4683
4684 int
4685 cpuid_getidstr(cpu_t *cpu, char *s, size_t n)
4686 {
4687 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4688
4689 static const char fmt[] =
4690 "x86 (%s %X family %d model %d step %d clock %d MHz)";
4691 static const char fmt_ht[] =
4692 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)";
4693
4694 ASSERT(cpuid_checkpass(cpu, 1));
4695
4696 if (cpuid_is_cmt(cpu))
4697 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid,
4698 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
4699 cpi->cpi_family, cpi->cpi_model,
4700 cpi->cpi_step, cpu->cpu_type_info.pi_clock));
4701 return (snprintf(s, n, fmt,
4702 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
4703 cpi->cpi_family, cpi->cpi_model,
4704 cpi->cpi_step, cpu->cpu_type_info.pi_clock));
4705 }
4706
4707 const char *
4708 cpuid_getvendorstr(cpu_t *cpu)
4709 {
4710 ASSERT(cpuid_checkpass(cpu, 1));
4711 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr);
4712 }
4713
4714 uint_t
4715 cpuid_getvendor(cpu_t *cpu)
4716 {
4717 ASSERT(cpuid_checkpass(cpu, 1));
4718 return (cpu->cpu_m.mcpu_cpi->cpi_vendor);
4719 }
4720
4721 uint_t
4722 cpuid_getfamily(cpu_t *cpu)
4723 {
4724 ASSERT(cpuid_checkpass(cpu, 1));
4725 return (cpu->cpu_m.mcpu_cpi->cpi_family);
4726 }
4727
4728 uint_t
4729 cpuid_getmodel(cpu_t *cpu)
4730 {
4731 ASSERT(cpuid_checkpass(cpu, 1));
4732 return (cpu->cpu_m.mcpu_cpi->cpi_model);
4733 }
4734
4735 uint_t
4736 cpuid_get_ncpu_per_chip(cpu_t *cpu)
4737 {
4738 ASSERT(cpuid_checkpass(cpu, 1));
4739 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip);
4740 }
4741
4742 uint_t
4743 cpuid_get_ncore_per_chip(cpu_t *cpu)
4744 {
4745 ASSERT(cpuid_checkpass(cpu, 1));
4746 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip);
4747 }
4748
4749 uint_t
4750 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu)
4751 {
4752 ASSERT(cpuid_checkpass(cpu, 2));
4753 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache);
4754 }
4755
4756 id_t
4757 cpuid_get_last_lvl_cacheid(cpu_t *cpu)
4758 {
4759 ASSERT(cpuid_checkpass(cpu, 2));
4760 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
4761 }
4762
4763 uint_t
4764 cpuid_getstep(cpu_t *cpu)
4765 {
4766 ASSERT(cpuid_checkpass(cpu, 1));
4767 return (cpu->cpu_m.mcpu_cpi->cpi_step);
4768 }
4769
4770 uint_t
4771 cpuid_getsig(struct cpu *cpu)
4772 {
4773 ASSERT(cpuid_checkpass(cpu, 1));
4774 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax);
4775 }
4776
4777 uint32_t
4778 cpuid_getchiprev(struct cpu *cpu)
4779 {
4780 ASSERT(cpuid_checkpass(cpu, 1));
4781 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev);
4782 }
4783
4784 const char *
4785 cpuid_getchiprevstr(struct cpu *cpu)
4786 {
4787 ASSERT(cpuid_checkpass(cpu, 1));
4788 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr);
4789 }
4790
4791 uint32_t
4792 cpuid_getsockettype(struct cpu *cpu)
4793 {
4794 ASSERT(cpuid_checkpass(cpu, 1));
4795 return (cpu->cpu_m.mcpu_cpi->cpi_socket);
4796 }
4797
4798 const char *
4799 cpuid_getsocketstr(cpu_t *cpu)
4800 {
4801 static const char *socketstr = NULL;
4802 struct cpuid_info *cpi;
4803
4804 ASSERT(cpuid_checkpass(cpu, 1));
4805 cpi = cpu->cpu_m.mcpu_cpi;
4806
4807 /* Assume that socket types are the same across the system */
4808 if (socketstr == NULL)
4809 socketstr = _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family,
4810 cpi->cpi_model, cpi->cpi_step);
4811
4812
4813 return (socketstr);
4814 }
4815
4816 int
4817 cpuid_get_chipid(cpu_t *cpu)
4818 {
4819 ASSERT(cpuid_checkpass(cpu, 1));
4820
4821 if (cpuid_is_cmt(cpu))
4822 return (cpu->cpu_m.mcpu_cpi->cpi_chipid);
4823 return (cpu->cpu_id);
4824 }
4825
4826 id_t
4827 cpuid_get_coreid(cpu_t *cpu)
4828 {
4829 ASSERT(cpuid_checkpass(cpu, 1));
4830 return (cpu->cpu_m.mcpu_cpi->cpi_coreid);
4831 }
4832
4833 int
4834 cpuid_get_pkgcoreid(cpu_t *cpu)
4835 {
4836 ASSERT(cpuid_checkpass(cpu, 1));
4837 return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid);
4838 }
4839
4840 int
4841 cpuid_get_clogid(cpu_t *cpu)
4842 {
4843 ASSERT(cpuid_checkpass(cpu, 1));
4844 return (cpu->cpu_m.mcpu_cpi->cpi_clogid);
4845 }
4846
4847 int
4848 cpuid_get_cacheid(cpu_t *cpu)
4849 {
4850 ASSERT(cpuid_checkpass(cpu, 1));
4851 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
4852 }
4853
4854 uint_t
4855 cpuid_get_procnodeid(cpu_t *cpu)
4856 {
4857 ASSERT(cpuid_checkpass(cpu, 1));
4858 return (cpu->cpu_m.mcpu_cpi->cpi_procnodeid);
4859 }
4860
4861 uint_t
4862 cpuid_get_procnodes_per_pkg(cpu_t *cpu)
4863 {
4864 ASSERT(cpuid_checkpass(cpu, 1));
4865 return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg);
4866 }
4867
4868 uint_t
4869 cpuid_get_compunitid(cpu_t *cpu)
4870 {
4871 ASSERT(cpuid_checkpass(cpu, 1));
4872 return (cpu->cpu_m.mcpu_cpi->cpi_compunitid);
4873 }
4874
4875 uint_t
4876 cpuid_get_cores_per_compunit(cpu_t *cpu)
4877 {
4878 ASSERT(cpuid_checkpass(cpu, 1));
4879 return (cpu->cpu_m.mcpu_cpi->cpi_cores_per_compunit);
4880 }
4881
4882 /*ARGSUSED*/
4883 int
4884 cpuid_have_cr8access(cpu_t *cpu)
4885 {
4886 #if defined(__amd64)
4887 return (1);
4888 #else
4889 struct cpuid_info *cpi;
4890
4891 ASSERT(cpu != NULL);
4892 cpi = cpu->cpu_m.mcpu_cpi;
4893 if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 &&
4894 (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0)
4895 return (1);
4896 return (0);
4897 #endif
4898 }
4899
4900 uint32_t
4901 cpuid_get_apicid(cpu_t *cpu)
4902 {
4903 ASSERT(cpuid_checkpass(cpu, 1));
4904 if (cpu->cpu_m.mcpu_cpi->cpi_maxeax < 1) {
4905 return (UINT32_MAX);
4906 } else {
4907 return (cpu->cpu_m.mcpu_cpi->cpi_apicid);
4908 }
4909 }
4910
4911 void
4912 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits)
4913 {
4914 struct cpuid_info *cpi;
4915
4916 if (cpu == NULL)
4917 cpu = CPU;
4918 cpi = cpu->cpu_m.mcpu_cpi;
4919
4920 ASSERT(cpuid_checkpass(cpu, 1));
4921
4922 if (pabits)
4923 *pabits = cpi->cpi_pabits;
4924 if (vabits)
4925 *vabits = cpi->cpi_vabits;
4926 }
4927
4928 size_t
4929 cpuid_get_xsave_size()
4930 {
4931 return (MAX(cpuid_info0.cpi_xsave.xsav_max_size,
4932 sizeof (struct xsave_state)));
4933 }
4934
4935 /*
4936 * Return true if the CPUs on this system require 'pointer clearing' for the
4937 * floating point error pointer exception handling. In the past, this has been
4938 * true for all AMD K7 & K8 CPUs, although newer AMD CPUs have been changed to
4939 * behave the same as Intel. This is checked via the CPUID_AMD_EBX_ERR_PTR_ZERO
4940 * feature bit and is reflected in the cpi_fp_amd_save member.
4941 */
4942 boolean_t
4943 cpuid_need_fp_excp_handling()
4944 {
4945 return (cpuid_info0.cpi_vendor == X86_VENDOR_AMD &&
4946 cpuid_info0.cpi_fp_amd_save != 0);
4947 }
4948
4949 /*
4950 * Returns the number of data TLB entries for a corresponding
4951 * pagesize. If it can't be computed, or isn't known, the
4952 * routine returns zero. If you ask about an architecturally
4953 * impossible pagesize, the routine will panic (so that the
4954 * hat implementor knows that things are inconsistent.)
4955 */
4956 uint_t
4957 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize)
4958 {
4959 struct cpuid_info *cpi;
4960 uint_t dtlb_nent = 0;
4961
4962 if (cpu == NULL)
4963 cpu = CPU;
4964 cpi = cpu->cpu_m.mcpu_cpi;
4965
4966 ASSERT(cpuid_checkpass(cpu, 1));
4967
4968 /*
4969 * Check the L2 TLB info
4970 */
4971 if (cpi->cpi_xmaxeax >= 0x80000006) {
4972 struct cpuid_regs *cp = &cpi->cpi_extd[6];
4973
4974 switch (pagesize) {
4975
4976 case 4 * 1024:
4977 /*
4978 * All zero in the top 16 bits of the register
4979 * indicates a unified TLB. Size is in low 16 bits.
4980 */
4981 if ((cp->cp_ebx & 0xffff0000) == 0)
4982 dtlb_nent = cp->cp_ebx & 0x0000ffff;
4983 else
4984 dtlb_nent = BITX(cp->cp_ebx, 27, 16);
4985 break;
4986
4987 case 2 * 1024 * 1024:
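/*
 * Same layout as the 4KB case above, but %eax describes the
 * 2MB/4MB page L2 TLB.
 */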
4988 if ((cp->cp_eax & 0xffff0000) == 0)
4989 dtlb_nent = cp->cp_eax & 0x0000ffff;
4990 else
4991 dtlb_nent = BITX(cp->cp_eax, 27, 16);
4992 break;
4993
4994 default:
4995 panic("unknown L2 pagesize");
4996 /*NOTREACHED*/
4997 }
4998 }
4999
5000 if (dtlb_nent != 0)
5001 return (dtlb_nent);
5002
5003 /*
5004 * No L2 TLB support for this size, try L1.
5005 */
5006 if (cpi->cpi_xmaxeax >= 0x80000005) {
5007 struct cpuid_regs *cp = &cpi->cpi_extd[5];
5008
5009 switch (pagesize) {
5010 case 4 * 1024:
5011 dtlb_nent = BITX(cp->cp_ebx, 23, 16);
5012 break;
5013 case 2 * 1024 * 1024:
5014 dtlb_nent = BITX(cp->cp_eax, 23, 16);
5015 break;
5016 default:
5017 panic("unknown L1 d-TLB pagesize");
5018 /*NOTREACHED*/
5019 }
5020 }
5021
5022 return (dtlb_nent);
5023 }
5024
5025 /*
5026 * Return 0 if the erratum is not present or not applicable, positive
5027 * if it is, and negative if the status of the erratum is unknown.
5028 *
5029 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm)
5030 * Processors" #25759, Rev 3.57, August 2005
5031 */
5032 int
5033 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum)
5034 {
5035 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
5036 uint_t eax;
5037
5038 /*
5039 * Bail out if this CPU isn't an AMD CPU, or if it's
5040 * a legacy (32-bit) AMD CPU.
5041 */
5042 if (cpi->cpi_vendor != X86_VENDOR_AMD ||
5043 cpi->cpi_family == 4 || cpi->cpi_family == 5 ||
5044 cpi->cpi_family == 6) {
5045 return (0);
5046 }
5047
5048 eax = cpi->cpi_std[1].cp_eax;
5049
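/*
 * The macros below match the raw leaf 1 %eax processor signature
 * against the silicon revisions (SH-B0, DH-CG, JH-E1, ...) named in
 * the revision guide.
 */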
5050 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50)
5051 #define SH_B3(eax) (eax == 0xf51)
5052 #define B(eax) (SH_B0(eax) || SH_B3(eax))
5053
5054 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58)
5055
5056 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a)
5057 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0)
5058 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2)
5059 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax))
5060
5061 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70)
5062 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0)
5063 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0)
5064 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax))
5065
5066 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70)
5067 #define JH_E1(eax) (eax == 0x20f10) /* JH8_E0 had 0x20f30 */
5068 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0)
5069 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71)
5070 #define BH_E4(eax) (eax == 0x20fb1)
5071 #define SH_E5(eax) (eax == 0x20f42)
5072 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2)
5073 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32)
5074 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \
5075 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \
5076 DH_E6(eax) || JH_E6(eax))
5077
5078 #define DR_AX(eax) (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02)
5079 #define DR_B0(eax) (eax == 0x100f20)
5080 #define DR_B1(eax) (eax == 0x100f21)
5081 #define DR_BA(eax) (eax == 0x100f2a)
5082 #define DR_B2(eax) (eax == 0x100f22)
5083 #define DR_B3(eax) (eax == 0x100f23)
5084 #define RB_C0(eax) (eax == 0x100f40)
5085
5086 switch (erratum) {
5087 case 1:
5088 return (cpi->cpi_family < 0x10);
5089 case 51: /* what does the asterisk mean? */
5090 return (B(eax) || SH_C0(eax) || CG(eax));
5091 case 52:
5092 return (B(eax));
5093 case 57:
5094 return (cpi->cpi_family <= 0x11);
5095 case 58:
5096 return (B(eax));
5097 case 60:
5098 return (cpi->cpi_family <= 0x11);
5099 case 61:
5100 case 62:
5101 case 63:
5102 case 64:
5103 case 65:
5104 case 66:
5105 case 68:
5106 case 69:
5107 case 70:
5108 case 71:
5109 return (B(eax));
5110 case 72:
5111 return (SH_B0(eax));
5112 case 74:
5113 return (B(eax));
5114 case 75:
5115 return (cpi->cpi_family < 0x10);
5116 case 76:
5117 return (B(eax));
5118 case 77:
5119 return (cpi->cpi_family <= 0x11);
5120 case 78:
5121 return (B(eax) || SH_C0(eax));
5122 case 79:
5123 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
5124 case 80:
5125 case 81:
5126 case 82:
5127 return (B(eax));
5128 case 83:
5129 return (B(eax) || SH_C0(eax) || CG(eax));
5130 case 85:
5131 return (cpi->cpi_family < 0x10);
5132 case 86:
5133 return (SH_C0(eax) || CG(eax));
5134 case 88:
5135 #if !defined(__amd64)
5136 return (0);
5137 #else
5138 return (B(eax) || SH_C0(eax));
5139 #endif
5140 case 89:
5141 return (cpi->cpi_family < 0x10);
5142 case 90:
5143 return (B(eax) || SH_C0(eax) || CG(eax));
5144 case 91:
5145 case 92:
5146 return (B(eax) || SH_C0(eax));
5147 case 93:
5148 return (SH_C0(eax));
5149 case 94:
5150 return (B(eax) || SH_C0(eax) || CG(eax));
5151 case 95:
5152 #if !defined(__amd64)
5153 return (0);
5154 #else
5155 return (B(eax) || SH_C0(eax));
5156 #endif
5157 case 96:
5158 return (B(eax) || SH_C0(eax) || CG(eax));
5159 case 97:
5160 case 98:
5161 return (SH_C0(eax) || CG(eax));
5162 case 99:
5163 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5164 case 100:
5165 return (B(eax) || SH_C0(eax));
5166 case 101:
5167 case 103:
5168 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5169 case 104:
5170 return (SH_C0(eax) || CG(eax) || D0(eax));
5171 case 105:
5172 case 106:
5173 case 107:
5174 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5175 case 108:
5176 return (DH_CG(eax));
5177 case 109:
5178 return (SH_C0(eax) || CG(eax) || D0(eax));
5179 case 110:
5180 return (D0(eax) || EX(eax));
5181 case 111:
5182 return (CG(eax));
5183 case 112:
5184 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
5185 case 113:
5186 return (eax == 0x20fc0);
5187 case 114:
5188 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
5189 case 115:
5190 return (SH_E0(eax) || JH_E1(eax));
5191 case 116:
5192 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
5193 case 117:
5194 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5195 case 118:
5196 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) ||
5197 JH_E6(eax));
5198 case 121:
5199 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
5200 case 122:
5201 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11);
5202 case 123:
5203 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax));
5204 case 131:
5205 return (cpi->cpi_family < 0x10);
5206 case 6336786:
5207
5208 /*
5209 * Test for AdvPowerMgmtInfo.TscPStateInvariant
5210 * if this is a K8 family or newer processor. We're testing for
5211 * this 'erratum' to determine whether or not we have a constant
5212 * TSC.
5213 *
5214 * Our current fix for this is to disable the C1-Clock ramping.
5215 * However, this doesn't work on newer processor families nor
5216 * does it work when virtualized as those devices don't exist.
5217 */
5218 if (cpi->cpi_family >= 0x12 || get_hwenv() != HW_NATIVE) {
5219 return (0);
5220 }
5221
5222 if (CPI_FAMILY(cpi) == 0xf) {
5223 struct cpuid_regs regs;
5224 regs.cp_eax = 0x80000007;
5225 (void) __cpuid_insn(&regs);
5226 return (!(regs.cp_edx & 0x100));
5227 }
5228 return (0);
5229 case 6323525:
5230 /*
5231 * This erratum (K8 #147) is not present on family 10 and newer.
5232 */
5233 if (cpi->cpi_family >= 0x10) {
5234 return (0);
5235 }
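/*
 * Rebuild a comparable value from the raw cpuid signature: the
 * high byte is base family + extended family and the low byte is
 * (extended model << 4) | base model. Comparing it against 0xf40
 * flags family 0xf parts with a model below 0x40, i.e. (roughly)
 * pre-NPT K8 revisions.
 */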
5236 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) |
5237 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40);
5238
5239 case 6671130:
5240 /*
5241 * check for processors (pre-Shanghai) that do not provide
5242 * optimal management of 1gb ptes in their tlb.
5243 */
5244 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4);
5245
5246 case 298:
5247 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) ||
5248 DR_B2(eax) || RB_C0(eax));
5249
5250 case 721:
5251 #if defined(__amd64)
5252 return (cpi->cpi_family == 0x10 || cpi->cpi_family == 0x12);
5253 #else
5254 return (0);
5255 #endif
5256
5257 default:
5258 return (-1);
5259
5260 }
5261 }
5262
5263 /*
5264 * Determine if specified erratum is present via OSVW (OS Visible Workaround).
5265 * Return 1 if erratum is present, 0 if not present and -1 if indeterminate.
5266 */
5267 int
5268 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum)
5269 {
5270 struct cpuid_info *cpi;
5271 uint_t osvwid;
5272 static int osvwfeature = -1;
5273 uint64_t osvwlength;
5274
5275
5276 cpi = cpu->cpu_m.mcpu_cpi;
5277
5278 /* confirm OSVW supported */
5279 if (osvwfeature == -1) {
5280 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW;
5281 } else {
5282 /* assert that osvw feature setting is consistent on all cpus */
5283 ASSERT(osvwfeature ==
5284 (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW));
5285 }
5286 if (!osvwfeature)
5287 return (-1);
5288
5289 osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK;
5290
5291 switch (erratum) {
5292 case 298: /* osvwid is 0 */
5293 osvwid = 0;
5294 if (osvwlength <= (uint64_t)osvwid) {
5295 /* osvwid 0 is unknown */
5296 return (-1);
5297 }
5298
5299 /*
5300 * Check the OSVW STATUS MSR to determine the state
5301 * of the erratum where:
5302 * 0 - fixed by HW
5303 * 1 - BIOS has applied the workaround when BIOS
5304 * workaround is available. (Or for other errata,
5305 * OS workaround is required.)
5306 * For a value of 1, caller will confirm that the
5307 * erratum 298 workaround has indeed been applied by BIOS.
5308 *
5309 * A 1 may be set in cpus that have a HW fix
5310 * in a mixed cpu system. Regarding erratum 298:
5311 * In a multiprocessor platform, the workaround above
5312 * should be applied to all processors regardless of
5313 * silicon revision when an affected processor is
5314 * present.
5315 */
5316
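/*
 * The status bits are packed OSVW_ID_CNT_PER_MSR to a register,
 * so osvwid / OSVW_ID_CNT_PER_MSR selects which status MSR to
 * read and osvwid % OSVW_ID_CNT_PER_MSR selects the bit within
 * it; for osvwid 0 this is simply bit 0 of MSR_AMD_OSVW_STATUS.
 */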
5317 return (rdmsr(MSR_AMD_OSVW_STATUS +
5318 (osvwid / OSVW_ID_CNT_PER_MSR)) &
5319 (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR)));
5320
5321 default:
5322 return (-1);
5323 }
5324 }
5325
5326 static const char assoc_str[] = "associativity";
5327 static const char line_str[] = "line-size";
5328 static const char size_str[] = "size";
5329
5330 static void
5331 add_cache_prop(dev_info_t *devi, const char *label, const char *type,
5332 uint32_t val)
5333 {
5334 char buf[128];
5335
5336 /*
5337 * ndi_prop_update_int() is used because it is desirable for
5338 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set.
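*
* The property name is simply "<label>-<type>"; for example, a label of
* "l2-cache" with type "size" produces an "l2-cache-size" property.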
5339 */
5340 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf))
5341 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val);
5342 }
5343
5344 /*
5345 * Intel-style cache/tlb description
5346 *
5347 * Standard cpuid level 2 gives a randomly ordered
5348 * selection of tags that index into a table that describes
5349 * cache and tlb properties.
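*
* For example, a descriptor byte of 0x2c returned by leaf 2 maps, via
* intel_ctab[] below, to a 32KB, 8-way, 64-byte-line L1 data cache.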
5350 */
5351
5352 static const char l1_icache_str[] = "l1-icache";
5353 static const char l1_dcache_str[] = "l1-dcache";
5354 static const char l2_cache_str[] = "l2-cache";
5355 static const char l3_cache_str[] = "l3-cache";
5356 static const char itlb4k_str[] = "itlb-4K";
5357 static const char dtlb4k_str[] = "dtlb-4K";
5358 static const char itlb2M_str[] = "itlb-2M";
5359 static const char itlb4M_str[] = "itlb-4M";
5360 static const char dtlb4M_str[] = "dtlb-4M";
5361 static const char dtlb24_str[] = "dtlb0-2M-4M";
5362 static const char itlb424_str[] = "itlb-4K-2M-4M";
5363 static const char itlb24_str[] = "itlb-2M-4M";
5364 static const char dtlb44_str[] = "dtlb-4K-4M";
5365 static const char sl1_dcache_str[] = "sectored-l1-dcache";
5366 static const char sl2_cache_str[] = "sectored-l2-cache";
5367 static const char itrace_str[] = "itrace-cache";
5368 static const char sl3_cache_str[] = "sectored-l3-cache";
5369 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k";
5370
5371 static const struct cachetab {
5372 uint8_t ct_code;
5373 uint8_t ct_assoc;
5374 uint16_t ct_line_size;
5375 size_t ct_size;
5376 const char *ct_label;
5377 } intel_ctab[] = {
5378 /*
5379 * maintain descending order!
5380 *
5381 * Codes ignored - Reason
5382 * ----------------------
5383 * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache
5384 * f0H/f1H - Currently we do not interpret prefetch size by design
5385 */
5386 { 0xe4, 16, 64, 8*1024*1024, l3_cache_str},
5387 { 0xe3, 16, 64, 4*1024*1024, l3_cache_str},
5388 { 0xe2, 16, 64, 2*1024*1024, l3_cache_str},
5389 { 0xde, 12, 64, 6*1024*1024, l3_cache_str},
5390 { 0xdd, 12, 64, 3*1024*1024, l3_cache_str},
5391 { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str},
5392 { 0xd8, 8, 64, 4*1024*1024, l3_cache_str},
5393 { 0xd7, 8, 64, 2*1024*1024, l3_cache_str},
5394 { 0xd6, 8, 64, 1*1024*1024, l3_cache_str},
5395 { 0xd2, 4, 64, 2*1024*1024, l3_cache_str},
5396 { 0xd1, 4, 64, 1*1024*1024, l3_cache_str},
5397 { 0xd0, 4, 64, 512*1024, l3_cache_str},
5398 { 0xca, 4, 0, 512, sh_l2_tlb4k_str},
5399 { 0xc0, 4, 0, 8, dtlb44_str },
5400 { 0xba, 4, 0, 64, dtlb4k_str },
5401 { 0xb4, 4, 0, 256, dtlb4k_str },
5402 { 0xb3, 4, 0, 128, dtlb4k_str },
5403 { 0xb2, 4, 0, 64, itlb4k_str },
5404 { 0xb0, 4, 0, 128, itlb4k_str },
5405 { 0x87, 8, 64, 1024*1024, l2_cache_str},
5406 { 0x86, 4, 64, 512*1024, l2_cache_str},
5407 { 0x85, 8, 32, 2*1024*1024, l2_cache_str},
5408 { 0x84, 8, 32, 1024*1024, l2_cache_str},
5409 { 0x83, 8, 32, 512*1024, l2_cache_str},
5410 { 0x82, 8, 32, 256*1024, l2_cache_str},
5411 { 0x80, 8, 64, 512*1024, l2_cache_str},
5412 { 0x7f, 2, 64, 512*1024, l2_cache_str},
5413 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str},
5414 { 0x7c, 8, 64, 1024*1024, sl2_cache_str},
5415 { 0x7b, 8, 64, 512*1024, sl2_cache_str},
5416 { 0x7a, 8, 64, 256*1024, sl2_cache_str},
5417 { 0x79, 8, 64, 128*1024, sl2_cache_str},
5418 { 0x78, 8, 64, 1024*1024, l2_cache_str},
5419 { 0x73, 8, 0, 64*1024, itrace_str},
5420 { 0x72, 8, 0, 32*1024, itrace_str},
5421 { 0x71, 8, 0, 16*1024, itrace_str},
5422 { 0x70, 8, 0, 12*1024, itrace_str},
5423 { 0x68, 4, 64, 32*1024, sl1_dcache_str},
5424 { 0x67, 4, 64, 16*1024, sl1_dcache_str},
5425 { 0x66, 4, 64, 8*1024, sl1_dcache_str},
5426 { 0x60, 8, 64, 16*1024, sl1_dcache_str},
5427 { 0x5d, 0, 0, 256, dtlb44_str},
5428 { 0x5c, 0, 0, 128, dtlb44_str},
5429 { 0x5b, 0, 0, 64, dtlb44_str},
5430 { 0x5a, 4, 0, 32, dtlb24_str},
5431 { 0x59, 0, 0, 16, dtlb4k_str},
5432 { 0x57, 4, 0, 16, dtlb4k_str},
5433 { 0x56, 4, 0, 16, dtlb4M_str},
5434 { 0x55, 0, 0, 7, itlb24_str},
5435 { 0x52, 0, 0, 256, itlb424_str},
5436 { 0x51, 0, 0, 128, itlb424_str},
5437 { 0x50, 0, 0, 64, itlb424_str},
5438 { 0x4f, 0, 0, 32, itlb4k_str},
5439 { 0x4e, 24, 64, 6*1024*1024, l2_cache_str},
5440 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str},
5441 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str},
5442 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str},
5443 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str},
5444 { 0x49, 16, 64, 4*1024*1024, l3_cache_str},
5445 { 0x48, 12, 64, 3*1024*1024, l2_cache_str},
5446 { 0x47, 8, 64, 8*1024*1024, l3_cache_str},
5447 { 0x46, 4, 64, 4*1024*1024, l3_cache_str},
5448 { 0x45, 4, 32, 2*1024*1024, l2_cache_str},
5449 { 0x44, 4, 32, 1024*1024, l2_cache_str},
5450 { 0x43, 4, 32, 512*1024, l2_cache_str},
5451 { 0x42, 4, 32, 256*1024, l2_cache_str},
5452 { 0x41, 4, 32, 128*1024, l2_cache_str},
5453 { 0x3e, 4, 64, 512*1024, sl2_cache_str},
5454 { 0x3d, 6, 64, 384*1024, sl2_cache_str},
5455 { 0x3c, 4, 64, 256*1024, sl2_cache_str},
5456 { 0x3b, 2, 64, 128*1024, sl2_cache_str},
5457 { 0x3a, 6, 64, 192*1024, sl2_cache_str},
5458 { 0x39, 4, 64, 128*1024, sl2_cache_str},
5459 { 0x30, 8, 64, 32*1024, l1_icache_str},
5460 { 0x2c, 8, 64, 32*1024, l1_dcache_str},
5461 { 0x29, 8, 64, 4096*1024, sl3_cache_str},
5462 { 0x25, 8, 64, 2048*1024, sl3_cache_str},
5463 { 0x23, 8, 64, 1024*1024, sl3_cache_str},
5464 { 0x22, 4, 64, 512*1024, sl3_cache_str},
5465 { 0x0e, 6, 64, 24*1024, l1_dcache_str},
5466 { 0x0d, 4, 32, 16*1024, l1_dcache_str},
5467 { 0x0c, 4, 32, 16*1024, l1_dcache_str},
5468 { 0x0b, 4, 0, 4, itlb4M_str},
5469 { 0x0a, 2, 32, 8*1024, l1_dcache_str},
5470 { 0x08, 4, 32, 16*1024, l1_icache_str},
5471 { 0x06, 4, 32, 8*1024, l1_icache_str},
5472 { 0x05, 4, 0, 32, dtlb4M_str},
5473 { 0x04, 4, 0, 8, dtlb4M_str},
5474 { 0x03, 4, 0, 64, dtlb4k_str},
5475 { 0x02, 4, 0, 2, itlb4M_str},
5476 { 0x01, 4, 0, 32, itlb4k_str},
5477 { 0 }
5478 };
5479
5480 static const struct cachetab cyrix_ctab[] = {
5481 { 0x70, 4, 0, 32, "tlb-4K" },
5482 { 0x80, 4, 16, 16*1024, "l1-cache" },
5483 { 0 }
5484 };
5485
5486 /*
5487 * Search a cache table for a matching entry
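*
* The table must be kept in descending ct_code order (see the note in
* intel_ctab[] above): the scan stops at the first code that is <= the
* one sought and then checks it for an exact match.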
5488 */
5489 static const struct cachetab *
5490 find_cacheent(const struct cachetab *ct, uint_t code)
5491 {
5492 if (code != 0) {
5493 for (; ct->ct_code != 0; ct++)
5494 if (ct->ct_code <= code)
5495 break;
5496 if (ct->ct_code == code)
5497 return (ct);
5498 }
5499 return (NULL);
5500 }
5501
5502 /*
5503 * Populate cachetab entry with L2 or L3 cache-information using
5504 * cpuid function 4. This function is called from intel_walk_cacheinfo()
5505 * when descriptor 0x49 is encountered. It returns 0 if no such cache
5506 * information is found.
5507 */
5508 static int
5509 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi)
5510 {
5511 uint32_t level, i;
5512 int ret = 0;
5513
5514 for (i = 0; i < cpi->cpi_cache_leaf_size; i++) {
5515 level = CPI_CACHE_LVL(cpi->cpi_cache_leaves[i]);
5516
5517 if (level == 2 || level == 3) {
5518 ct->ct_assoc =
5519 CPI_CACHE_WAYS(cpi->cpi_cache_leaves[i]) + 1;
5520 ct->ct_line_size =
5521 CPI_CACHE_COH_LN_SZ(cpi->cpi_cache_leaves[i]) + 1;
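/*
 * Leaf 4 reports ways, partitions and line size as
 * (value - 1) in %ebx and the set count as (value - 1) in
 * %ecx, so the total size is
 * ways * partitions * line size * sets.
 */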
5522 ct->ct_size = ct->ct_assoc *
5523 (CPI_CACHE_PARTS(cpi->cpi_cache_leaves[i]) + 1) *
5524 ct->ct_line_size *
5525 (cpi->cpi_cache_leaves[i]->cp_ecx + 1);
5526
5527 if (level == 2) {
5528 ct->ct_label = l2_cache_str;
5529 } else if (level == 3) {
5530 ct->ct_label = l3_cache_str;
5531 }
5532 ret = 1;
5533 }
5534 }
5535
5536 return (ret);
5537 }
5538
5539 /*
5540 * Walk the cacheinfo descriptor, applying 'func' to every valid element
5541 * The walk is terminated if the walker returns non-zero.
5542 */
5543 static void
5544 intel_walk_cacheinfo(struct cpuid_info *cpi,
5545 void *arg, int (*func)(void *, const struct cachetab *))
5546 {
5547 const struct cachetab *ct;
5548 struct cachetab des_49_ct, des_b1_ct;
5549 uint8_t *dp;
5550 int i;
5551
5552 if ((dp = cpi->cpi_cacheinfo) == NULL)
5553 return;
5554 for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
5555 /*
5556 * For overloaded descriptor 0x49 we use cpuid function 4
5557 * if supported by the current processor, to create
5558 * cache information.
5559 * For overloaded descriptor 0xb1 we use X86_PAE flag
5560 * to disambiguate the cache information.
5561 */
5562 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 &&
5563 intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) {
5564 ct = &des_49_ct;
5565 } else if (*dp == 0xb1) {
5566 des_b1_ct.ct_code = 0xb1;
5567 des_b1_ct.ct_assoc = 4;
5568 des_b1_ct.ct_line_size = 0;
5569 if (is_x86_feature(x86_featureset, X86FSET_PAE)) {
5570 des_b1_ct.ct_size = 8;
5571 des_b1_ct.ct_label = itlb2M_str;
5572 } else {
5573 des_b1_ct.ct_size = 4;
5574 des_b1_ct.ct_label = itlb4M_str;
5575 }
5576 ct = &des_b1_ct;
5577 } else {
5578 if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) {
5579 continue;
5580 }
5581 }
5582
5583 if (func(arg, ct) != 0) {
5584 break;
5585 }
5586 }
5587 }
5588
5589 /*
5590 * (Like the Intel one, except for Cyrix CPUs)
5591 */
5592 static void
5593 cyrix_walk_cacheinfo(struct cpuid_info *cpi,
5594 void *arg, int (*func)(void *, const struct cachetab *))
5595 {
5596 const struct cachetab *ct;
5597 uint8_t *dp;
5598 int i;
5599
5600 if ((dp = cpi->cpi_cacheinfo) == NULL)
5601 return;
5602 for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
5603 /*
5604 * Search Cyrix-specific descriptor table first ..
5605 */
5606 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) {
5607 if (func(arg, ct) != 0)
5608 break;
5609 continue;
5610 }
5611 /*
5612 * .. else fall back to the Intel one
5613 */
5614 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) {
5615 if (func(arg, ct) != 0)
5616 break;
5617 continue;
5618 }
5619 }
5620 }
5621
5622 /*
5623 * A cacheinfo walker that adds associativity, line-size, and size properties
5624 * to the devinfo node it is passed as an argument.
5625 */
5626 static int
5627 add_cacheent_props(void *arg, const struct cachetab *ct)
5628 {
5629 dev_info_t *devi = arg;
5630
5631 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc);
5632 if (ct->ct_line_size != 0)
5633 add_cache_prop(devi, ct->ct_label, line_str,
5634 ct->ct_line_size);
5635 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size);
5636 return (0);
5637 }
5638
5639
5640 static const char fully_assoc[] = "fully-associative?";
5641
5642 /*
5643 * AMD style cache/tlb description
5644 *
5645 * Extended functions 5 and 6 directly describe properties of
5646 * tlbs and various cache levels.
5647 */
5648 static void
5649 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc)
5650 {
5651 switch (assoc) {
5652 case 0: /* reserved; ignore */
5653 break;
5654 default:
5655 add_cache_prop(devi, label, assoc_str, assoc);
5656 break;
5657 case 0xff:
5658 add_cache_prop(devi, label, fully_assoc, 1);
5659 break;
5660 }
5661 }
5662
5663 static void
5664 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
5665 {
5666 if (size == 0)
5667 return;
5668 add_cache_prop(devi, label, size_str, size);
5669 add_amd_assoc(devi, label, assoc);
5670 }
5671
5672 static void
5673 add_amd_cache(dev_info_t *devi, const char *label,
5674 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
5675 {
5676 if (size == 0 || line_size == 0)
5677 return;
5678 add_amd_assoc(devi, label, assoc);
5679 /*
5680 * Most AMD parts have a sectored cache. Multiple cache lines are
5681 * associated with each tag. A sector consists of all cache lines
5682 * associated with a tag. For example, the AMD K6-III has a sector
5683 * size of 2 cache lines per tag.
5684 */
5685 if (lines_per_tag != 0)
5686 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
5687 add_cache_prop(devi, label, line_str, line_size);
5688 add_cache_prop(devi, label, size_str, size * 1024);
5689 }
5690
5691 static void
5692 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc)
5693 {
5694 switch (assoc) {
5695 case 0: /* off */
5696 break;
5697 case 1:
5698 case 2:
5699 case 4:
5700 add_cache_prop(devi, label, assoc_str, assoc);
5701 break;
5702 case 6:
5703 add_cache_prop(devi, label, assoc_str, 8);
5704 break;
5705 case 8:
5706 add_cache_prop(devi, label, assoc_str, 16);
5707 break;
5708 case 0xf:
5709 add_cache_prop(devi, label, fully_assoc, 1);
5710 break;
5711 default: /* reserved; ignore */
5712 break;
5713 }
5714 }
5715
5716 static void
5717 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
5718 {
5719 if (size == 0 || assoc == 0)
5720 return;
5721 add_amd_l2_assoc(devi, label, assoc);
5722 add_cache_prop(devi, label, size_str, size);
5723 }
5724
5725 static void
5726 add_amd_l2_cache(dev_info_t *devi, const char *label,
5727 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
5728 {
5729 if (size == 0 || assoc == 0 || line_size == 0)
5730 return;
5731 add_amd_l2_assoc(devi, label, assoc);
5732 if (lines_per_tag != 0)
5733 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
5734 add_cache_prop(devi, label, line_str, line_size);
5735 add_cache_prop(devi, label, size_str, size * 1024);
5736 }
5737
5738 static void
5739 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi)
5740 {
5741 struct cpuid_regs *cp;
5742
5743 if (cpi->cpi_xmaxeax < 0x80000005)
5744 return;
5745 cp = &cpi->cpi_extd[5];
5746
5747 /*
5748 * 4M/2M L1 TLB configuration
5749 *
5750 * We report the size for 2M pages because AMD uses two
5751 * TLB entries for one 4M page.
5752 */
5753 add_amd_tlb(devi, "dtlb-2M",
5754 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16));
5755 add_amd_tlb(devi, "itlb-2M",
5756 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0));
5757
5758 /*
5759 * 4K L1 TLB configuration
5760 */
5761
5762 switch (cpi->cpi_vendor) {
5763 uint_t nentries;
5764 case X86_VENDOR_TM:
5765 if (cpi->cpi_family >= 5) {
5766 /*
5767 * Crusoe processors have 256 TLB entries, but
5768 * cpuid data format constrains them to only
5769 * reporting 255 of them.
5770 */
5771 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255)
5772 nentries = 256;
5773 /*
5774 * Crusoe processors also have a unified TLB
5775 */
5776 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24),
5777 nentries);
5778 break;
5779 }
5780 /*FALLTHROUGH*/
5781 default:
5782 add_amd_tlb(devi, itlb4k_str,
5783 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16));
5784 add_amd_tlb(devi, dtlb4k_str,
5785 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0));
5786 break;
5787 }
5788
5789 /*
5790 * data L1 cache configuration
5791 */
5792
5793 add_amd_cache(devi, l1_dcache_str,
5794 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16),
5795 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0));
5796
5797 /*
5798 * code L1 cache configuration
5799 */
5800
5801 add_amd_cache(devi, l1_icache_str,
5802 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16),
5803 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0));
5804
5805 if (cpi->cpi_xmaxeax < 0x80000006)
5806 return;
5807 cp = &cpi->cpi_extd[6];
5808
5809 /* Check for a unified L2 TLB for large pages */
5810
5811 if (BITX(cp->cp_eax, 31, 16) == 0)
5812 add_amd_l2_tlb(devi, "l2-tlb-2M",
5813 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
5814 else {
5815 add_amd_l2_tlb(devi, "l2-dtlb-2M",
5816 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
5817 add_amd_l2_tlb(devi, "l2-itlb-2M",
5818 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
5819 }
5820
5821 /* Check for a unified L2 TLB for 4K pages */
5822
5823 if (BITX(cp->cp_ebx, 31, 16) == 0) {
5824 add_amd_l2_tlb(devi, "l2-tlb-4K",
5825 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
5826 } else {
5827 add_amd_l2_tlb(devi, "l2-dtlb-4K",
5828 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
5829 add_amd_l2_tlb(devi, "l2-itlb-4K",
5830 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
5831 }
5832
5833 add_amd_l2_cache(devi, l2_cache_str,
5834 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12),
5835 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0));
5836 }
5837
5838 /*
5839 * There are two basic ways that the x86 world describes its cache
5840 * and tlb architecture - Intel's way and AMD's way.
5841 *
5842 * Return which flavor of cache architecture we should use
5843 */
5844 static int
5845 x86_which_cacheinfo(struct cpuid_info *cpi)
5846 {
5847 switch (cpi->cpi_vendor) {
5848 case X86_VENDOR_Intel:
5849 if (cpi->cpi_maxeax >= 2)
5850 return (X86_VENDOR_Intel);
5851 break;
5852 case X86_VENDOR_AMD:
5853 /*
5854 * The K5 model 1 was the first part from AMD that reported
5855 * cache sizes via extended cpuid functions.
5856 */
5857 if (cpi->cpi_family > 5 ||
5858 (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
5859 return (X86_VENDOR_AMD);
5860 break;
5861 case X86_VENDOR_TM:
5862 if (cpi->cpi_family >= 5)
5863 return (X86_VENDOR_AMD);
5864 /*FALLTHROUGH*/
5865 default:
5866 /*
5867 * If they have extended CPU data for 0x80000005
5868 * then we assume they have AMD-format cache
5869 * information.
5870 *
5871 * If not, and the vendor happens to be Cyrix,
5872 * then try our Cyrix-specific handler.
5873 *
5874 * If we're not Cyrix, then assume we're using Intel's
5875 * table-driven format instead.
5876 */
5877 if (cpi->cpi_xmaxeax >= 0x80000005)
5878 return (X86_VENDOR_AMD);
5879 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix)
5880 return (X86_VENDOR_Cyrix);
5881 else if (cpi->cpi_maxeax >= 2)
5882 return (X86_VENDOR_Intel);
5883 break;
5884 }
5885 return (-1);
5886 }
5887
5888 void
5889 cpuid_set_cpu_properties(void *dip, processorid_t cpu_id,
5890 struct cpuid_info *cpi)
5891 {
5892 dev_info_t *cpu_devi;
5893 int create;
5894
5895 cpu_devi = (dev_info_t *)dip;
5896
5897 /* device_type */
5898 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
5899 "device_type", "cpu");
5900
5901 /* reg */
5902 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5903 "reg", cpu_id);
5904
5905 /* cpu-mhz, and clock-frequency */
5906 if (cpu_freq > 0) {
5907 long long mul;
5908
5909 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5910 "cpu-mhz", cpu_freq);
5911 if ((mul = cpu_freq * 1000000LL) <= INT_MAX)
5912 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5913 "clock-frequency", (int)mul);
5914 }
5915
5916 if (!is_x86_feature(x86_featureset, X86FSET_CPUID)) {
5917 return;
5918 }
5919
5920 /* vendor-id */
5921 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
5922 "vendor-id", cpi->cpi_vendorstr);
5923
5924 if (cpi->cpi_maxeax == 0) {
5925 return;
5926 }
5927
5928 /*
5929 * family, model, and step
5930 */
5931 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5932 "family", CPI_FAMILY(cpi));
5933 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5934 "cpu-model", CPI_MODEL(cpi));
5935 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5936 "stepping-id", CPI_STEP(cpi));
5937
5938 /* type */
5939 switch (cpi->cpi_vendor) {
5940 case X86_VENDOR_Intel:
5941 create = 1;
5942 break;
5943 default:
5944 create = 0;
5945 break;
5946 }
5947 if (create)
5948 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5949 "type", CPI_TYPE(cpi));
5950
5951 /* ext-family */
5952 switch (cpi->cpi_vendor) {
5953 case X86_VENDOR_Intel:
5954 case X86_VENDOR_AMD:
5955 create = cpi->cpi_family >= 0xf;
5956 break;
5957 default:
5958 create = 0;
5959 break;
5960 }
5961 if (create)
5962 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5963 "ext-family", CPI_FAMILY_XTD(cpi));
5964
5965 /* ext-model */
5966 switch (cpi->cpi_vendor) {
5967 case X86_VENDOR_Intel:
5968 create = IS_EXTENDED_MODEL_INTEL(cpi);
5969 break;
5970 case X86_VENDOR_AMD:
5971 create = CPI_FAMILY(cpi) == 0xf;
5972 break;
5973 default:
5974 create = 0;
5975 break;
5976 }
5977 if (create)
5978 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5979 "ext-model", CPI_MODEL_XTD(cpi));
5980
5981 /* generation */
5982 switch (cpi->cpi_vendor) {
5983 case X86_VENDOR_AMD:
5984 /*
5985 * AMD K5 model 1 was the first part to support this
5986 */
5987 create = cpi->cpi_xmaxeax >= 0x80000001;
5988 break;
5989 default:
5990 create = 0;
5991 break;
5992 }
5993 if (create)
5994 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5995 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8));
5996
5997 /* brand-id */
5998 switch (cpi->cpi_vendor) {
5999 case X86_VENDOR_Intel:
6000 /*
6001 * brand id first appeared on Pentium III Xeon model 8 and
6002 * Celeron model 8 processors, and on Opteron
6003 */
6004 create = cpi->cpi_family > 6 ||
6005 (cpi->cpi_family == 6 && cpi->cpi_model >= 8);
6006 break;
6007 case X86_VENDOR_AMD:
6008 create = cpi->cpi_family >= 0xf;
6009 break;
6010 default:
6011 create = 0;
6012 break;
6013 }
6014 if (create && cpi->cpi_brandid != 0) {
6015 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6016 "brand-id", cpi->cpi_brandid);
6017 }
6018
6019 /* chunks, and apic-id */
6020 switch (cpi->cpi_vendor) {
6021 /*
6022 * first available on Pentium IV and Opteron (K8)
6023 */
6024 case X86_VENDOR_Intel:
6025 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
6026 break;
6027 case X86_VENDOR_AMD:
6028 create = cpi->cpi_family >= 0xf;
6029 break;
6030 default:
6031 create = 0;
6032 break;
6033 }
6034 if (create) {
6035 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6036 "chunks", CPI_CHUNKS(cpi));
6037 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6038 "apic-id", cpi->cpi_apicid);
6039 if (cpi->cpi_chipid >= 0) {
6040 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6041 "chip#", cpi->cpi_chipid);
6042 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6043 "clog#", cpi->cpi_clogid);
6044 }
6045 }
6046
6047 /* cpuid-features */
6048 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6049 "cpuid-features", CPI_FEATURES_EDX(cpi));
6050
6051
6052 /* cpuid-features-ecx */
6053 switch (cpi->cpi_vendor) {
6054 case X86_VENDOR_Intel:
6055 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
6056 break;
6057 case X86_VENDOR_AMD:
6058 create = cpi->cpi_family >= 0xf;
6059 break;
6060 default:
6061 create = 0;
6062 break;
6063 }
6064 if (create)
6065 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6066 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi));
6067
6068 /* ext-cpuid-features */
6069 switch (cpi->cpi_vendor) {
6070 case X86_VENDOR_Intel:
6071 case X86_VENDOR_AMD:
6072 case X86_VENDOR_Cyrix:
6073 case X86_VENDOR_TM:
6074 case X86_VENDOR_Centaur:
6075 create = cpi->cpi_xmaxeax >= 0x80000001;
6076 break;
6077 default:
6078 create = 0;
6079 break;
6080 }
6081 if (create) {
6082 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6083 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi));
6084 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6085 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi));
6086 }
6087
6088 /*
6089 * Brand String first appeared in Intel Pentium IV, AMD K5
6090 * model 1, and Cyrix GXm. On earlier models we try and
6091 * simulate something similar .. so this string should always
6092 * say -something- about the processor, however lame.
6093 */
6094 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
6095 "brand-string", cpi->cpi_brandstr);
6096
6097 /*
6098 * Finally, cache and tlb information
6099 */
6100 switch (x86_which_cacheinfo(cpi)) {
6101 case X86_VENDOR_Intel:
6102 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
6103 break;
6104 case X86_VENDOR_Cyrix:
6105 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
6106 break;
6107 case X86_VENDOR_AMD:
6108 amd_cache_info(cpi, cpu_devi);
6109 break;
6110 default:
6111 break;
6112 }
6113 }
6114
6115 struct l2info {
6116 int *l2i_csz;
6117 int *l2i_lsz;
6118 int *l2i_assoc;
6119 int l2i_ret;
6120 };
6121
6122 /*
6123 * A cacheinfo walker that fetches the size, line-size and associativity
6124 * of the L2 cache
6125 */
6126 static int
6127 intel_l2cinfo(void *arg, const struct cachetab *ct)
6128 {
6129 struct l2info *l2i = arg;
6130 int *ip;
6131
6132 if (ct->ct_label != l2_cache_str &&
6133 ct->ct_label != sl2_cache_str)
6134 return (0); /* not an L2 -- keep walking */
6135
6136 if ((ip = l2i->l2i_csz) != NULL)
6137 *ip = ct->ct_size;
6138 if ((ip = l2i->l2i_lsz) != NULL)
6139 *ip = ct->ct_line_size;
6140 if ((ip = l2i->l2i_assoc) != NULL)
6141 *ip = ct->ct_assoc;
6142 l2i->l2i_ret = ct->ct_size;
6143 return (1); /* was an L2 -- terminate walk */
6144 }
6145
6146 /*
6147 * AMD L2/L3 Cache and TLB Associativity Field Definition:
6148 *
6149 * Unlike the associativity for the L1 cache and tlb where the 8 bit
6150 * value is the associativity, the associativity for the L2 cache and
6151 * tlb is encoded in the following table. The 4 bit L2 value serves as
6152 * an index into the amd_afd[] array to determine the associativity.
6153 * -1 is undefined. 0 is fully associative.
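*
* For example, an encoding of 6 (as found in CPUID Fn8000_0006 ECX[15:12]
* for the L2 cache) indexes amd_afd[6] == 8, i.e. an 8-way cache.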
6154 */
6155
6156 static int amd_afd[] =
6157 {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0};
6158
6159 static void
6160 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i)
6161 {
6162 struct cpuid_regs *cp;
6163 uint_t size, assoc;
6164 int i;
6165 int *ip;
6166
6167 if (cpi->cpi_xmaxeax < 0x80000006)
6168 return;
6169 cp = &cpi->cpi_extd[6];
6170
6171 if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 &&
6172 (size = BITX(cp->cp_ecx, 31, 16)) != 0) {
6173 uint_t cachesz = size * 1024;
6174 assoc = amd_afd[i];
6175
6176 ASSERT(assoc != -1);
6177
6178 if ((ip = l2i->l2i_csz) != NULL)
6179 *ip = cachesz;
6180 if ((ip = l2i->l2i_lsz) != NULL)
6181 *ip = BITX(cp->cp_ecx, 7, 0);
6182 if ((ip = l2i->l2i_assoc) != NULL)
6183 *ip = assoc;
6184 l2i->l2i_ret = cachesz;
6185 }
6186 }
6187
6188 int
6189 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc)
6190 {
6191 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
6192 struct l2info __l2info, *l2i = &__l2info;
6193
6194 l2i->l2i_csz = csz;
6195 l2i->l2i_lsz = lsz;
6196 l2i->l2i_assoc = assoc;
6197 l2i->l2i_ret = -1;
6198
6199 switch (x86_which_cacheinfo(cpi)) {
6200 case X86_VENDOR_Intel:
6201 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
6202 break;
6203 case X86_VENDOR_Cyrix:
6204 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
6205 break;
6206 case X86_VENDOR_AMD:
6207 amd_l2cacheinfo(cpi, l2i);
6208 break;
6209 default:
6210 break;
6211 }
6212 return (l2i->l2i_ret);
6213 }
6214
6215 #if !defined(__xpv)
6216
6217 uint32_t *
6218 cpuid_mwait_alloc(cpu_t *cpu)
6219 {
6220 uint32_t *ret;
6221 size_t mwait_size;
6222
6223 ASSERT(cpuid_checkpass(CPU, 2));
6224
6225 mwait_size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max;
6226 if (mwait_size == 0)
6227 return (NULL);
6228
6229 /*
6230 * kmem_alloc() returns cache line size aligned data for mwait_size
6231 * allocations. mwait_size is currently cache line sized. Neither
6232 * of these implementation details are guaranteed to be true in the
6233 * future.
6234 *
6235 * First try allocating mwait_size as kmem_alloc() currently returns
6236 * correctly aligned memory. If kmem_alloc() does not return
6237 * mwait_size aligned memory, allocate twice the size and round up (P2ROUNDUP).
6238 *
6239 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we
6240 * decide to free this memory.
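*
* As an illustration (assuming a 64-byte monitor line): if kmem_zalloc()
* happens to return a 64-byte aligned buffer it is used directly; otherwise
* a buffer of twice mwait_size is allocated and the pointer is rounded up
* with P2ROUNDUP() to the next mwait_size boundary.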
6241 */
6242 ret = kmem_zalloc(mwait_size, KM_SLEEP);
6243 if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) {
6244 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
6245 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size;
6246 *ret = MWAIT_RUNNING;
6247 return (ret);
6248 } else {
6249 kmem_free(ret, mwait_size);
6250 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP);
6251 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
6252 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2;
6253 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size);
6254 *ret = MWAIT_RUNNING;
6255 return (ret);
6256 }
6257 }
6258
6259 void
6260 cpuid_mwait_free(cpu_t *cpu)
6261 {
6262 if (cpu->cpu_m.mcpu_cpi == NULL) {
6263 return;
6264 }
6265
6266 if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL &&
6267 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) {
6268 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual,
6269 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual);
6270 }
6271
6272 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL;
6273 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0;
6274 }
6275
6276 void
6277 patch_tsc_read(int flag)
6278 {
6279 size_t cnt;
6280
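/*
 * Hot-patch tsc_read(): each case copies the body of one of the
 * prebuilt variants (delimited by the _*_start/_*_end labels,
 * presumably provided by the assembly sources) over tsc_read(),
 * selecting plain rdtsc, rdtsc behind an mfence/lfence, or rdtscp.
 */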
6281 switch (flag) {
6282 case TSC_NONE:
6283 cnt = &_no_rdtsc_end - &_no_rdtsc_start;
6284 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt);
6285 break;
6286 case TSC_RDTSC_MFENCE:
6287 cnt = &_tsc_mfence_end - &_tsc_mfence_start;
6288 (void) memcpy((void *)tsc_read,
6289 (void *)&_tsc_mfence_start, cnt);
6290 break;
6291 case TSC_RDTSC_LFENCE:
6292 cnt = &_tsc_lfence_end - &_tsc_lfence_start;
6293 (void) memcpy((void *)tsc_read,
6294 (void *)&_tsc_lfence_start, cnt);
6295 break;
6296 case TSC_TSCP:
6297 cnt = &_tscp_end - &_tscp_start;
6298 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt);
6299 break;
6300 default:
6301 /* Bail for unexpected TSC types. (TSC_NONE covers 0) */
6302 cmn_err(CE_PANIC, "Unrecognized TSC type: %d", flag);
6303 break;
6304 }
6305 tsc_type = flag;
6306 }
6307
6308 int
6309 cpuid_deep_cstates_supported(void)
6310 {
6311 struct cpuid_info *cpi;
6312 struct cpuid_regs regs;
6313
6314 ASSERT(cpuid_checkpass(CPU, 1));
6315
6316 cpi = CPU->cpu_m.mcpu_cpi;
6317
6318 if (!is_x86_feature(x86_featureset, X86FSET_CPUID))
6319 return (0);
6320
6321 switch (cpi->cpi_vendor) {
6322 case X86_VENDOR_Intel:
6323 if (cpi->cpi_xmaxeax < 0x80000007)
6324 return (0);
6325
6326 /*
6327 * Does the TSC run at a constant rate in all ACPI C-states?
6328 */
6329 regs.cp_eax = 0x80000007;
6330 (void) __cpuid_insn(&regs);
6331 return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE);
6332
6333 default:
6334 return (0);
6335 }
6336 }
6337
6338 #endif /* !__xpv */
6339
6340 void
6341 post_startup_cpu_fixups(void)
6342 {
6343 #ifndef __xpv
6344 /*
6345 * Some AMD processors support C1E state. Entering this state will
6346 * cause the local APIC timer to stop, which we can't deal with at
6347 * this time.
6348 */
6349 if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) {
6350 on_trap_data_t otd;
6351 uint64_t reg;
6352
6353 if (!on_trap(&otd, OT_DATA_ACCESS)) {
6354 reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT);
6355 /* Disable C1E state if it is enabled by BIOS */
6356 if ((reg >> AMD_ACTONCMPHALT_SHIFT) &
6357 AMD_ACTONCMPHALT_MASK) {
6358 reg &= ~(AMD_ACTONCMPHALT_MASK <<
6359 AMD_ACTONCMPHALT_SHIFT);
6360 wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg);
6361 }
6362 }
6363 no_trap();
6364 }
6365 #endif /* !__xpv */
6366 }
6367
6368 void
6369 enable_pcid(void)
6370 {
6371 if (x86_use_pcid == -1)
6372 x86_use_pcid = is_x86_feature(x86_featureset, X86FSET_PCID);
6373
6374 if (x86_use_invpcid == -1) {
6375 x86_use_invpcid = is_x86_feature(x86_featureset,
6376 X86FSET_INVPCID);
6377 }
6378
6379 if (!x86_use_pcid)
6380 return;
6381
6382 /*
6383 * Intel says that on setting PCIDE, it immediately starts using the PCID
6384 * bits; better make sure there's nothing there.
6385 */
6386 ASSERT((getcr3() & MMU_PAGEOFFSET) == PCID_NONE);
6387
6388 setcr4(getcr4() | CR4_PCIDE);
6389 }
6390
6391 /*
6392 * Setup necessary registers to enable XSAVE feature on this processor.
6393 * This function needs to be called early enough, so that no xsave/xrstor
6394 * ops will execute on the processor before the MSRs are properly set up.
6395 *
6396 * Current implementation has the following assumption:
6397 * - cpuid_pass1() is done, so that X86 features are known.
6398 * - fpu_probe() is done, so that fp_save_mech is chosen.
6399 */
6400 void
6401 xsave_setup_msr(cpu_t *cpu)
6402 {
6403 ASSERT(fp_save_mech == FP_XSAVE);
6404 ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
6405
6406 /* Enable OSXSAVE in CR4. */
6407 setcr4(getcr4() | CR4_OSXSAVE);
6408 /*
6409 * Update SW copy of ECX, so that /dev/cpu/self/cpuid will report
6410 * correct value.
6411 */
6412 cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_ecx |= CPUID_INTC_ECX_OSXSAVE;
6413 setup_xfem();
6414 }
6415
6416 /*
6417 * Starting with the Westmere processor the local
6418 * APIC timer will continue running in all C-states,
6419 * including the deepest C-states.
6420 */
6421 int
6422 cpuid_arat_supported(void)
6423 {
6424 struct cpuid_info *cpi;
6425 struct cpuid_regs regs;
6426
6427 ASSERT(cpuid_checkpass(CPU, 1));
6428 ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
6429
6430 cpi = CPU->cpu_m.mcpu_cpi;
6431
6432 switch (cpi->cpi_vendor) {
6433 case X86_VENDOR_Intel:
6434 /*
6435 * Always-running Local APIC Timer is
6436 * indicated by CPUID.6.EAX[2].
6437 */
6438 if (cpi->cpi_maxeax >= 6) {
6439 regs.cp_eax = 6;
6440 (void) cpuid_insn(NULL, &regs);
6441 return (regs.cp_eax & CPUID_CSTATE_ARAT);
6442 } else {
6443 return (0);
6444 }
6445 default:
6446 return (0);
6447 }
6448 }
6449
6450 /*
6451 * Check support for Intel ENERGY_PERF_BIAS feature
6452 */
6453 int
6454 cpuid_iepb_supported(struct cpu *cp)
6455 {
6456 struct cpuid_info *cpi = cp->cpu_m.mcpu_cpi;
6457 struct cpuid_regs regs;
6458
6459 ASSERT(cpuid_checkpass(cp, 1));
6460
6461 if (!(is_x86_feature(x86_featureset, X86FSET_CPUID)) ||
6462 !(is_x86_feature(x86_featureset, X86FSET_MSR))) {
6463 return (0);
6464 }
6465
6466 /*
6467 * Intel ENERGY_PERF_BIAS MSR is indicated by
6468 * capability bit CPUID.6.ECX.3
6469 */
6470 if ((cpi->cpi_vendor != X86_VENDOR_Intel) || (cpi->cpi_maxeax < 6))
6471 return (0);
6472
6473 regs.cp_eax = 0x6;
6474 (void) cpuid_insn(NULL, &regs);
6475 return (regs.cp_ecx & CPUID_EPB_SUPPORT);
6476 }
6477
6478 /*
6479 * Check support for TSC deadline timer
6480 *
6481 * TSC deadline timer provides a superior software programming
6482 * model over local APIC timer that eliminates "time drifts".
6483 * Instead of specifying a relative time, software specifies an
6484 * absolute time as the target at which the processor should
6485 * generate a timer event.
6486 */
6487 int
6488 cpuid_deadline_tsc_supported(void)
6489 {
6490 struct cpuid_info *cpi = CPU->cpu_m.mcpu_cpi;
6491 struct cpuid_regs regs;
6492
6493 ASSERT(cpuid_checkpass(CPU, 1));
6494 ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
6495
6496 switch (cpi->cpi_vendor) {
6497 case X86_VENDOR_Intel:
6498 if (cpi->cpi_maxeax >= 1) {
6499 regs.cp_eax = 1;
6500 (void) cpuid_insn(NULL, &regs);
6501 return (regs.cp_ecx & CPUID_DEADLINE_TSC);
6502 } else {
6503 return (0);
6504 }
6505 default:
6506 return (0);
6507 }
6508 }
6509
6510 #if defined(__amd64) && !defined(__xpv)
6511 /*
6512 * Patch in versions of bcopy for high performance Intel Nehalem (Nhm)
6513 * processors and later...
6514 */
6515 void
6516 patch_memops(uint_t vendor)
6517 {
6518 size_t cnt, i;
6519 caddr_t to, from;
6520
6521 if ((vendor == X86_VENDOR_Intel) &&
6522 is_x86_feature(x86_featureset, X86FSET_SSE4_2)) {
6523 cnt = &bcopy_patch_end - &bcopy_patch_start;
6524 to = &bcopy_ck_size;
6525 from = &bcopy_patch_start;
6526 for (i = 0; i < cnt; i++) {
6527 *to++ = *from++;
6528 }
6529 }
6530 }
6531 #endif /* __amd64 && !__xpv */
6532
6533 /*
6534 * We're being asked to tell the system how many bits are required to represent
6535 * the various core and strand IDs. While it's tempting to derive this based
6536 * on the values in cpi_ncore_per_chip and cpi_ncpu_per_chip, that isn't quite
6537 * correct. Instead, this needs to be based on the number of bits that the APIC
6538 * allows for these different configurations. We only update these to a larger
6539 * value if we find one.
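*
* For example, a six-core package may still reserve four bits of APIC ID
* space for the core ID, so deriving three bits from the core count alone
* would mis-decode APIC IDs.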
6540 */
6541 void
6542 cpuid_get_ext_topo(cpu_t *cpu, uint_t *core_nbits, uint_t *strand_nbits)
6543 {
6544 struct cpuid_info *cpi;
6545
6546 VERIFY(cpuid_checkpass(CPU, 1));
6547 cpi = cpu->cpu_m.mcpu_cpi;
6548
6549 if (cpi->cpi_ncore_bits > *core_nbits) {
6550 *core_nbits = cpi->cpi_ncore_bits;
6551 }
6552
6553 if (cpi->cpi_nthread_bits > *strand_nbits) {
6554 *strand_nbits = cpi->cpi_nthread_bits;
6555 }
6556 }
6557
6558 void
6559 cpuid_pass_ucode(cpu_t *cpu, uchar_t *fset)
6560 {
6561 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
6562 struct cpuid_regs cp;
6563
6564 /*
6565 * Reread the CPUID portions that we need for various security
6566 * information.
6567 */
6568 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
6569 /*
6570 * Check if we now have leaf 7 available to us.
6571 */
6572 if (cpi->cpi_maxeax < 7) {
6573 bzero(&cp, sizeof (cp));
6574 cp.cp_eax = 0;
6575 cpi->cpi_maxeax = __cpuid_insn(&cp);
6576 if (cpi->cpi_maxeax < 7)
6577 return;
6578 }
6579
6580 bzero(&cp, sizeof (cp));
6581 cp.cp_eax = 7;
6582 cp.cp_ecx = 0;
6583 (void) __cpuid_insn(&cp);
6584 cpi->cpi_std[7] = cp;
6585 } else if (cpi->cpi_vendor == X86_VENDOR_AMD) {
6586 /* No xcpuid support */
6587 if (cpi->cpi_family < 5 ||
6588 (cpi->cpi_family == 5 && cpi->cpi_model < 1))
6589 return;
6590
6591 if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8) {
6592 bzero(&cp, sizeof (cp));
6593 cp.cp_eax = CPUID_LEAF_EXT_0;
6594 cpi->cpi_xmaxeax = __cpuid_insn(&cp);
6595 if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8) {
6596 return;
6597 }
6598 }
6599
6600 bzero(&cp, sizeof (cp));
6601 cp.cp_eax = CPUID_LEAF_EXT_8;
6602 (void) __cpuid_insn(&cp);
6603 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8, &cp);
6604 cpi->cpi_extd[8] = cp;
6605 } else {
6606 /*
6607 * Nothing to do here. Return an empty set which has already
6608 * been zeroed for us.
6609 */
6610 return;
6611 }
6612 cpuid_scan_security(cpu, fset);
6613 }
6614
6615 /* ARGSUSED */
6616 static int
6617 cpuid_post_ucodeadm_xc(xc_arg_t arg0, xc_arg_t arg1, xc_arg_t arg2)
6618 {
6619 uchar_t *fset;
6620
6621 fset = (uchar_t *)(arg0 + sizeof (x86_featureset) * CPU->cpu_id);
6622 cpuid_pass_ucode(CPU, fset);
6623
6624 return (0);
6625 }
6626
6627 /*
6628 * After a microcode update where the version has changed, then we need to
6629 * rescan CPUID. To do this we check every CPU to make sure that they have the
6630 * same microcode. Then we perform a cross call to all such CPUs. It's the
6631 * caller's job to make sure that no one else can end up doing an update while
6632 * this is going on.
6633 *
6634 * We assume that the system is microcode capable if we're called.
6635 */
6636 void
6637 cpuid_post_ucodeadm(void)
6638 {
6639 uint32_t rev;
6640 int i;
6641 struct cpu *cpu;
6642 cpuset_t cpuset;
6643 void *argdata;
6644 uchar_t *f0;
6645
6646 argdata = kmem_zalloc(sizeof (x86_featureset) * NCPU, KM_SLEEP);
6647
6648 mutex_enter(&cpu_lock);
6649 cpu = cpu_get(0);
6650 rev = cpu->cpu_m.mcpu_ucode_info->cui_rev;
6651 CPUSET_ONLY(cpuset, 0);
6652 for (i = 1; i < max_ncpus; i++) {
6653 if ((cpu = cpu_get(i)) == NULL)
6654 continue;
6655
6656 if (cpu->cpu_m.mcpu_ucode_info->cui_rev != rev) {
6657 panic("post microcode update CPU %d has differing "
6658 "microcode revision (%u) from CPU 0 (%u)",
6659 i, cpu->cpu_m.mcpu_ucode_info->cui_rev, rev);
6660 }
6661 CPUSET_ADD(cpuset, i);
6662 }
6663
6664 kpreempt_disable();
6665 xc_sync((xc_arg_t)argdata, 0, 0, CPUSET2BV(cpuset),
6666 cpuid_post_ucodeadm_xc);
6667 kpreempt_enable();
6668
6669 /*
6670 * OK, now look at each CPU and see if their feature sets are equal.
6671 */
6672 f0 = argdata;
6673 for (i = 1; i < max_ncpus; i++) {
6674 uchar_t *fset;
6675 if (!CPU_IN_SET(cpuset, i))
6676 continue;
6677
6678 fset = (uchar_t *)((uintptr_t)argdata +
6679 sizeof (x86_featureset) * i);
6680
6681 if (!compare_x86_featureset(f0, fset)) {
6682 panic("Post microcode update CPU %d has "
6683 "differing security feature (%p) set from CPU 0 "
6684 "(%p), not appending to feature set", i,
6685 (void *)fset, (void *)f0);
6686 }
6687 }
6688
6689 mutex_exit(&cpu_lock);
6690
6691 for (i = 0; i < NUM_X86_FEATURES; i++) {
6692 cmn_err(CE_CONT, "?post-ucode x86_feature: %s\n",
6693 x86_feature_names[i]);
6694 if (is_x86_feature(f0, i)) {
6695 add_x86_feature(x86_featureset, i);
6696 }
6697 }
6698 kmem_free(argdata, sizeof (x86_featureset) * NCPU);
6699 }