1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
24 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
25 * Copyright 2014 Josef "Jeff" Sipek <jeffpc@josefsipek.net>
26 */
27 /*
28 * Copyright (c) 2010, Intel Corporation.
29 * All rights reserved.
30 */
31 /*
32 * Portions Copyright 2009 Advanced Micro Devices, Inc.
33 */
34 /*
35 * Copyright 2019, Joyent, Inc.
36 */
37
38 /*
39 * CPU Identification logic
40 *
41 * The purpose of this file and its companion, cpuid_subr.c, is to help deal
42 * with the identification of CPUs, their features, and their topologies. More
43 * specifically, this file helps drive the following:
44 *
45 * 1. Enumeration of features of the processor which are used by the kernel to
46 * determine what features to enable or disable. These may be instruction set
47 * enhancements or features that we use.
48 *
49 * 2. Enumeration of instruction set architecture (ISA) additions that userland
50 * will be told about through the auxiliary vector.
51 *
52 * 3. Understanding the physical topology of the CPU such as the number of
 * caches, how many cores it has, whether or not it supports simultaneous
 * multi-threading (SMT), etc.
55 *
56 * ------------------------
57 * CPUID History and Basics
58 * ------------------------
59 *
60 * The cpuid instruction was added by Intel roughly around the time that the
61 * original Pentium was introduced. The purpose of cpuid was to tell in a
62 * programmatic fashion information about the CPU that previously was guessed
63 * at. For example, an important part of cpuid is that we can know what
64 * extensions to the ISA exist. If you use an invalid opcode you would get a
65 * #UD, so this method allows a program (whether a user program or the kernel)
66 * to determine what exists without crashing or getting a SIGILL. Of course,
67 * this was also during the era of the clones and the AMD Am5x86. The vendor
68 * name shows up first in cpuid for a reason.
69 *
70 * cpuid information is broken down into ranges called a 'leaf'. Each leaf puts
71 * unique values into the registers %eax, %ebx, %ecx, and %edx and each leaf has
72 * its own meaning. The different leaves are broken down into different regions:
73 *
74 * [ 0, 7fffffff ] This region is called the 'basic'
75 * region. This region is generally defined
76 * by Intel, though some of the original
77 * portions have different meanings based
78 * on the manufacturer. These days, Intel
79 * adds most new features to this region.
80 * AMD adds non-Intel compatible
81 * information in the third, extended
82 * region. Intel uses this for everything
83 * including ISA extensions, CPU
84 * features, cache information, topology,
85 * and more.
86 *
87 * There is a hole carved out of this
88 * region which is reserved for
89 * hypervisors.
90 *
91 * [ 40000000, 4fffffff ] This region, which is found in the
92 * middle of the previous region, is
93 * explicitly promised to never be used by
94 * CPUs. Instead, it is used by hypervisors
95 * to communicate information about
96 * themselves to the operating system. The
97 * values and details are unique for each
98 * hypervisor.
99 *
100 * [ 80000000, ffffffff ] This region is called the 'extended'
101 * region. Some of the low leaves mirror
102 * parts of the basic leaves. This region
103 * has generally been used by AMD for
104 * various extensions. For example, AMD-
105 * specific information about caches,
106 * features, and topology are found in this
107 * region.
108 *
 * To request a given leaf, you place it into %eax, zero %ebx, %ecx,
110 * and %edx, and then issue the cpuid instruction. At the first leaf in each of
111 * the ranges, one of the primary things returned is the maximum valid leaf in
112 * that range. This allows for discovery of what range of CPUID is valid.
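 *
 * As an illustration, this is roughly how the rest of this file issues a cpuid
 * request, using the struct cpuid_regs layout and the __cpuid_insn() helper
 * that the implementation relies on (a minimal sketch; real callers cache the
 * results and add error handling):
 *
 *     struct cpuid_regs regs;
 *     uint32_t max_basic_leaf;
 *
 *     bzero(&regs, sizeof (regs));
 *     regs.cp_eax = 0;                  // leaf 0: vendor and max basic leaf
 *     (void) __cpuid_insn(&regs);
 *     max_basic_leaf = regs.cp_eax;     // highest valid leaf in the basic range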
113 *
114 * The CPUs have potentially surprising behavior when using an invalid leaf or
115 * unimplemented leaf. If the requested leaf is within the valid basic or
116 * extended range, but is unimplemented, then %eax, %ebx, %ecx, and %edx will be
117 * set to zero. However, if you specify a leaf that is outside of a valid range,
118 * then instead it will be filled with the last valid _basic_ leaf. For example,
119 * if the maximum basic value is on leaf 0x3, then issuing a cpuid for leaf 4 or
120 * an invalid extended leaf will return the information for leaf 3.
121 *
122 * Some leaves are broken down into sub-leaves. This means that the value
123 * depends on both the leaf asked for in %eax and a secondary register. For
124 * example, Intel uses the value in %ecx on leaf 7 to indicate a sub-leaf to get
125 * additional information. Or when getting topology information in leaf 0xb, the
126 * initial value in %ecx changes which level of the topology that you are
127 * getting information about.
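 *
 * For instance, a sketch of reading leaf 7's primary sub-leaf (reusing the
 * struct cpuid_regs regs from the sketch above); the sub-leaf number is simply
 * placed in %ecx before the instruction is issued:
 *
 *     bzero(&regs, sizeof (regs));
 *     regs.cp_eax = 7;                  // structured extended features
 *     regs.cp_ecx = 0;                  // sub-leaf 0
 *     (void) __cpuid_insn(&regs);
 *     // feature bits are now in regs.cp_ebx, regs.cp_ecx, and regs.cp_edx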
128 *
129 * cpuid values are always kept to 32 bits regardless of whether or not the
130 * program is in 64-bit mode. When executing in 64-bit mode, the upper
 * 32 bits of the register are always set to zero so that the values are the
 * same regardless of execution mode.
133 *
134 * ----------------------
135 * Identifying Processors
136 * ----------------------
137 *
138 * We can identify a processor in two steps. The first step looks at cpuid leaf
 * 0. Leaf 0 contains the processor's vendor information. This is done by
 * putting a 12 character string across %ebx, %edx, and %ecx (in that order).
 * On AMD, it is 'AuthenticAMD' and on Intel it is 'GenuineIntel'.
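 *
 * A sketch of reconstructing that string, roughly what the pass 1 code
 * effectively does:
 *
 *     struct cpuid_regs regs;
 *     char vendor[13];
 *
 *     bzero(&regs, sizeof (regs));
 *     regs.cp_eax = 0;
 *     (void) __cpuid_insn(&regs);
 *     bcopy(&regs.cp_ebx, &vendor[0], 4);    // e.g. "Genu"
 *     bcopy(&regs.cp_edx, &vendor[4], 4);    // e.g. "ineI"
 *     bcopy(&regs.cp_ecx, &vendor[8], 4);    // e.g. "ntel"
 *     vendor[12] = '\0';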
142 *
143 * From there, a processor is identified by a combination of three different
144 * values:
145 *
146 * 1. Family
147 * 2. Model
148 * 3. Stepping
149 *
150 * Each vendor uses the family and model to uniquely identify a processor. The
151 * way that family and model are changed depends on the vendor. For example,
 * Intel has been using family 0x6 for almost all of their processors since the
153 * Pentium Pro/Pentium II era, often called the P6. The model is used to
154 * identify the exact processor. Different models are often used for the client
155 * (consumer) and server parts. Even though each processor often has major
156 * architectural differences, they still are considered the same family by
157 * Intel.
158 *
159 * On the other hand, each major AMD architecture generally has its own family.
 * For example, the K8 is family 0xf, Bulldozer 0x15, and Zen 0x17. Within a
 * family, the model number is used to help identify specific processors.
162 *
163 * The stepping is used to refer to a revision of a specific microprocessor. The
164 * term comes from equipment used to produce masks that are used to create
165 * integrated circuits.
166 *
167 * The information is present in leaf 1, %eax. In technical documentation you
168 * will see the terms extended model and extended family. The original family,
 * model, and stepping fields were each 4 bits wide. If the base family field is
 * 0xf, then the extended family field, which occupies previously reserved bits,
 * is added to it to form the full family. Similarly, when the base family calls
 * for it (0xf, and on Intel also 0x6), the extended model field is combined
 * with the base model as its upper four bits to form the full model.
173 *
174 * When we process this information, we store the full family, model, and
175 * stepping in the struct cpuid_info members cpi_family, cpi_model, and
176 * cpi_step, respectively. Whenever you are performing comparisons with the
177 * family, model, and stepping, you should use these members and not the raw
178 * values from cpuid. If you must use the raw values from cpuid directly, you
179 * must make sure that you add the extended model and family to the base model
180 * and family.
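 *
 * As a sketch of that combination, given the leaf 1 %eax value in a local
 * variable eax and the BITX() bit-extraction macro used later in this file
 * (the exact rule is slightly vendor-specific; AMD only consults the extended
 * model when the base family is 0xf):
 *
 *     uint_t family = BITX(eax, 11, 8);
 *     uint_t model = BITX(eax, 7, 4);
 *
 *     if (family == 0xf)
 *             family += BITX(eax, 27, 20);       // extended family
 *     if (family == 0x6 || family >= 0xf)
 *             model += BITX(eax, 19, 16) << 4;   // extended model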
181 *
182 * In general, we do not use information about the family, model, and stepping
183 * to determine whether or not a feature is present; that is generally driven by
184 * specific leaves. However, when something we care about on the processor is
185 * not considered 'architectural' meaning that it is specific to a set of
186 * processors and not promised in the architecture model to be consistent from
187 * generation to generation, then we will fall back on this information. The
 * most common cases where this comes up are when we have to work around errata
 * in the processor, are dealing with processor-specific features such as CPU
 * performance counters, or want to provide additional information for things
 * such as fault management.
192 *
 * While processors also have a brand string, which is the name that people
 * are familiar with when buying the processor, it is not meant for
 * programmatic consumption. That is what the family, model, and stepping are
196 * for.
197 *
198 * ------------
199 * CPUID Passes
200 * ------------
201 *
202 * As part of performing feature detection, we break this into several different
203 * passes. The passes are as follows:
204 *
205 * Pass 0 This is a primordial pass done in locore.s to deal with
206 * Cyrix CPUs that don't support cpuid. The reality is that
207 * we likely don't run on them any more, but there is still
208 * logic for handling them.
209 *
210 * Pass 1 This is the primary pass and is responsible for doing a
211 * large number of different things:
212 *
 * 1. Determining which vendor manufactured the CPU and
 * the family, model, and stepping information.
215 *
 * 2. Gathering a large number of feature flags to
 * determine which features the CPU supports and which
 * indicate work that the OS must do to enable them.
 * Features detected this way are added to the
220 * x86_featureset which can be queried to
221 * determine what we should do. This includes processing
222 * all of the basic and extended CPU features that we care
223 * about.
224 *
225 * 3. Determining the CPU's topology. This includes
226 * information about how many cores and threads are present
227 * in the package. It also is responsible for figuring out
228 * which logical CPUs are potentially part of the same core
229 * and what other resources they might share. For more
230 * information see the 'Topology' section.
231 *
232 * 4. Determining the set of CPU security-specific features
 * that we need to worry about and determining the
234 * appropriate set of workarounds.
235 *
236 * Pass 1 on the boot CPU occurs before KMDB is started.
237 *
238 * Pass 2 The second pass is done after startup(). Here, we check
239 * other miscellaneous features. Most of this is gathering
240 * additional basic and extended features that we'll use in
241 * later passes or for debugging support.
242 *
243 * Pass 3 The third pass occurs after the kernel memory allocator
 * has been fully initialized. This gathers information
 * for which we might need dynamic memory. This includes
 * several variable-width leaves that have cache
 * information and the processor's brand string.
248 *
249 * Pass 4 The fourth and final normal pass is performed after the
250 * kernel has brought most everything online. This is
251 * invoked from post_startup(). In this pass, we go through
252 * the set of features that we have enabled and turn that
253 * into the hardware auxiliary vector features that
254 * userland receives. This is used by userland, primarily
255 * by the run-time link-editor (RTLD), though userland
256 * software could also refer to it directly.
257 *
258 * Microcode After a microcode update, we do a selective rescan of
259 * the cpuid leaves to determine what features have
260 * changed. Microcode updates can provide more details
261 * about security related features to deal with issues like
262 * Spectre and L1TF. On occasion, vendors have violated
263 * their contract and removed bits. However, we don't try
264 * to detect that because that puts us in a situation that
 * we really can't deal with. As such, the only things we
 * rescan today are security-related features. See
267 * cpuid_pass_ucode().
268 *
269 * All of the passes (except pass 0) are run on all CPUs. However, for the most
270 * part we only care about what the boot CPU says about this information and use
271 * the other CPUs as a rough guide to sanity check that we have the same feature
272 * set.
273 *
274 * We do not support running multiple logical CPUs with disjoint, let alone
275 * different, feature sets.
276 *
277 * ------------------
278 * Processor Topology
279 * ------------------
280 *
281 * One of the important things that we need to do is to understand the topology
282 * of the underlying processor. When we say topology in this case, we're trying
283 * to understand the relationship between the logical CPUs that the operating
284 * system sees and the underlying physical layout. Different logical CPUs may
285 * share different resources which can have important consequences for the
286 * performance of the system. For example, they may share caches, execution
287 * units, and more.
288 *
289 * The topology of the processor changes from generation to generation and
290 * vendor to vendor. Along with that, different vendors use different
291 * terminology, and the operating system itself uses occasionally overlapping
292 * terminology. It's important to understand what this topology looks like so
293 * one can understand the different things that we try to calculate and
294 * determine.
295 *
296 * To get started, let's talk about a little bit of terminology that we've used
297 * so far, is used throughout this file, and is fairly generic across multiple
298 * vendors:
299 *
300 * CPU
301 * A central processing unit (CPU) refers to a logical and/or virtual
302 * entity that the operating system can execute instructions on. The
303 * underlying resources for this CPU may be shared between multiple
304 * entities; however, to the operating system it is a discrete unit.
305 *
306 * PROCESSOR and PACKAGE
307 *
308 * Generally, when we use the term 'processor' on its own, we are referring
309 * to the physical entity that one buys and plugs into a board. However,
310 * because processor has been overloaded and one might see it used to mean
311 * multiple different levels, we will instead use the term 'package' for
312 * the rest of this file. The term package comes from the electrical
313 * engineering side and refers to the physical entity that encloses the
314 * electronics inside. Strictly speaking the package can contain more than
 * just the CPU; for example, on many processors it may also have what's
 * called an 'integrated graphics processing unit (GPU)'. Because the
317 * package can encapsulate multiple units, it is the largest physical unit
318 * that we refer to.
319 *
320 * SOCKET
321 *
 * A socket refers to a unit on a system board (generally the motherboard)
 * that can receive a package. A single package, or processor, is plugged
 * into a single socket. A system may have multiple sockets. Oftentimes,
 * the term socket is used interchangeably with package and refers to the
 * electrical component that has been plugged in, and not the receptacle
 * itself.
327 *
328 * CORE
329 *
330 * A core refers to the physical instantiation of a CPU, generally, with a
331 * full set of hardware resources available to it. A package may contain
332 * multiple cores inside of it or it may just have a single one. A
333 * processor with more than one core is often referred to as 'multi-core'.
334 * In illumos, we will use the feature X86FSET_CMP to refer to a system
335 * that has 'multi-core' processors.
336 *
337 * A core may expose a single logical CPU to the operating system, or it
338 * may expose multiple CPUs, which we call threads, defined below.
339 *
340 * Some resources may still be shared by cores in the same package. For
341 * example, many processors will share the level 3 cache between cores.
342 * Some AMD generations share hardware resources between cores. For more
343 * information on that see the section 'AMD Topology'.
344 *
345 * THREAD and STRAND
346 *
 * In this file, generally a thread refers to a hardware resource and not
348 * the operating system's logical abstraction. A thread is always exposed
349 * as an independent logical CPU to the operating system. A thread belongs
350 * to a specific core. A core may have more than one thread. When that is
351 * the case, the threads that are part of the same core are often referred
352 * to as 'siblings'.
353 *
354 * When multiple threads exist, this is generally referred to as
355 * simultaneous multi-threading (SMT). When Intel introduced this in their
356 * processors they called it hyper-threading (HT). When multiple threads
357 * are active in a core, they split the resources of the core. For example,
358 * two threads may share the same set of hardware execution units.
359 *
360 * The operating system often uses the term 'strand' to refer to a thread.
361 * This helps disambiguate it from the software concept.
362 *
363 * CHIP
364 *
365 * Unfortunately, the term 'chip' is dramatically overloaded. At its most
 * basic, it is used to refer to a single integrated circuit, which
367 * may or may not be the only thing in the package. In illumos, when you
368 * see the term 'chip' it is almost always referring to the same thing as
369 * the 'package'. However, many vendors may use chip to refer to one of
370 * many integrated circuits that have been placed in the package. As an
371 * example, see the subsequent definition.
372 *
373 * To try and keep things consistent, we will only use chip when referring
374 * to the entire integrated circuit package, with the exception of the
375 * definition of multi-chip module (because it is in the name) and use the
376 * term 'die' when we want the more general, potential sub-component
377 * definition.
378 *
379 * DIE
380 *
381 * A die refers to an integrated circuit. Inside of the package there may
382 * be a single die or multiple dies. This is sometimes called a 'chip' in
383 * vendor's parlance, but in this file, we use the term die to refer to a
384 * subcomponent.
385 *
386 * MULTI-CHIP MODULE
387 *
388 * A multi-chip module (MCM) refers to putting multiple distinct chips that
389 * are connected together in the same package. When a multi-chip design is
390 * used, generally each chip is manufactured independently and then joined
391 * together in the package. For example, on AMD's Zen microarchitecture
392 * (family 0x17), the package contains several dies (the second meaning of
393 * chip from above) that are connected together.
394 *
395 * CACHE
396 *
397 * A cache is a part of the processor that maintains copies of recently
398 * accessed memory. Caches are split into levels and then into types.
399 * Commonly there are one to three levels, called level one, two, and
400 * three. The lower the level, the smaller it is, the closer it is to the
401 * execution units of the CPU, and the faster it is to access. The layout
402 * and design of the cache come in many different flavors, consult other
403 * resources for a discussion of those.
404 *
405 * Caches are generally split into two types, the instruction and data
406 * cache. The caches contain what their names suggest, the instruction
407 * cache has executable program text, while the data cache has all other
408 * memory that the processor accesses. As of this writing, data is kept
409 * coherent between all of the caches on x86, so if one modifies program
410 * text before it is executed, that will be in the data cache, and the
411 * instruction cache will be synchronized with that change when the
412 * processor actually executes those instructions. This coherency also
413 * covers the fact that data could show up in multiple caches.
414 *
415 * Generally, the lowest level caches are specific to a core. However, the
 * last level cache is shared between some number of cores. The number of
417 * CPUs sharing this last level cache is important. This has implications
418 * for the choices that the scheduler makes, as accessing memory that might
419 * be in a remote cache after thread migration can be quite expensive.
420 *
421 * Sometimes, the word cache is abbreviated with a '$', because in US
422 * English the word cache is pronounced the same as cash. So L1D$ refers to
423 * the L1 data cache, and L2$ would be the L2 cache. This will not be used
424 * in the rest of this theory statement for clarity.
425 *
426 * MEMORY CONTROLLER
427 *
428 * The memory controller is a component that provides access to DRAM. Each
429 * memory controller can access a set number of DRAM channels. Each channel
430 * can have a number of DIMMs (sticks of memory) associated with it. A
431 * given package may have more than one memory controller. The association
432 * of the memory controller to a group of cores is important as it is
433 * cheaper to access memory on the controller that you are associated with.
434 *
435 * NUMA
436 *
437 * NUMA or non-uniform memory access, describes a way that systems are
438 * built. On x86, any processor core can address all of the memory in the
 * system. However, when using multiple sockets or possibly within a
440 * multi-chip module, some of that memory is physically closer and some of
441 * it is further. Memory that is further away is more expensive to access.
442 * Consider the following image of multiple sockets with memory:
443 *
444 * +--------+ +--------+
445 * | DIMM A | +----------+ +----------+ | DIMM D |
446 * +--------+-+ | | | | +-+------+-+
447 * | DIMM B |=======| Socket 0 |======| Socket 1 |=======| DIMM E |
448 * +--------+-+ | | | | +-+------+-+
449 * | DIMM C | +----------+ +----------+ | DIMM F |
450 * +--------+ +--------+
451 *
452 * In this example, Socket 0 is closer to DIMMs A-C while Socket 1 is
453 * closer to DIMMs D-F. This means that it is cheaper for socket 0 to
454 * access DIMMs A-C and more expensive to access D-F as it has to go
455 * through Socket 1 to get there. The inverse is true for Socket 1. DIMMs
456 * D-F are cheaper than A-C. While the socket form is the most common, when
457 * using multi-chip modules, this can also sometimes occur. For another
458 * example of this that's more involved, see the AMD topology section.
459 *
460 *
461 * Intel Topology
462 * --------------
463 *
 * Most Intel processors since Nehalem (as of this writing the current gen
465 * is Skylake / Cannon Lake) follow a fairly similar pattern. The CPU portion of
466 * the package is a single monolithic die. MCMs currently aren't used. Most
467 * parts have three levels of caches, with the L3 cache being shared between
468 * all of the cores on the package. The L1/L2 cache is generally specific to
469 * an individual core. The following image shows at a simplified level what
470 * this looks like. The memory controller is commonly part of something called
471 * the 'Uncore', that used to be separate physical chips that were not a part of
472 * the package, but are now part of the same chip.
473 *
474 * +-----------------------------------------------------------------------+
475 * | Package |
476 * | +-------------------+ +-------------------+ +-------------------+ |
477 * | | Core | | Core | | Core | |
478 * | | +--------+ +---+ | | +--------+ +---+ | | +--------+ +---+ | |
479 * | | | Thread | | L | | | | Thread | | L | | | | Thread | | L | | |
480 * | | +--------+ | 1 | | | +--------+ | 1 | | | +--------+ | 1 | | |
481 * | | +--------+ | | | | +--------+ | | | | +--------+ | | | |
482 * | | | Thread | | | | | | Thread | | | | | | Thread | | | | |
483 * | | +--------+ +---+ | | +--------+ +---+ | | +--------+ +---+ | |
484 * | | +--------------+ | | +--------------+ | | +--------------+ | |
485 * | | | L2 Cache | | | | L2 Cache | | | | L2 Cache | | |
486 * | | +--------------+ | | +--------------+ | | +--------------+ | |
487 * | +-------------------+ +-------------------+ +-------------------+ |
488 * | +-------------------------------------------------------------------+ |
489 * | | Shared L3 Cache | |
490 * | +-------------------------------------------------------------------+ |
491 * | +-------------------------------------------------------------------+ |
492 * | | Memory Controller | |
493 * | +-------------------------------------------------------------------+ |
494 * +-----------------------------------------------------------------------+
495 *
496 * A side effect of this current architecture is that what we care about from a
 * scheduling and topology perspective is simplified. In general we care about
498 * understanding which logical CPUs are part of the same core and socket.
499 *
500 * To determine the relationship between threads and cores, Intel initially used
501 * the identifier in the advanced programmable interrupt controller (APIC). They
502 * also added cpuid leaf 4 to give additional information about the number of
 * threads and CPUs in the processor. With the addition of the x2apic (which
 * increased the width of the APIC ID from 8 bits to 32 bits), an
505 * additional cpuid topology leaf 0xB was added.
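 *
 * To illustrate, leaf 0xB is queried once per topology level by placing the
 * level number in %ecx. The sketch below (reusing regs from the earlier
 * sketches) pulls out the SMT shift for level 0; bit positions are as given in
 * Intel's documentation, and the real code also validates the level type
 * reported in %ecx:
 *
 *     bzero(&regs, sizeof (regs));
 *     regs.cp_eax = 0xb;
 *     regs.cp_ecx = 0;                          // level 0: SMT
 *     (void) __cpuid_insn(&regs);
 *     smt_shift = BITX(regs.cp_eax, 4, 0);      // bits occupied by thread IDs
 *     x2apic_id = regs.cp_edx;                  // full 32-bit x2APIC ID
 *     core_id = x2apic_id >> smt_shift;         // ID of the next level up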
506 *
507 * AMD Topology
508 * ------------
509 *
510 * When discussing AMD topology, we want to break this into three distinct
511 * generations of topology. There's the basic topology that has been used in
512 * family 0xf+ (Opteron, Athlon64), there's the topology that was introduced
513 * with family 0x15 (Bulldozer), and there's the topology that was introduced
514 * with family 0x17 (Zen). AMD also has some additional terminology that's worth
515 * talking about.
516 *
517 * Until the introduction of family 0x17 (Zen), AMD did not implement something
518 * that they considered SMT. Whether or not the AMD processors have SMT
519 * influences many things including scheduling and reliability, availability,
520 * and serviceability (RAS) features.
521 *
522 * NODE
523 *
524 * AMD uses the term node to refer to a die that contains a number of cores
525 * and I/O resources. Depending on the processor family and model, more
526 * than one node can be present in the package. When there is more than one
527 * node this indicates a multi-chip module. Usually each node has its own
528 * access to memory and I/O devices. This is important and generally
529 * different from the corresponding Intel Nehalem-Skylake+ processors. As a
530 * result, we track this relationship in the operating system.
531 *
532 * In processors with an L3 cache, the L3 cache is generally shared across
533 * the entire node, though the way this is carved up varies from generation
534 * to generation.
535 *
536 * BULLDOZER
537 *
538 * Starting with the Bulldozer family (0x15) and continuing until the
539 * introduction of the Zen microarchitecture, AMD introduced the idea of a
540 * compute unit. In a compute unit, two traditional cores share a number of
541 * hardware resources. Critically, they share the FPU, L1 instruction
542 * cache, and the L2 cache. Several compute units were then combined inside
543 * of a single node. Because the integer execution units, L1 data cache,
544 * and some other resources were not shared between the cores, AMD never
545 * considered this to be SMT.
546 *
547 * ZEN
548 *
 * The Zen family (0x17) uses a multi-chip module (MCM) design; the module
550 * is called Zeppelin. These modules are similar to the idea of nodes used
551 * previously. Each of these nodes has two DRAM channels which all of the
552 * cores in the node can access uniformly. These nodes are linked together
553 * in the package, creating a NUMA environment.
554 *
555 * The Zeppelin die itself contains two different 'core complexes'. Each
556 * core complex consists of four cores which each have two threads, for a
557 * total of 8 logical CPUs per complex. Unlike other generations,
558 * where all the logical CPUs in a given node share the L3 cache, here each
559 * core complex has its own shared L3 cache.
560 *
561 * A further thing that we need to consider is that in some configurations,
562 * particularly with the Threadripper line of processors, not every die
563 * actually has its memory controllers wired up to actual memory channels.
564 * This means that some cores have memory attached to them and others
565 * don't.
566 *
567 * To put Zen in perspective, consider the following images:
568 *
569 * +--------------------------------------------------------+
570 * | Core Complex |
571 * | +-------------------+ +-------------------+ +---+ |
572 * | | Core +----+ | | Core +----+ | | | |
573 * | | +--------+ | L2 | | | +--------+ | L2 | | | | |
574 * | | | Thread | +----+ | | | Thread | +----+ | | | |
575 * | | +--------+-+ +--+ | | +--------+-+ +--+ | | L | |
576 * | | | Thread | |L1| | | | Thread | |L1| | | 3 | |
577 * | | +--------+ +--+ | | +--------+ +--+ | | | |
578 * | +-------------------+ +-------------------+ | C | |
579 * | +-------------------+ +-------------------+ | a | |
580 * | | Core +----+ | | Core +----+ | | c | |
581 * | | +--------+ | L2 | | | +--------+ | L2 | | | h | |
582 * | | | Thread | +----+ | | | Thread | +----+ | | e | |
583 * | | +--------+-+ +--+ | | +--------+-+ +--+ | | | |
584 * | | | Thread | |L1| | | | Thread | |L1| | | | |
585 * | | +--------+ +--+ | | +--------+ +--+ | | | |
586 * | +-------------------+ +-------------------+ +---+ |
587 * | |
588 * +--------------------------------------------------------+
589 *
590 * This first image represents a single Zen core complex that consists of four
591 * cores.
592 *
593 *
594 * +--------------------------------------------------------+
595 * | Zeppelin Die |
596 * | +--------------------------------------------------+ |
597 * | | I/O Units (PCIe, SATA, USB, etc.) | |
598 * | +--------------------------------------------------+ |
599 * | HH |
600 * | +-----------+ HH +-----------+ |
601 * | | | HH | | |
602 * | | Core |==========| Core | |
603 * | | Complex |==========| Complex | |
604 * | | | HH | | |
605 * | +-----------+ HH +-----------+ |
606 * | HH |
607 * | +--------------------------------------------------+ |
608 * | | Memory Controller | |
609 * | +--------------------------------------------------+ |
610 * | |
611 * +--------------------------------------------------------+
612 *
 * This image represents a single Zeppelin Die. Note how both core complexes
 * are connected to the same memory controller and I/O units. While each core
615 * complex has its own L3 cache as seen in the first image, they both have
616 * uniform access to memory.
617 *
618 *
619 * PP PP
620 * PP PP
621 * +----------PP---------------------PP---------+
622 * | PP PP |
623 * | +-----------+ +-----------+ |
624 * | | | | | |
625 * MMMMMMMMM| Zeppelin |==========| Zeppelin |MMMMMMMMM
626 * MMMMMMMMM| Die |==========| Die |MMMMMMMMM
627 * | | | | | |
628 * | +-----------+ooo ...+-----------+ |
629 * | HH ooo ... HH |
630 * | HH oo.. HH |
631 * | HH ..oo HH |
632 * | HH ... ooo HH |
633 * | +-----------+... ooo+-----------+ |
634 * | | | | | |
635 * MMMMMMMMM| Zeppelin |==========| Zeppelin |MMMMMMMMM
636 * MMMMMMMMM| Die |==========| Die |MMMMMMMMM
637 * | | | | | |
638 * | +-----------+ +-----------+ |
639 * | PP PP |
640 * +----------PP---------------------PP---------+
641 * PP PP
642 * PP PP
643 *
644 * This image represents a single Zen package. In this example, it has four
645 * Zeppelin dies, though some configurations only have a single one. In this
646 * example, each die is directly connected to the next. Also, each die is
647 * represented as being connected to memory by the 'M' character and connected
648 * to PCIe devices and other I/O, by the 'P' character. Because each Zeppelin
649 * die is made up of two core complexes, we have multiple different NUMA
650 * domains that we care about for these systems.
651 *
652 * CPUID LEAVES
653 *
654 * There are a few different CPUID leaves that we can use to try and understand
655 * the actual state of the world. As part of the introduction of family 0xf, AMD
 * added CPUID leaf 0x80000008. This leaf tells us the number of logical
 * processors that are in the package. Because families before Zen didn't have
 * SMT, this was always the number of cores that were in the package. However,
 * it
659 * should always be thought of as the number of logical threads to be consistent
660 * between generations. In addition we also get the size of the APIC ID that is
661 * used to represent the number of logical processors. This is important for
662 * deriving topology information.
663 *
664 * In the Bulldozer family, AMD added leaf 0x8000001E. The information varies a
665 * bit between Bulldozer and later families, but it is quite useful in
666 * determining the topology information. Because this information has changed
667 * across family generations, it's worth calling out what these mean
668 * explicitly. The registers have the following meanings:
669 *
670 * %eax The APIC ID. The entire register is defined to have a 32-bit
671 * APIC ID, even though on systems without x2apic support, it will
672 * be limited to 8 bits.
673 *
674 * %ebx On Bulldozer-era systems this contains information about the
675 * number of cores that are in a compute unit (cores that share
676 * resources). It also contains a per-package compute unit ID that
677 * identifies which compute unit the logical CPU is a part of.
678 *
679 * On Zen-era systems this instead contains the number of threads
680 * per core and the ID of the core that the logical CPU is a part
681 * of. Note, this ID is unique only to the package, it is not
682 * globally unique across the entire system.
683 *
684 * %ecx This contains the number of nodes that exist in the package. It
685 * also contains an ID that identifies which node the logical CPU
686 * is a part of.
687 *
688 * Finally, we also use cpuid leaf 0x8000001D to determine information about the
689 * cache layout to determine which logical CPUs are sharing which caches.
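 *
 * As a rough sketch of consuming the Zen-era form of leaf 0x8000001E (again
 * reusing regs; the bit positions below follow AMD's processor programming
 * references and should be confirmed there, and the counts are encoded as
 * 'value minus one'):
 *
 *     bzero(&regs, sizeof (regs));
 *     regs.cp_eax = 0x8000001e;
 *     (void) __cpuid_insn(&regs);
 *     apic_id = regs.cp_eax;
 *     core_id = BITX(regs.cp_ebx, 7, 0);        // unique within the package
 *     nthreads = BITX(regs.cp_ebx, 15, 8) + 1;  // threads per core
 *     node_id = BITX(regs.cp_ecx, 7, 0);
 *     nnodes = BITX(regs.cp_ecx, 10, 8) + 1;    // nodes per package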
690 *
691 * illumos Topology
692 * ----------------
693 *
694 * Based on the above we synthesize the information into several different
695 * variables that we store in the 'struct cpuid_info'. We'll go into the details
696 * of what each member is supposed to represent and their uniqueness. In
697 * general, there are two levels of uniqueness that we care about. We care about
698 * an ID that is globally unique. That means that it will be unique across all
699 * entities in the system. For example, the default logical CPU ID is globally
700 * unique. On the other hand, there is some information that we only care about
701 * being unique within the context of a single package / socket. Here are the
702 * variables that we keep track of and their meaning.
703 *
 * Several of the values that represent an identifier, with the exception
705 * of cpi_apicid, are allowed to be synthetic.
706 *
707 *
708 * cpi_apicid
709 *
710 * This is the value of the CPU's APIC id. This should be the full 32-bit
711 * ID if the CPU is using the x2apic. Otherwise, it should be the 8-bit
712 * APIC ID. This value is globally unique between all logical CPUs across
713 * all packages. This is usually required by the APIC.
714 *
715 * cpi_chipid
716 *
717 * This value indicates the ID of the package that the logical CPU is a
718 * part of. This value is allowed to be synthetic. It is usually derived by
719 * taking the CPU's APIC ID and determining how many bits are used to
720 * represent CPU cores in the package. All logical CPUs that are part of
721 * the same package must have the same value.
722 *
723 * cpi_coreid
724 *
725 * This represents the ID of a CPU core. Two logical CPUs should only have
726 * the same cpi_coreid value if they are part of the same core. These
727 * values may be synthetic. On systems that support SMT, this value is
728 * usually derived from the APIC ID, otherwise it is often synthetic and
729 * just set to the value of the cpu_id in the cpu_t.
730 *
731 * cpi_pkgcoreid
732 *
733 * This is similar to the cpi_coreid in that logical CPUs that are part of
734 * the same core should have the same ID. The main difference is that these
735 * values are only required to be unique to a given socket.
736 *
737 * cpi_clogid
738 *
739 * This represents the logical ID of a logical CPU. This value should be
740 * unique within a given socket for each logical CPU. This is allowed to be
741 * synthetic, though it is usually based off of the CPU's apic ID. The
 * broader system expects that logical CPUs that are part of the same
 * core have contiguous numbers. For example, if there were two threads per
 * core, then the two IDs divided by two should be equal, and the first ID
 * modulo two should be zero while the second should be one. Concretely, IDs
 * 4 and 5 indicate two logical CPUs that are part of the same core, while
 * IDs 5 and 6 represent two logical CPUs that are part of different cores.
748 *
749 * While it is common for the cpi_coreid and the cpi_clogid to be derived
750 * from the same source, strictly speaking, they don't have to be and the
751 * two values should be considered logically independent. One should not
752 * try to compare a logical CPU's cpi_coreid and cpi_clogid to determine
753 * some kind of relationship. While this is tempting, we've seen cases on
754 * AMD family 0xf where the system's cpu id is not related to its APIC ID.
755 *
756 * cpi_ncpu_per_chip
757 *
758 * This value indicates the total number of logical CPUs that exist in the
759 * physical package. Critically, this is not the number of logical CPUs
760 * that exist for just the single core.
761 *
762 * This value should be the same for all logical CPUs in the same package.
763 *
764 * cpi_ncore_per_chip
765 *
766 * This value indicates the total number of physical CPU cores that exist
767 * in the package. The system compares this value with cpi_ncpu_per_chip to
768 * determine if simultaneous multi-threading (SMT) is enabled. When
769 * cpi_ncpu_per_chip equals cpi_ncore_per_chip, then there is no SMT and
 * the X86FSET_HTT feature is not set. If this value is greater than one,
 * then we consider the processor to have the feature X86FSET_CMP, to
772 * indicate that there is support for more than one core.
773 *
774 * This value should be the same for all logical CPUs in the same package.
775 *
776 * cpi_procnodes_per_pkg
777 *
778 * This value indicates the number of 'nodes' that exist in the package.
779 * When processors are actually a multi-chip module, this represents the
780 * number of such modules that exist in the package. Currently, on Intel
781 * based systems this member is always set to 1.
782 *
783 * This value should be the same for all logical CPUs in the same package.
784 *
785 * cpi_procnodeid
786 *
787 * This value indicates the ID of the node that the logical CPU is a part
788 * of. All logical CPUs that are in the same node must have the same value
789 * here. This value must be unique across all of the packages in the
790 * system. On Intel based systems, this is currently set to the value in
791 * cpi_chipid because there is only one node.
792 *
793 * cpi_cores_per_compunit
794 *
795 * This value indicates the number of cores that are part of a compute
796 * unit. See the AMD topology section for this. This member only has real
797 * meaning currently for AMD Bulldozer family processors. For all other
798 * processors, this should currently be set to 1.
799 *
800 * cpi_compunitid
801 *
802 * This indicates the compute unit that the logical CPU belongs to. For
803 * processors without AMD Bulldozer-style compute units this should be set
804 * to the value of cpi_coreid.
805 *
806 * cpi_ncpu_shr_last_cache
807 *
808 * This indicates the number of logical CPUs that are sharing the same last
809 * level cache. This value should be the same for all CPUs that are sharing
810 * that cache. The last cache refers to the cache that is closest to memory
811 * and furthest away from the CPU.
812 *
813 * cpi_last_lvl_cacheid
814 *
815 * This indicates the ID of the last cache that the logical CPU uses. This
816 * cache is often shared between multiple logical CPUs and is the cache
817 * that is closest to memory and furthest away from the CPU. This value
818 * should be the same for a group of logical CPUs only if they actually
819 * share the same last level cache. IDs should not overlap between
820 * packages.
821 *
822 * cpi_ncore_bits
823 *
824 * This indicates the number of bits that are required to represent all of
825 * the cores in the system. As cores are derived based on their APIC IDs,
826 * we aren't guaranteed a run of APIC IDs starting from zero. It's OK for
827 * this value to be larger than the actual number of IDs that are present
828 * in the system. This is used to size tables by the CMI framework. It is
829 * only filled in for Intel and AMD CPUs.
830 *
831 * cpi_nthread_bits
832 *
833 * This indicates the number of bits required to represent all of the IDs
834 * that cover the logical CPUs that exist on a given core. It's OK for this
835 * value to be larger than the actual number of IDs that are present in the
836 * system. This is used to size tables by the CMI framework. It is
837 * only filled in for Intel and AMD CPUs.
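 *
 * Tying several of the members above together: on a system where the APIC ID
 * is laid out as thread bits in the low bits, core bits above those, and
 * package bits above those, the IDs could be derived roughly as in the sketch
 * below. The real derivation is vendor- and leaf-specific; this is only meant
 * to show how cpi_ncore_bits and cpi_nthread_bits are used:
 *
 *     chipid = cpi_apicid >> (cpi_ncore_bits + cpi_nthread_bits);
 *     coreid = (cpi_apicid >> cpi_nthread_bits) &
 *         ((1 << cpi_ncore_bits) - 1);                  // within the package
 *     threadid = cpi_apicid & ((1 << cpi_nthread_bits) - 1);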
838 *
839 * -----------
840 * Hypervisors
841 * -----------
842 *
843 * If trying to manage the differences between vendors wasn't bad enough, it can
844 * get worse thanks to our friend hardware virtualization. Hypervisors are given
845 * the ability to interpose on all cpuid instructions and change them to suit
846 * their purposes. In general, this is necessary as the hypervisor wants to be
847 * able to present a more uniform set of features or not necessarily give the
848 * guest operating system kernel knowledge of all features so it can be
849 * more easily migrated between systems.
850 *
851 * When it comes to trying to determine topology information, this can be a
852 * double edged sword. When a hypervisor doesn't actually implement a cpuid
853 * leaf, it'll often return all zeros. Because of that, you'll often see various
 * checks scattered about that verify fields are non-zero before we assume we
 * can use them.
856 *
857 * When it comes to topology information, the hypervisor is often incentivized
858 * to lie to you about topology. This is because it doesn't always actually
859 * guarantee that topology at all. The topology path we take in the system
860 * depends on how the CPU advertises itself. If it advertises itself as an Intel
861 * or AMD CPU, then we basically do our normal path. However, when they don't
862 * use an actual vendor, then that usually turns into multiple one-core CPUs
863 * that we enumerate that are often on different sockets. The actual behavior
864 * depends greatly on what the hypervisor actually exposes to us.
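 *
 * For reference, the hypervisor range described earlier is conventionally
 * probed as sketched below: leaf 0x40000000 returns the maximum hypervisor
 * leaf in %eax and a vendor signature in %ebx, %ecx, and %edx (for example
 * "KVMKVMKVM" or "Microsoft Hv"). The exact contents are hypervisor-specific,
 * so treat this only as the common convention:
 *
 *     struct cpuid_regs regs;
 *     char hvsig[13];
 *
 *     bzero(&regs, sizeof (regs));
 *     regs.cp_eax = 0x40000000;
 *     (void) __cpuid_insn(&regs);
 *     bcopy(&regs.cp_ebx, &hvsig[0], 4);
 *     bcopy(&regs.cp_ecx, &hvsig[4], 4);
 *     bcopy(&regs.cp_edx, &hvsig[8], 4);
 *     hvsig[12] = '\0';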
865 *
866 * --------------------
867 * Exposing Information
868 * --------------------
869 *
870 * We expose CPUID information in three different forms in the system.
871 *
872 * The first is through the x86_featureset variable. This is used in conjunction
873 * with the is_x86_feature() function. This is queried by x86-specific functions
874 * to determine which features are or aren't present in the system and to make
875 * decisions based upon them. For example, users of this include everything from
876 * parts of the system dedicated to reliability, availability, and
877 * serviceability (RAS), to making decisions about how to handle security
 * mitigations, to various x86-specific drivers. General-purpose or
 * architecture-independent drivers should never be calling this function.
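 *
 * For example, kernel code typically checks a feature bit as in this sketch
 * (X86FSET_SSE2 is one of the constants behind the names listed in
 * x86_feature_names below):
 *
 *     if (is_x86_feature(x86_featureset, X86FSET_SSE2)) {
 *             // safe to take SSE2-dependent code paths
 *     }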
880 *
881 * The second means is through the auxiliary vector. The auxiliary vector is a
882 * series of tagged data that the kernel passes down to a user program when it
883 * begins executing. This information is used to indicate to programs what
884 * instruction set extensions are present. For example, information about the
885 * CPU supporting the machine check architecture (MCA) wouldn't be passed down
886 * since user programs cannot make use of it. However, things like the AVX
887 * instruction sets are. Programs use this information to make run-time
888 * decisions about what features they should use. As an example, the run-time
889 * link-editor (rtld) can relocate different functions depending on the hardware
890 * support available.
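 *
 * From userland, these bits are usually consumed through getisax(3C) rather
 * than by walking the aux vector directly; a minimal sketch, with AV_386_AVX
 * coming from <sys/auxv_386.h>:
 *
 *     #include <sys/auxv.h>
 *
 *     uint32_t hwcap[2] = { 0, 0 };
 *
 *     (void) getisax(hwcap, 2);
 *     if (hwcap[0] & AV_386_AVX) {
 *             // AVX is advertised to this process
 *     }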
891 *
892 * The final form is through a series of accessor functions that all have the
893 * form cpuid_get*. This is used by a number of different subsystems in the
894 * kernel to determine more detailed information about what we're running on,
895 * topology information, etc. Some of these subsystems include processor groups
 * (uts/common/os/pg.c), CPU Module Interface (uts/i86pc/os/cmi.c), ACPI,
897 * microcode, and performance monitoring. These functions all ASSERT that the
898 * CPU they're being called on has reached a certain cpuid pass. If the passes
899 * are rearranged, then this needs to be adjusted.
900 */
901
902 #include <sys/types.h>
903 #include <sys/archsystm.h>
904 #include <sys/x86_archext.h>
905 #include <sys/kmem.h>
906 #include <sys/systm.h>
907 #include <sys/cmn_err.h>
908 #include <sys/sunddi.h>
909 #include <sys/sunndi.h>
910 #include <sys/cpuvar.h>
911 #include <sys/processor.h>
912 #include <sys/sysmacros.h>
913 #include <sys/pg.h>
914 #include <sys/fp.h>
915 #include <sys/controlregs.h>
916 #include <sys/bitmap.h>
917 #include <sys/auxv_386.h>
918 #include <sys/memnode.h>
919 #include <sys/pci_cfgspace.h>
920 #include <sys/comm_page.h>
921 #include <sys/mach_mmu.h>
922 #include <sys/ucode.h>
923 #include <sys/tsc.h>
924
925 #ifdef __xpv
926 #include <sys/hypervisor.h>
927 #else
928 #include <sys/ontrap.h>
929 #endif
930
931 uint_t x86_vendor = X86_VENDOR_IntelClone;
932 uint_t x86_type = X86_TYPE_OTHER;
933 uint_t x86_clflush_size = 0;
934
935 #if defined(__xpv)
936 int x86_use_pcid = 0;
937 int x86_use_invpcid = 0;
938 #else
939 int x86_use_pcid = -1;
940 int x86_use_invpcid = -1;
941 #endif
942
943 uint_t pentiumpro_bug4046376;
944
945 uchar_t x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)];
946
947 static char *x86_feature_names[NUM_X86_FEATURES] = {
948 "lgpg",
949 "tsc",
950 "msr",
951 "mtrr",
952 "pge",
953 "de",
954 "cmov",
955 "mmx",
956 "mca",
957 "pae",
958 "cv8",
959 "pat",
960 "sep",
961 "sse",
962 "sse2",
963 "htt",
964 "asysc",
965 "nx",
966 "sse3",
967 "cx16",
968 "cmp",
969 "tscp",
970 "mwait",
971 "sse4a",
972 "cpuid",
973 "ssse3",
974 "sse4_1",
975 "sse4_2",
976 "1gpg",
977 "clfsh",
978 "64",
979 "aes",
980 "pclmulqdq",
981 "xsave",
982 "avx",
983 "vmx",
984 "svm",
985 "topoext",
986 "f16c",
987 "rdrand",
988 "x2apic",
989 "avx2",
990 "bmi1",
991 "bmi2",
992 "fma",
993 "smep",
994 "smap",
995 "adx",
996 "rdseed",
997 "mpx",
998 "avx512f",
999 "avx512dq",
1000 "avx512pf",
1001 "avx512er",
1002 "avx512cd",
1003 "avx512bw",
1004 "avx512vl",
1005 "avx512fma",
1006 "avx512vbmi",
1007 "avx512_vpopcntdq",
1008 "avx512_4vnniw",
1009 "avx512_4fmaps",
1010 "xsaveopt",
1011 "xsavec",
1012 "xsaves",
1013 "sha",
1014 "umip",
1015 "pku",
1016 "ospke",
1017 "pcid",
1018 "invpcid",
1019 "ibrs",
1020 "ibpb",
1021 "stibp",
1022 "ssbd",
1023 "ssbd_virt",
1024 "rdcl_no",
1025 "ibrs_all",
1026 "rsba",
1027 "ssb_no",
1028 "stibp_all",
1029 "flush_cmd",
1030 "l1d_vmentry_no",
1031 "fsgsbase",
1032 "clflushopt",
1033 "clwb",
1034 "monitorx",
1035 "clzero",
1036 "xop",
1037 "fma4",
1038 "tbm",
1039 "avx512_vnni"
1040 };
1041
1042 boolean_t
1043 is_x86_feature(void *featureset, uint_t feature)
1044 {
1045 ASSERT(feature < NUM_X86_FEATURES);
1046 return (BT_TEST((ulong_t *)featureset, feature));
1047 }
1048
1049 void
1050 add_x86_feature(void *featureset, uint_t feature)
1051 {
1052 ASSERT(feature < NUM_X86_FEATURES);
1053 BT_SET((ulong_t *)featureset, feature);
1054 }
1055
1056 void
1057 remove_x86_feature(void *featureset, uint_t feature)
1058 {
1059 ASSERT(feature < NUM_X86_FEATURES);
1060 BT_CLEAR((ulong_t *)featureset, feature);
1061 }
1062
1063 boolean_t
1064 compare_x86_featureset(void *setA, void *setB)
1065 {
1066 /*
1067 * We assume that the unused bits of the bitmap are always zero.
1068 */
1069 if (memcmp(setA, setB, BT_SIZEOFMAP(NUM_X86_FEATURES)) == 0) {
1070 return (B_TRUE);
1071 } else {
1072 return (B_FALSE);
1073 }
1074 }
1075
1076 void
1077 print_x86_featureset(void *featureset)
1078 {
1079 uint_t i;
1080
1081 for (i = 0; i < NUM_X86_FEATURES; i++) {
1082 if (is_x86_feature(featureset, i)) {
1083 cmn_err(CE_CONT, "?x86_feature: %s\n",
1084 x86_feature_names[i]);
1085 }
1086 }
1087 }
1088
1089 /* Note: This is the maximum size for the CPU, not the size of the structure. */
1090 static size_t xsave_state_size = 0;
1091 uint64_t xsave_bv_all = (XFEATURE_LEGACY_FP | XFEATURE_SSE);
1092 boolean_t xsave_force_disable = B_FALSE;
1093 extern int disable_smap;
1094
1095 /*
1096 * This is set to platform type we are running on.
1097 */
1098 static int platform_type = -1;
1099
1100 #if !defined(__xpv)
1101 /*
1102 * Variable to patch if hypervisor platform detection needs to be
1103 * disabled (e.g. platform_type will always be HW_NATIVE if this is 0).
1104 */
1105 int enable_platform_detection = 1;
1106 #endif
1107
1108 /*
1109 * monitor/mwait info.
1110 *
1111 * size_actual and buf_actual are the real address and size allocated to get
 * proper mwait_buf alignment. buf_actual and size_actual should be passed
 * to kmem_free(). Currently kmem_alloc() and mwait happen to both use
 * processor cache-line alignment, but this is not guaranteed in the future.
1115 */
1116 struct mwait_info {
1117 size_t mon_min; /* min size to avoid missed wakeups */
1118 size_t mon_max; /* size to avoid false wakeups */
1119 size_t size_actual; /* size actually allocated */
1120 void *buf_actual; /* memory actually allocated */
1121 uint32_t support; /* processor support of monitor/mwait */
1122 };
1123
1124 /*
1125 * xsave/xrestor info.
1126 *
1127 * This structure contains HW feature bits and the size of the xsave save area.
1128 * Note: the kernel declares a fixed size (AVX_XSAVE_SIZE) structure
1129 * (xsave_state) to describe the xsave layout. However, at runtime the
1130 * per-lwp xsave area is dynamically allocated based on xsav_max_size. The
1131 * xsave_state structure simply represents the legacy layout of the beginning
1132 * of the xsave area.
1133 */
1134 struct xsave_info {
1135 uint32_t xsav_hw_features_low; /* Supported HW features */
1136 uint32_t xsav_hw_features_high; /* Supported HW features */
1137 size_t xsav_max_size; /* max size save area for HW features */
1138 size_t ymm_size; /* AVX: size of ymm save area */
1139 size_t ymm_offset; /* AVX: offset for ymm save area */
1140 size_t bndregs_size; /* MPX: size of bndregs save area */
1141 size_t bndregs_offset; /* MPX: offset for bndregs save area */
1142 size_t bndcsr_size; /* MPX: size of bndcsr save area */
1143 size_t bndcsr_offset; /* MPX: offset for bndcsr save area */
1144 size_t opmask_size; /* AVX512: size of opmask save */
1145 size_t opmask_offset; /* AVX512: offset for opmask save */
1146 size_t zmmlo_size; /* AVX512: size of zmm 256 save */
1147 size_t zmmlo_offset; /* AVX512: offset for zmm 256 save */
1148 size_t zmmhi_size; /* AVX512: size of zmm hi reg save */
1149 size_t zmmhi_offset; /* AVX512: offset for zmm hi reg save */
1150 };
1151
1152
1153 /*
1154 * These constants determine how many of the elements of the
1155 * cpuid we cache in the cpuid_info data structure; the
1156 * remaining elements are accessible via the cpuid instruction.
1157 */
1158
1159 #define NMAX_CPI_STD 8 /* eax = 0 .. 7 */
1160 #define NMAX_CPI_EXTD 0x1f /* eax = 0x80000000 .. 0x8000001e */
1161
1162 /*
1163 * See the big theory statement for a more detailed explanation of what some of
1164 * these members mean.
1165 */
1166 struct cpuid_info {
1167 uint_t cpi_pass; /* last pass completed */
1168 /*
1169 * standard function information
1170 */
1171 uint_t cpi_maxeax; /* fn 0: %eax */
1172 char cpi_vendorstr[13]; /* fn 0: %ebx:%ecx:%edx */
1173 uint_t cpi_vendor; /* enum of cpi_vendorstr */
1174
1175 uint_t cpi_family; /* fn 1: extended family */
1176 uint_t cpi_model; /* fn 1: extended model */
1177 uint_t cpi_step; /* fn 1: stepping */
1178 chipid_t cpi_chipid; /* fn 1: %ebx: Intel: chip # */
1179 /* AMD: package/socket # */
1180 uint_t cpi_brandid; /* fn 1: %ebx: brand ID */
1181 int cpi_clogid; /* fn 1: %ebx: thread # */
1182 uint_t cpi_ncpu_per_chip; /* fn 1: %ebx: logical cpu count */
1183 uint8_t cpi_cacheinfo[16]; /* fn 2: intel-style cache desc */
1184 uint_t cpi_ncache; /* fn 2: number of elements */
1185 uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */
1186 id_t cpi_last_lvl_cacheid; /* fn 4: %eax: derived cache id */
1187 uint_t cpi_cache_leaf_size; /* Number of cache elements */
1188 /* Intel fn: 4, AMD fn: 8000001d */
struct cpuid_regs **cpi_cache_leaves; /* Actual leaves from above */
1190 struct cpuid_regs cpi_std[NMAX_CPI_STD]; /* 0 .. 7 */
1191 /*
1192 * extended function information
1193 */
1194 uint_t cpi_xmaxeax; /* fn 0x80000000: %eax */
1195 char cpi_brandstr[49]; /* fn 0x8000000[234] */
uint8_t cpi_pabits; /* fn 0x80000008: %eax */
uint8_t cpi_vabits; /* fn 0x80000008: %eax */
1198 uint8_t cpi_fp_amd_save; /* AMD: FP error pointer save rqd. */
1199 struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x800000XX */
1200
1201 id_t cpi_coreid; /* same coreid => strands share core */
1202 int cpi_pkgcoreid; /* core number within single package */
1203 uint_t cpi_ncore_per_chip; /* AMD: fn 0x80000008: %ecx[7-0] */
1204 /* Intel: fn 4: %eax[31-26] */
1205
1206 /*
1207 * These values represent the number of bits that are required to store
1208 * information about the number of cores and threads.
1209 */
1210 uint_t cpi_ncore_bits;
1211 uint_t cpi_nthread_bits;
1212 /*
1213 * supported feature information
1214 */
1215 uint32_t cpi_support[6];
1216 #define STD_EDX_FEATURES 0
1217 #define AMD_EDX_FEATURES 1
1218 #define TM_EDX_FEATURES 2
1219 #define STD_ECX_FEATURES 3
1220 #define AMD_ECX_FEATURES 4
1221 #define STD_EBX_FEATURES 5
1222 /*
1223 * Synthesized information, where known.
1224 */
1225 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */
1226 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */
1227 uint32_t cpi_socket; /* Chip package/socket type */
1228
1229 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */
1230 uint32_t cpi_apicid;
1231 uint_t cpi_procnodeid; /* AMD: nodeID on HT, Intel: chipid */
1232 uint_t cpi_procnodes_per_pkg; /* AMD: # of nodes in the package */
1233 /* Intel: 1 */
1234 uint_t cpi_compunitid; /* AMD: ComputeUnit ID, Intel: coreid */
1235 uint_t cpi_cores_per_compunit; /* AMD: # of cores in the ComputeUnit */
1236
1237 struct xsave_info cpi_xsave; /* fn D: xsave/xrestor info */
1238 };
1239
1240
1241 static struct cpuid_info cpuid_info0;
1242
1243 /*
1244 * These bit fields are defined by the Intel Application Note AP-485
1245 * "Intel Processor Identification and the CPUID Instruction"
1246 */
1247 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20)
1248 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16)
1249 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12)
1250 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8)
1251 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0)
1252 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4)
1253
1254 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx)
1255 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx)
1256 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx)
1257 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx)
1258 #define CPI_FEATURES_7_0_EBX(cpi) ((cpi)->cpi_std[7].cp_ebx)
1259 #define CPI_FEATURES_7_0_ECX(cpi) ((cpi)->cpi_std[7].cp_ecx)
1260 #define CPI_FEATURES_7_0_EDX(cpi) ((cpi)->cpi_std[7].cp_edx)
1261
1262 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0)
#define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 8)
1264 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16)
1265 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24)
1266
1267 #define CPI_MAXEAX_MAX 0x100 /* sanity control */
1268 #define CPI_XMAXEAX_MAX 0x80000100
1269 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */
1270 #define CPI_FNB_ECX_MAX 0x20 /* sanity: max fn B levels */
1271
1272 /*
1273 * Function 4 (Deterministic Cache Parameters) macros
1274 * Defined by Intel Application Note AP-485
1275 */
1276 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26)
1277 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14)
1278 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9)
1279 #define CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8)
1280 #define CPI_CACHE_LVL(regs) BITX((regs)->cp_eax, 7, 5)
1281 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0)
1282 #define CPI_CPU_LEVEL_TYPE(regs) BITX((regs)->cp_ecx, 15, 8)
1283
1284 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22)
1285 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12)
1286 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0)
1287
1288 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0)
1289
1290 #define CPI_PREFCH_STRIDE(regs) BITX((regs)->cp_edx, 9, 0)
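/*
 * Taken together, the fields above describe the geometry of one cache level;
 * per the Intel documentation, the total size of the cache described by a
 * leaf works out to (illustrative sketch, all fields are "value - 1" encoded):
 *
 *	size = (CPI_CACHE_WAYS(regs) + 1) * (CPI_CACHE_PARTS(regs) + 1) *
 *	    (CPI_CACHE_COH_LN_SZ(regs) + 1) * (CPI_CACHE_SETS(regs) + 1)
 */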
1291
1292
1293 /*
1294 * A couple of shorthand macros to identify "later" P6-family chips
1295 * like the Pentium M and Core. First, the "older" P6-based stuff
1296 * (loosely defined as "pre-Pentium-4"):
1297 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon
1298 */
1299 #define IS_LEGACY_P6(cpi) ( \
1300 cpi->cpi_family == 6 && \
1301 (cpi->cpi_model == 1 || \
1302 cpi->cpi_model == 3 || \
1303 cpi->cpi_model == 5 || \
1304 cpi->cpi_model == 6 || \
1305 cpi->cpi_model == 7 || \
1306 cpi->cpi_model == 8 || \
1307 cpi->cpi_model == 0xA || \
1308 cpi->cpi_model == 0xB) \
1309 )
1310
1311 /* A "new F6" is everything with family 6 that's not the above */
1312 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi))
1313
1314 /* Extended family/model support */
1315 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \
1316 cpi->cpi_family >= 0xf)
1317
1318 /*
1319 * Info for monitor/mwait idle loop.
1320 *
1321 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's
1322 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November
1323 * 2006.
1324 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual
1325 * Documentation Updates" #33633, Rev 2.05, December 2006.
1326 */
1327 #define MWAIT_SUPPORT (0x00000001) /* mwait supported */
1328 #define MWAIT_EXTENSIONS (0x00000002) /* extension supported */
1329 #define MWAIT_ECX_INT_ENABLE (0x00000004) /* ecx 1 extension supported */
1330 #define MWAIT_SUPPORTED(cpi) ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON)
1331 #define MWAIT_INT_ENABLE(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x2)
1332 #define MWAIT_EXTENSION(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x1)
1333 #define MWAIT_SIZE_MIN(cpi) BITX((cpi)->cpi_std[5].cp_eax, 15, 0)
1334 #define MWAIT_SIZE_MAX(cpi) BITX((cpi)->cpi_std[5].cp_ebx, 15, 0)
1335 /*
1336 * Number of sub-cstates for a given c-state.
1337 */
1338 #define MWAIT_NUM_SUBC_STATES(cpi, c_state) \
1339 BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state)
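/*
 * Note that the c_state argument above is a bit offset into %edx (4 bits per
 * C-state), not the C-state number itself. For example, the number of C1
 * sub-states lives in %edx[7:4] and would be fetched with
 * MWAIT_NUM_SUBC_STATES(cpi, 4).
 */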
1340
1341 /*
1342 * XSAVE leaf 0xD enumeration
1343 */
1344 #define CPUID_LEAFD_2_YMM_OFFSET 576
1345 #define CPUID_LEAFD_2_YMM_SIZE 256
1346
1347 /*
1348 * Common extended leaf names to cut down on typos.
1349 */
1350 #define CPUID_LEAF_EXT_0 0x80000000
1351 #define CPUID_LEAF_EXT_8 0x80000008
1352 #define CPUID_LEAF_EXT_1d 0x8000001d
1353 #define CPUID_LEAF_EXT_1e 0x8000001e
1354
1355 /*
1356  * Functions we consume from cpuid_subr.c; don't publish these in a header
1357  * file to try to keep people using the expected cpuid_* interfaces.
1358 */
1359 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t);
1360 extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t);
1361 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t);
1362 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t);
1363 extern uint_t _cpuid_vendorstr_to_vendorcode(char *);
1364
1365 /*
1366  * Apply various platform-dependent restrictions where the
1367 * underlying platform restrictions mean the CPU can be marked
1368 * as less capable than its cpuid instruction would imply.
1369 */
1370 #if defined(__xpv)
1371 static void
1372 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp)
1373 {
1374 switch (eax) {
1375 case 1: {
1376 uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ?
1377 0 : CPUID_INTC_EDX_MCA;
1378 cp->cp_edx &=
1379 ~(mcamask |
1380 CPUID_INTC_EDX_PSE |
1381 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
1382 CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR |
1383 CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT |
1384 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
1385 CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT);
1386 break;
1387 }
1388
1389 case 0x80000001:
1390 cp->cp_edx &=
1391 ~(CPUID_AMD_EDX_PSE |
1392 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
1393 CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE |
1394 CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 |
1395 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
1396 CPUID_AMD_EDX_TSCP);
1397 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY;
1398 break;
1399 default:
1400 break;
1401 }
1402
1403 switch (vendor) {
1404 case X86_VENDOR_Intel:
1405 switch (eax) {
1406 case 4:
1407 /*
1408 * Zero out the (ncores-per-chip - 1) field
1409 */
1410 cp->cp_eax &= 0x03ffffff;
1411 break;
1412 default:
1413 break;
1414 }
1415 break;
1416 case X86_VENDOR_AMD:
1417 switch (eax) {
1418
1419 case 0x80000001:
1420 cp->cp_ecx &= ~CPUID_AMD_ECX_CR8D;
1421 break;
1422
1423 case CPUID_LEAF_EXT_8:
1424 /*
1425 * Zero out the (ncores-per-chip - 1) field
1426 */
1427 cp->cp_ecx &= 0xffffff00;
1428 break;
1429 default:
1430 break;
1431 }
1432 break;
1433 default:
1434 break;
1435 }
1436 }
1437 #else
1438 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */
1439 #endif
1440
1441 /*
1442 * Some undocumented ways of patching the results of the cpuid
1443 * instruction to permit running Solaris 10 on future cpus that
1444 * we don't currently support. Could be set to non-zero values
1445 * via settings in eeprom.
1446 */
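/*
 * For example (purely illustrative), a feature bit could be masked off with
 * an /etc/system entry such as:
 *
 *	set cpuid_feature_edx_exclude = 0x2000000
 *
 * which would hide the SSE bit (leaf 1 %edx bit 25) from the kernel.
 */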
1447
1448 uint32_t cpuid_feature_ecx_include;
1449 uint32_t cpuid_feature_ecx_exclude;
1450 uint32_t cpuid_feature_edx_include;
1451 uint32_t cpuid_feature_edx_exclude;
1452
1453 /*
1454 * Allocate space for mcpu_cpi in the machcpu structure for all non-boot CPUs.
1455 */
1456 void
1457 cpuid_alloc_space(cpu_t *cpu)
1458 {
1459 /*
1460 * By convention, cpu0 is the boot cpu, which is set up
1461 * before memory allocation is available. All other cpus get
1462 * their cpuid_info struct allocated here.
1463 */
1464 ASSERT(cpu->cpu_id != 0);
1465 ASSERT(cpu->cpu_m.mcpu_cpi == NULL);
1466 cpu->cpu_m.mcpu_cpi =
1467 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP);
1468 }
1469
1470 void
1471 cpuid_free_space(cpu_t *cpu)
1472 {
1473 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1474 int i;
1475
1476 ASSERT(cpi != NULL);
1477 ASSERT(cpi != &cpuid_info0);
1478
1479 /*
1480 * Free up any cache leaf related dynamic storage. The first entry was
1481 * cached from the standard cpuid storage, so we should not free it.
1482 */
1483 for (i = 1; i < cpi->cpi_cache_leaf_size; i++)
1484 kmem_free(cpi->cpi_cache_leaves[i], sizeof (struct cpuid_regs));
1485 if (cpi->cpi_cache_leaf_size > 0)
1486 kmem_free(cpi->cpi_cache_leaves,
1487 cpi->cpi_cache_leaf_size * sizeof (struct cpuid_regs *));
1488
1489 kmem_free(cpi, sizeof (*cpi));
1490 cpu->cpu_m.mcpu_cpi = NULL;
1491 }
1492
1493 #if !defined(__xpv)
1494 /*
1495 * Determine the type of the underlying platform. This is used to customize
1496 * initialization of various subsystems (e.g. TSC). determine_platform() must
1497 * only ever be called once to prevent two processors from seeing different
1498 * values of platform_type. Must be called before cpuid_pass1(), the earliest
1499 * consumer to execute (uses _cpuid_chiprev --> synth_amd_info --> get_hwenv).
1500 */
1501 void
1502 determine_platform(void)
1503 {
1504 struct cpuid_regs cp;
1505 uint32_t base;
1506 uint32_t regs[4];
1507 char *hvstr = (char *)regs;
1508
1509 ASSERT(platform_type == -1);
1510
1511 platform_type = HW_NATIVE;
1512
1513 if (!enable_platform_detection)
1514 return;
1515
1516 /*
1517 * If Hypervisor CPUID bit is set, try to determine hypervisor
1518 * vendor signature, and set platform type accordingly.
1519 *
1520 * References:
1521 * http://lkml.org/lkml/2008/10/1/246
1522 * http://kb.vmware.com/kb/1009458
1523 */
1524 cp.cp_eax = 0x1;
1525 (void) __cpuid_insn(&cp);
1526 if ((cp.cp_ecx & CPUID_INTC_ECX_HV) != 0) {
1527 cp.cp_eax = 0x40000000;
1528 (void) __cpuid_insn(&cp);
1529 regs[0] = cp.cp_ebx;
1530 regs[1] = cp.cp_ecx;
1531 regs[2] = cp.cp_edx;
1532 regs[3] = 0;
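/*
 * The three registers spell out an ASCII vendor signature, e.g.
 * "KVMKVMKVM\0\0\0" for KVM or "VMwareVMware" for VMware; compare it
 * against the known signatures below.
 */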
1533 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0) {
1534 platform_type = HW_XEN_HVM;
1535 return;
1536 }
1537 if (strcmp(hvstr, HVSIG_VMWARE) == 0) {
1538 platform_type = HW_VMWARE;
1539 return;
1540 }
1541 if (strcmp(hvstr, HVSIG_KVM) == 0) {
1542 platform_type = HW_KVM;
1543 return;
1544 }
1545 if (strcmp(hvstr, HVSIG_BHYVE) == 0) {
1546 platform_type = HW_BHYVE;
1547 return;
1548 }
1549 if (strcmp(hvstr, HVSIG_MICROSOFT) == 0)
1550 platform_type = HW_MICROSOFT;
1551 } else {
1552 /*
1553  * Check older VMware hardware versions. The VMware hypervisor is
1554  * detected by performing an IN operation on the VMware hypervisor
1555  * port and checking that the value returned in %ebx is the VMware
1556  * hypervisor magic value.
1557 *
1558 * References: http://kb.vmware.com/kb/1009458
1559 */
1560 vmware_port(VMWARE_HVCMD_GETVERSION, regs);
1561 if (regs[1] == VMWARE_HVMAGIC) {
1562 platform_type = HW_VMWARE;
1563 return;
1564 }
1565 }
1566
1567 /*
1568 * Check Xen hypervisor. In a fully virtualized domain,
1569 * Xen's pseudo-cpuid function returns a string representing the
1570 * Xen signature in %ebx, %ecx, and %edx. %eax contains the maximum
1571 * supported cpuid function. We need at least a (base + 2) leaf value
1572 * to do what we want to do. Try different base values, since the
1573 * hypervisor might use a different one depending on whether Hyper-V
1574 * emulation is switched on by default or not.
1575 */
1576 for (base = 0x40000000; base < 0x40010000; base += 0x100) {
1577 cp.cp_eax = base;
1578 (void) __cpuid_insn(&cp);
1579 regs[0] = cp.cp_ebx;
1580 regs[1] = cp.cp_ecx;
1581 regs[2] = cp.cp_edx;
1582 regs[3] = 0;
1583 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0 &&
1584 cp.cp_eax >= (base + 2)) {
1585 platform_type &= ~HW_NATIVE;
1586 platform_type |= HW_XEN_HVM;
1587 return;
1588 }
1589 }
1590 }
1591
1592 int
1593 get_hwenv(void)
1594 {
1595 ASSERT(platform_type != -1);
1596 return (platform_type);
1597 }
1598
1599 int
1600 is_controldom(void)
1601 {
1602 return (0);
1603 }
1604
1605 #else
1606
1607 int
1608 get_hwenv(void)
1609 {
1610 return (HW_XEN_PV);
1611 }
1612
1613 int
1614 is_controldom(void)
1615 {
1616 return (DOMAIN_IS_INITDOMAIN(xen_info));
1617 }
1618
1619 #endif /* __xpv */
1620
1621 /*
1622 * Make sure that we have gathered all of the CPUID leaves that we might need to
1623 * determine topology. We assume that the standard leaf 1 has already been done
1624 * and that xmaxeax has already been calculated.
1625 */
1626 static void
1627 cpuid_gather_amd_topology_leaves(cpu_t *cpu)
1628 {
1629 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1630
1631 if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
1632 struct cpuid_regs *cp;
1633
1634 cp = &cpi->cpi_extd[8];
1635 cp->cp_eax = CPUID_LEAF_EXT_8;
1636 (void) __cpuid_insn(cp);
1637 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8, cp);
1638 }
1639
1640 if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1641 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1642 struct cpuid_regs *cp;
1643
1644 cp = &cpi->cpi_extd[0x1e];
1645 cp->cp_eax = CPUID_LEAF_EXT_1e;
1646 (void) __cpuid_insn(cp);
1647 }
1648 }
1649
1650 /*
1651 * Get the APIC ID for this processor. If Leaf B is present and valid, we prefer
1652 * it to everything else. If not, and we're on an AMD system where 8000001e is
1653  * valid, then we use that. Otherwise, we fall back to the default value for the
1654 * APIC ID in leaf 1.
1655 */
1656 static uint32_t
1657 cpuid_gather_apicid(struct cpuid_info *cpi)
1658 {
1659 /*
1660  * Leaf B changes based on the arguments to it. Because we don't cache
1661 * it, we need to gather it again.
1662 */
1663 if (cpi->cpi_maxeax >= 0xB) {
1664 struct cpuid_regs regs;
1665 struct cpuid_regs *cp;
1666
1667 cp = &regs;
1668 cp->cp_eax = 0xB;
1669 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1670 (void) __cpuid_insn(cp);
1671
1672 if (cp->cp_ebx != 0) {
1673 return (cp->cp_edx);
1674 }
1675 }
1676
1677 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
1678 is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1679 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1680 return (cpi->cpi_extd[0x1e].cp_eax);
1681 }
1682
1683 return (CPI_APIC_ID(cpi));
1684 }
1685
1686 /*
1687 * For AMD processors, attempt to calculate the number of chips and cores that
1688 * exist. The way that we do this varies based on the generation, because the
1689 * generations themselves have changed dramatically.
1690 *
1691 * If cpuid leaf 0x80000008 exists, that generally tells us the number of cores.
1692 * However, with the advent of family 17h (Zen) it actually tells us the number
1693 * of threads, so we need to look at leaf 0x8000001e if available to determine
1694 * its value. Otherwise, for all prior families, the number of enabled cores is
1695 * the same as threads.
1696 *
1697 * If we do not have leaf 0x80000008, then we assume that this processor does
1698 * not have anything. AMD's older CPUID specification says there's no reason to
1699 * fall back to leaf 1.
1700 *
1701 * In some virtualization cases we will not have leaf 8000001e or it will be
1702 * zero. When that happens we assume the number of threads is one.
1703 */
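/*
 * As a purely hypothetical example, an 8-core/16-thread Zen package would
 * report 15 in leaf 0x80000008 %ecx[7:0] (nthreads = 16) and 1 in leaf
 * 0x8000001e %ebx[15:8] (2 threads per core), yielding ncores = 16 / 2 = 8.
 */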
1704 static void
1705 cpuid_amd_ncores(struct cpuid_info *cpi, uint_t *ncpus, uint_t *ncores)
1706 {
1707 uint_t nthreads, nthread_per_core;
1708
1709 nthreads = nthread_per_core = 1;
1710
1711 if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
1712 nthreads = BITX(cpi->cpi_extd[8].cp_ecx, 7, 0) + 1;
1713 } else if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) {
1714 nthreads = CPI_CPU_COUNT(cpi);
1715 }
1716
1717 /*
1718 * For us to have threads, and know about it, we have to be at least at
1719 * family 17h and have the cpuid bit that says we have extended
1720 * topology.
1721 */
1722 if (cpi->cpi_family >= 0x17 &&
1723 is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1724 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1725 nthread_per_core = BITX(cpi->cpi_extd[0x1e].cp_ebx, 15, 8) + 1;
1726 }
1727
1728 *ncpus = nthreads;
1729 *ncores = nthreads / nthread_per_core;
1730 }
1731
1732 /*
1733 * Seed the initial values for the cores and threads for an Intel based
1734 * processor. These values will be overwritten if we detect that the processor
1735 * supports CPUID leaf 0xb.
1736 */
1737 static void
1738 cpuid_intel_ncores(struct cpuid_info *cpi, uint_t *ncpus, uint_t *ncores)
1739 {
1740 /*
1741 * Only seed the number of physical cores from the first level leaf 4
1742  * information. The number of threads there indicates how many share the
1743 * L1 cache, which may or may not have anything to do with the number of
1744 * logical CPUs per core.
1745 */
1746 if (cpi->cpi_maxeax >= 4) {
1747 *ncores = BITX(cpi->cpi_std[4].cp_eax, 31, 26) + 1;
1748 } else {
1749 *ncores = 1;
1750 }
1751
1752 if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) {
1753 *ncpus = CPI_CPU_COUNT(cpi);
1754 } else {
1755 *ncpus = *ncores;
1756 }
1757 }
1758
1759 static boolean_t
1760 cpuid_leafB_getids(cpu_t *cpu)
1761 {
1762 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1763 struct cpuid_regs regs;
1764 struct cpuid_regs *cp;
1765
1766 if (cpi->cpi_maxeax < 0xB)
1767 return (B_FALSE);
1768
1769 cp = &regs;
1770 cp->cp_eax = 0xB;
1771 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1772
1773 (void) __cpuid_insn(cp);
1774
1775 /*
1776 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which
1777 * indicates that the extended topology enumeration leaf is
1778 * available.
1779 */
1780 if (cp->cp_ebx != 0) {
1781 uint32_t x2apic_id = 0;
1782 uint_t coreid_shift = 0;
1783 uint_t ncpu_per_core = 1;
1784 uint_t chipid_shift = 0;
1785 uint_t ncpu_per_chip = 1;
1786 uint_t i;
1787 uint_t level;
1788
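/*
 * Per the SDM, CPUID.(EAX=0xB, ECX=n):%ecx[15:8] is the level type: 1 is
 * the SMT (thread) level and 2 is the core level. %eax[4:0] is the number
 * of bits to shift the x2APIC ID right to reach the next level up.
 */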
1789 for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
1790 cp->cp_eax = 0xB;
1791 cp->cp_ecx = i;
1792
1793 (void) __cpuid_insn(cp);
1794 level = CPI_CPU_LEVEL_TYPE(cp);
1795
1796 if (level == 1) {
1797 x2apic_id = cp->cp_edx;
1798 coreid_shift = BITX(cp->cp_eax, 4, 0);
1799 ncpu_per_core = BITX(cp->cp_ebx, 15, 0);
1800 } else if (level == 2) {
1801 x2apic_id = cp->cp_edx;
1802 chipid_shift = BITX(cp->cp_eax, 4, 0);
1803 ncpu_per_chip = BITX(cp->cp_ebx, 15, 0);
1804 }
1805 }
1806
1807 /*
1808 * cpi_apicid is taken care of in cpuid_gather_apicid.
1809 */
1810 cpi->cpi_ncpu_per_chip = ncpu_per_chip;
1811 cpi->cpi_ncore_per_chip = ncpu_per_chip /
1812 ncpu_per_core;
1813 cpi->cpi_chipid = x2apic_id >> chipid_shift;
1814 cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1);
1815 cpi->cpi_coreid = x2apic_id >> coreid_shift;
1816 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
1817 cpi->cpi_procnodeid = cpi->cpi_chipid;
1818 cpi->cpi_compunitid = cpi->cpi_coreid;
1819
1820 if (coreid_shift > 0 && chipid_shift > coreid_shift) {
1821 cpi->cpi_nthread_bits = coreid_shift;
1822 cpi->cpi_ncore_bits = chipid_shift - coreid_shift;
1823 }
1824
1825 return (B_TRUE);
1826 } else {
1827 return (B_FALSE);
1828 }
1829 }
1830
1831 static void
1832 cpuid_intel_getids(cpu_t *cpu, void *feature)
1833 {
1834 uint_t i;
1835 uint_t chipid_shift = 0;
1836 uint_t coreid_shift = 0;
1837 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1838
1839 /*
1840 * There are no compute units or processor nodes currently on Intel.
1841 * Always set these to one.
1842 */
1843 cpi->cpi_procnodes_per_pkg = 1;
1844 cpi->cpi_cores_per_compunit = 1;
1845
1846 /*
1847 * If cpuid Leaf B is present, use that to try and get this information.
1848 * It will be the most accurate for Intel CPUs.
1849 */
1850 if (cpuid_leafB_getids(cpu))
1851 return;
1852
1853 /*
1854 * In this case, we have the leaf 1 and leaf 4 values for ncpu_per_chip
1855 * and ncore_per_chip. These represent the largest power of two values
1856 * that we need to cover all of the IDs in the system. Therefore, we use
1857 * those values to seed the number of bits needed to cover information
1858 * in the case when leaf B is not available. These values will probably
1859 * be larger than required, but that's OK.
1860 */
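/*
 * ddi_fls() returns the 1-based index of the highest bit set, so e.g. an
 * ncpu_per_chip of 8 yields 4 bits here, one more than the 3 strictly
 * required -- the "larger than required" case noted above.
 */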
1861 cpi->cpi_nthread_bits = ddi_fls(cpi->cpi_ncpu_per_chip);
1862 cpi->cpi_ncore_bits = ddi_fls(cpi->cpi_ncore_per_chip);
1863
1864 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1)
1865 chipid_shift++;
1866
1867 cpi->cpi_chipid = cpi->cpi_apicid >> chipid_shift;
1868 cpi->cpi_clogid = cpi->cpi_apicid & ((1 << chipid_shift) - 1);
1869
1870 if (is_x86_feature(feature, X86FSET_CMP)) {
1871 /*
1872 * Multi-core (and possibly multi-threaded)
1873 * processors.
1874 */
1875 uint_t ncpu_per_core;
1876 if (cpi->cpi_ncore_per_chip == 1)
1877 ncpu_per_core = cpi->cpi_ncpu_per_chip;
1878 else if (cpi->cpi_ncore_per_chip > 1)
1879 ncpu_per_core = cpi->cpi_ncpu_per_chip /
1880 cpi->cpi_ncore_per_chip;
1881 /*
1882 * 8bit APIC IDs on dual core Pentiums
1883 * look like this:
1884 *
1885 * +-----------------------+------+------+
1886 * | Physical Package ID | MC | HT |
1887 * +-----------------------+------+------+
1888 * <------- chipid -------->
1889 * <------- coreid --------------->
1890 * <--- clogid -->
1891 * <------>
1892 * pkgcoreid
1893 *
1894 * Where the number of bits necessary to
1895 * represent MC and HT fields together equals
1896 * to the minimum number of bits necessary to
1897 * store the value of cpi->cpi_ncpu_per_chip.
1898 * Of those bits, the MC part uses the number
1899 * of bits necessary to store the value of
1900 * cpi->cpi_ncore_per_chip.
1901 */
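/*
 * As a hypothetical example, an 8-bit APIC ID of 0xd on a package with
 * ncpu_per_chip = 4 (chipid_shift = 2) and 2 logical CPUs per core
 * (coreid_shift = 1) decomposes as chipid = 3, clogid = 1, coreid = 6
 * and pkgcoreid = 0.
 */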
1902 for (i = 1; i < ncpu_per_core; i <<= 1)
1903 coreid_shift++;
1904 cpi->cpi_coreid = cpi->cpi_apicid >> coreid_shift;
1905 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
1906 } else if (is_x86_feature(feature, X86FSET_HTT)) {
1907 /*
1908 * Single-core multi-threaded processors.
1909 */
1910 cpi->cpi_coreid = cpi->cpi_chipid;
1911 cpi->cpi_pkgcoreid = 0;
1912 } else {
1913 /*
1914 * Single-core single-thread processors.
1915 */
1916 cpi->cpi_coreid = cpu->cpu_id;
1917 cpi->cpi_pkgcoreid = 0;
1918 }
1919 cpi->cpi_procnodeid = cpi->cpi_chipid;
1920 cpi->cpi_compunitid = cpi->cpi_coreid;
1921 }
1922
1923 /*
1924 * Historically, AMD has had CMP chips with only a single thread per core.
1925 * However, starting in family 17h (Zen), this has changed and they now have
1926 * multiple threads. Our internal core id needs to be a unique value.
1927 *
1928 * To determine the core id of an AMD system, if we're from a family before 17h,
1929 * then we just use the cpu id, as that gives us a good value that will be
1930 * unique for each core. If instead, we're on family 17h or later, then we need
1931  * to do something more complicated. CPUID leaf 0x8000001e can tell us
1932  * how many threads share a core. Based on that, we'll shift the APIC ID.
1933 * We can't use the normal core id in that leaf as it's only unique within the
1934 * socket, which is perfect for cpi_pkgcoreid, but not us.
1935 */
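/*
 * For example (hypothetically), a family 17h CPU reporting 2 threads per
 * core with an APIC ID of 0x5 gets coreid 0x5 >> 1 = 2, while a pre-17h
 * part simply reuses cpu->cpu_id.
 */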
1936 static id_t
1937 cpuid_amd_get_coreid(cpu_t *cpu)
1938 {
1939 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1940
1941 if (cpi->cpi_family >= 0x17 &&
1942 is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1943 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1944 uint_t nthreads = BITX(cpi->cpi_extd[0x1e].cp_ebx, 15, 8) + 1;
1945 if (nthreads > 1) {
1946 VERIFY3U(nthreads, ==, 2);
1947 return (cpi->cpi_apicid >> 1);
1948 }
1949 }
1950
1951 return (cpu->cpu_id);
1952 }
1953
1954 /*
1955  * Determining IDs on AMD is a more challenging task. This is notable because of the
1956 * following two facts:
1957 *
1958 * 1. Before family 0x17 (Zen), there was no support for SMT and there was
1959 * also no way to get an actual unique core id from the system. As such, we
1960 * synthesize this case by using cpu->cpu_id. This scheme does not,
1961 * however, guarantee that sibling cores of a chip will have sequential
1962 * coreids starting at a multiple of the number of cores per chip - that is
1963 * usually the case, but if the ACPI MADT table is presented in a different
1964 * order then we need to perform a few more gymnastics for the pkgcoreid.
1965 *
1966  * 2. In families 0x15 and 0x16 (Bulldozer and co.) the cores came in groups
1967 * called compute units. These compute units share the L1I cache, L2 cache,
1968 * and the FPU. To deal with this, a new topology leaf was added in
1969 * 0x8000001e. However, parts of this leaf have different meanings
1970 * once we get to family 0x17.
1971 */
1972
1973 static void
1974 cpuid_amd_getids(cpu_t *cpu, uchar_t *features)
1975 {
1976 int i, first_half, coreidsz;
1977 uint32_t nb_caps_reg;
1978 uint_t node2_1;
1979 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1980 struct cpuid_regs *cp;
1981
1982 /*
1983 * Calculate the core id (this comes from hardware in family 0x17 if it
1984 * hasn't been stripped by virtualization). We always set the compute
1985 * unit id to the same value. Also, initialize the default number of
1986 * cores per compute unit and nodes per package. This will be
1987 * overwritten when we know information about a particular family.
1988 */
1989 cpi->cpi_coreid = cpuid_amd_get_coreid(cpu);
1990 cpi->cpi_compunitid = cpi->cpi_coreid;
1991 cpi->cpi_cores_per_compunit = 1;
1992 cpi->cpi_procnodes_per_pkg = 1;
1993
1994 /*
1995 * To construct the logical ID, we need to determine how many APIC IDs
1996 * are dedicated to the cores and threads. This is provided for us in
1997 * 0x80000008. However, if it's not present (say due to virtualization),
1998 * then we assume it's one. This should be present on all 64-bit AMD
1999 * processors. It was added in family 0xf (Hammer).
2000 */
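/*
 * For instance (hypothetically), a coreidsz of 4 means the low four APIC ID
 * bits name the logical CPU within the package, so cpi_clogid becomes
 * apicid & 0xf below.
 */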
2001 if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
2002 coreidsz = BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12);
2003
2004 /*
2005 * In AMD parlance chip is really a node while illumos
2006 * uses chip as equivalent to socket/package.
2007 */
2008 if (coreidsz == 0) {
2009 /* Use legacy method */
2010 for (i = 1; i < cpi->cpi_ncore_per_chip; i <<= 1)
2011 coreidsz++;
2012 if (coreidsz == 0)
2013 coreidsz = 1;
2014 }
2015 } else {
2016 /* Assume single-core part */
2017 coreidsz = 1;
2018 }
2019 cpi->cpi_clogid = cpi->cpi_apicid & ((1 << coreidsz) - 1);
2020
2021 /*
2022 * The package core ID varies depending on the family. For family 17h,
2023 * we can get this directly from leaf CPUID_LEAF_EXT_1e. Otherwise, we
2024  * can use the clogid as is. When family 17h is virtualized, the clogid
2025  * still suffices: if we don't have valid data in the leaf, then we won't
2026  * think we have SMT, and the cpi_clogid alone is enough.
2028 */
2029 if (cpi->cpi_family >= 0x17 &&
2030 is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
2031 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e &&
2032 cpi->cpi_extd[0x1e].cp_ebx != 0) {
2033 cpi->cpi_pkgcoreid = BITX(cpi->cpi_extd[0x1e].cp_ebx, 7, 0);
2034 } else {
2035 cpi->cpi_pkgcoreid = cpi->cpi_clogid;
2036 }
2037
2038 /*
2039 * Obtain the node ID and compute unit IDs. If we're on family 0x15
2040 * (bulldozer) or newer, then we can derive all of this from leaf
2041 * CPUID_LEAF_EXT_1e. Otherwise, the method varies by family.
2042 */
2043 if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
2044 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
2045 cp = &cpi->cpi_extd[0x1e];
2046
2047 cpi->cpi_procnodes_per_pkg = BITX(cp->cp_ecx, 10, 8) + 1;
2048 cpi->cpi_procnodeid = BITX(cp->cp_ecx, 7, 0);
2049
2050 /*
2051 * For Bulldozer-era CPUs, recalculate the compute unit
2052 * information.
2053 */
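/*
 * As an illustrative example, with 2 cores per compute unit, 8 cores per
 * chip, 2 nodes per package, a local unit id of 1 (%ebx[7:0]) and a
 * procnodeid of 2, the computation below yields
 * cpi_compunitid = 1 + (8 / 2) * (2 / 2) = 5.
 */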
2054 if (cpi->cpi_family >= 0x15 && cpi->cpi_family < 0x17) {
2055 cpi->cpi_cores_per_compunit =
2056 BITX(cp->cp_ebx, 15, 8) + 1;
2057 cpi->cpi_compunitid = BITX(cp->cp_ebx, 7, 0) +
2058 (cpi->cpi_ncore_per_chip /
2059 cpi->cpi_cores_per_compunit) *
2060 (cpi->cpi_procnodeid /
2061 cpi->cpi_procnodes_per_pkg);
2062 }
2063 } else if (cpi->cpi_family == 0xf || cpi->cpi_family >= 0x11) {
2064 cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7;
2065 } else if (cpi->cpi_family == 0x10) {
2066 /*
2067 * See if we are a multi-node processor.
2068 * All processors in the system have the same number of nodes
2069 */
2070 nb_caps_reg = pci_getl_func(0, 24, 3, 0xe8);
2071 if ((cpi->cpi_model < 8) || BITX(nb_caps_reg, 29, 29) == 0) {
2072 /* Single-node */
2073 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5,
2074 coreidsz);
2075 } else {
2076
2077 /*
2078 * Multi-node revision D (2 nodes per package
2079 * are supported)
2080 */
2081 cpi->cpi_procnodes_per_pkg = 2;
2082
2083 first_half = (cpi->cpi_pkgcoreid <=
2084 (cpi->cpi_ncore_per_chip/2 - 1));
2085
2086 if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) {
2087 /* We are BSP */
2088 cpi->cpi_procnodeid = (first_half ? 0 : 1);
2089 } else {
2090
2091 /* We are AP */
2092 /* NodeId[2:1] bits to use for reading F3xe8 */
2093 node2_1 = BITX(cpi->cpi_apicid, 5, 4) << 1;
2094
2095 nb_caps_reg =
2096 pci_getl_func(0, 24 + node2_1, 3, 0xe8);
2097
2098 /*
2099 * Check IntNodeNum bit (31:30, but bit 31 is
2100 * always 0 on dual-node processors)
2101 */
2102 if (BITX(nb_caps_reg, 30, 30) == 0)
2103 cpi->cpi_procnodeid = node2_1 +
2104 !first_half;
2105 else
2106 cpi->cpi_procnodeid = node2_1 +
2107 first_half;
2108 }
2109 }
2110 } else {
2111 cpi->cpi_procnodeid = 0;
2112 }
2113
2114 cpi->cpi_chipid =
2115 cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg;
2116
2117 cpi->cpi_ncore_bits = coreidsz;
2118 cpi->cpi_nthread_bits = ddi_fls(cpi->cpi_ncpu_per_chip /
2119 cpi->cpi_ncore_per_chip);
2120 }
2121
2122 static void
2123 spec_l1d_flush_noop(void)
2124 {
2125 }
2126
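/*
 * Flush the L1 data cache by writing the IA32_FLUSH_CMD MSR; installed as
 * the spec_l1d_flush hook when cpuid_scan_security() below decides that an
 * L1TF flush on VM entry is both needed and supported.
 */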
2127 static void
2128 spec_l1d_flush_msr(void)
2129 {
2130 wrmsr(MSR_IA32_FLUSH_CMD, IA32_FLUSH_CMD_L1D);
2131 }
2132
2133 void (*spec_l1d_flush)(void) = spec_l1d_flush_noop;
2134
2135 static void
2136 cpuid_scan_security(cpu_t *cpu, uchar_t *featureset)
2137 {
2138 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2139
2140 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
2141 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
2142 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBPB)
2143 add_x86_feature(featureset, X86FSET_IBPB);
2144 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBRS)
2145 add_x86_feature(featureset, X86FSET_IBRS);
2146 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_STIBP)
2147 add_x86_feature(featureset, X86FSET_STIBP);
2148 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBRS_ALL)
2149 add_x86_feature(featureset, X86FSET_IBRS_ALL);
2150 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_STIBP_ALL)
2151 add_x86_feature(featureset, X86FSET_STIBP_ALL);
2152 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_PREFER_IBRS)
2153 add_x86_feature(featureset, X86FSET_RSBA);
2154 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_SSBD)
2155 add_x86_feature(featureset, X86FSET_SSBD);
2156 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_VIRT_SSBD)
2157 add_x86_feature(featureset, X86FSET_SSBD_VIRT);
2158 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_SSB_NO)
2159 add_x86_feature(featureset, X86FSET_SSB_NO);
2160 } else if (cpi->cpi_vendor == X86_VENDOR_Intel &&
2161 cpi->cpi_maxeax >= 7) {
2162 struct cpuid_regs *ecp;
2163 ecp = &cpi->cpi_std[7];
2164
2165 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_SPEC_CTRL) {
2166 add_x86_feature(featureset, X86FSET_IBRS);
2167 add_x86_feature(featureset, X86FSET_IBPB);
2168 }
2169
2170 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_STIBP) {
2171 add_x86_feature(featureset, X86FSET_STIBP);
2172 }
2173
2174 /*
2175 * Don't read the arch caps MSR on xpv where we lack the
2176 * on_trap().
2177 */
2178 #ifndef __xpv
2179 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_ARCH_CAPS) {
2180 on_trap_data_t otd;
2181
2182 /*
2183 * Be paranoid and assume we'll get a #GP.
2184 */
2185 if (!on_trap(&otd, OT_DATA_ACCESS)) {
2186 uint64_t reg;
2187
2188 reg = rdmsr(MSR_IA32_ARCH_CAPABILITIES);
2189 if (reg & IA32_ARCH_CAP_RDCL_NO) {
2190 add_x86_feature(featureset,
2191 X86FSET_RDCL_NO);
2192 }
2193 if (reg & IA32_ARCH_CAP_IBRS_ALL) {
2194 add_x86_feature(featureset,
2195 X86FSET_IBRS_ALL);
2196 }
2197 if (reg & IA32_ARCH_CAP_RSBA) {
2198 add_x86_feature(featureset,
2199 X86FSET_RSBA);
2200 }
2201 if (reg & IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY) {
2202 add_x86_feature(featureset,
2203 X86FSET_L1D_VM_NO);
2204 }
2205 if (reg & IA32_ARCH_CAP_SSB_NO) {
2206 add_x86_feature(featureset,
2207 X86FSET_SSB_NO);
2208 }
2209 }
2210 no_trap();
2211 }
2212 #endif /* !__xpv */
2213
2214 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_SSBD)
2215 add_x86_feature(featureset, X86FSET_SSBD);
2216
2217 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_FLUSH_CMD)
2218 add_x86_feature(featureset, X86FSET_FLUSH_CMD);
2219 }
2220
2221 if (cpu->cpu_id != 0)
2222 return;
2223
2224 /*
2225 * We're the boot CPU, so let's figure out our L1TF status.
2226 *
2227 * First, if this is a RDCL_NO CPU, then we are not vulnerable: we don't
2228 * need to exclude with ht_acquire(), and we don't need to flush.
2229 */
2230 if (is_x86_feature(featureset, X86FSET_RDCL_NO)) {
2231 extern int ht_exclusion;
2232 ht_exclusion = 0;
2233 spec_l1d_flush = spec_l1d_flush_noop;
2234 membar_producer();
2235 return;
2236 }
2237
2238 /*
2239 * If HT is enabled, we will need HT exclusion, as well as the flush on
2240 * VM entry. If HT isn't enabled, we still need at least the flush for
2241 * the L1TF sequential case.
2242 *
2243 * However, if X86FSET_L1D_VM_NO is set, we're most likely running
2244 * inside a VM ourselves, and we don't need the flush.
2245 *
2246 * If we don't have the FLUSH_CMD available at all, we'd better just
2247 * hope HT is disabled.
2248 */
2249 if (is_x86_feature(featureset, X86FSET_FLUSH_CMD) &&
2250 !is_x86_feature(featureset, X86FSET_L1D_VM_NO)) {
2251 spec_l1d_flush = spec_l1d_flush_msr;
2252 } else {
2253 spec_l1d_flush = spec_l1d_flush_noop;
2254 }
2255
2256 membar_producer();
2257 }
2258
2259 /*
2260 * Setup XFeature_Enabled_Mask register. Required by xsave feature.
2261 */
2262 void
2263 setup_xfem(void)
2264 {
2265 uint64_t flags = XFEATURE_LEGACY_FP;
2266
2267 ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
2268
2269 if (is_x86_feature(x86_featureset, X86FSET_SSE))
2270 flags |= XFEATURE_SSE;
2271
2272 if (is_x86_feature(x86_featureset, X86FSET_AVX))
2273 flags |= XFEATURE_AVX;
2274
2275 if (is_x86_feature(x86_featureset, X86FSET_AVX512F))
2276 flags |= XFEATURE_AVX512;
2277
2278 set_xcr(XFEATURE_ENABLED_MASK, flags);
2279
2280 xsave_bv_all = flags;
2281 }
2282
2283 static void
2284 cpuid_pass1_topology(cpu_t *cpu, uchar_t *featureset)
2285 {
2286 struct cpuid_info *cpi;
2287
2288 cpi = cpu->cpu_m.mcpu_cpi;
2289
2290 if (cpi->cpi_vendor == X86_VENDOR_AMD) {
2291 cpuid_gather_amd_topology_leaves(cpu);
2292 }
2293
2294 cpi->cpi_apicid = cpuid_gather_apicid(cpi);
2295
2296 /*
2297 * Before we can calculate the IDs that we should assign to this
2298 * processor, we need to understand how many cores and threads it has.
2299 */
2300 switch (cpi->cpi_vendor) {
2301 case X86_VENDOR_Intel:
2302 cpuid_intel_ncores(cpi, &cpi->cpi_ncpu_per_chip,
2303 &cpi->cpi_ncore_per_chip);
2304 break;
2305 case X86_VENDOR_AMD:
2306 cpuid_amd_ncores(cpi, &cpi->cpi_ncpu_per_chip,
2307 &cpi->cpi_ncore_per_chip);
2308 break;
2309 default:
2310 /*
2311  * If we have some other x86 compatible chip, it's not clear how
2312  * it would behave. The most common case is virtualization
2313 * today, though there are also 64-bit VIA chips. Assume that
2314 * all we can get is the basic Leaf 1 HTT information.
2315 */
2316 if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) {
2317 cpi->cpi_ncore_per_chip = 1;
2318 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi);
2319 }
2320 break;
2321 }
2322
2323 /*
2324 * Based on the calculated number of threads and cores, potentially
2325  * assign the HTT and CMP features.
2326 */
2327 if (cpi->cpi_ncore_per_chip > 1) {
2328 add_x86_feature(featureset, X86FSET_CMP);
2329 }
2330
2331 if (cpi->cpi_ncpu_per_chip > 1 &&
2332 cpi->cpi_ncpu_per_chip != cpi->cpi_ncore_per_chip) {
2333 add_x86_feature(featureset, X86FSET_HTT);
2334 }
2335
2336 /*
2337  * Now that this has been set up, we need to go through and calculate all of
2338 * the rest of the parameters that exist. If we think the CPU doesn't
2339 * have either SMT (HTT) or CMP, then we basically go through and fake
2340 * up information in some way. The most likely case for this is
2341 * virtualization where we have a lot of partial topology information.
2342 */
2343 if (!is_x86_feature(featureset, X86FSET_HTT) &&
2344 !is_x86_feature(featureset, X86FSET_CMP)) {
2345 /*
2346 * This is a single core, single-threaded processor.
2347 */
2348 cpi->cpi_procnodes_per_pkg = 1;
2349 cpi->cpi_cores_per_compunit = 1;
2350 cpi->cpi_compunitid = 0;
2351 cpi->cpi_chipid = -1;
2352 cpi->cpi_clogid = 0;
2353 cpi->cpi_coreid = cpu->cpu_id;
2354 cpi->cpi_pkgcoreid = 0;
2355 if (cpi->cpi_vendor == X86_VENDOR_AMD) {
2356 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 3, 0);
2357 } else {
2358 cpi->cpi_procnodeid = cpi->cpi_chipid;
2359 }
2360 } else {
2361 switch (cpi->cpi_vendor) {
2362 case X86_VENDOR_Intel:
2363 cpuid_intel_getids(cpu, featureset);
2364 break;
2365 case X86_VENDOR_AMD:
2366 cpuid_amd_getids(cpu, featureset);
2367 break;
2368 default:
2369 /*
2370 * In this case, it's hard to say what we should do.
2371 * We're going to model them to the OS as single core
2372 * threads. We don't have a good identifier for them, so
2373 * we're just going to use the cpu id all on a single
2374 * chip.
2375 *
2376 * This case has historically been different from the
2377 * case above where we don't have HTT or CMP. While they
2378 * could be combined, we've opted to keep it separate to
2379 * minimize the risk of topology changes in weird cases.
2380 */
2381 cpi->cpi_procnodes_per_pkg = 1;
2382 cpi->cpi_cores_per_compunit = 1;
2383 cpi->cpi_chipid = 0;
2384 cpi->cpi_coreid = cpu->cpu_id;
2385 cpi->cpi_clogid = cpu->cpu_id;
2386 cpi->cpi_pkgcoreid = cpu->cpu_id;
2387 cpi->cpi_procnodeid = cpi->cpi_chipid;
2388 cpi->cpi_compunitid = cpi->cpi_coreid;
2389 break;
2390 }
2391 }
2392 }
2393
2394 void
2395 cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
2396 {
2397 uint32_t mask_ecx, mask_edx;
2398 struct cpuid_info *cpi;
2399 struct cpuid_regs *cp;
2400 int xcpuid;
2401 #if !defined(__xpv)
2402 extern int idle_cpu_prefer_mwait;
2403 #endif
2404
2405 /*
2406 * Space statically allocated for BSP, ensure pointer is set
2407 */
2408 if (cpu->cpu_id == 0) {
2409 if (cpu->cpu_m.mcpu_cpi == NULL)
2410 cpu->cpu_m.mcpu_cpi = &cpuid_info0;
2411 }
2412
2413 add_x86_feature(featureset, X86FSET_CPUID);
2414
2415 cpi = cpu->cpu_m.mcpu_cpi;
2416 ASSERT(cpi != NULL);
2417 cp = &cpi->cpi_std[0];
2418 cp->cp_eax = 0;
2419 cpi->cpi_maxeax = __cpuid_insn(cp);
2420 {
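/*
 * The 12-byte vendor string comes back in %ebx, %edx, %ecx order, e.g.
 * "Genu" "ineI" "ntel" or "Auth" "enti" "cAMD".
 */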
2421 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr;
2422 *iptr++ = cp->cp_ebx;
2423 *iptr++ = cp->cp_edx;
2424 *iptr++ = cp->cp_ecx;
2425 *(char *)&cpi->cpi_vendorstr[12] = '\0';
2426 }
2427
2428 cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr);
2429 x86_vendor = cpi->cpi_vendor; /* for compatibility */
2430
2431 /*
2432 * Limit the range in case of weird hardware
2433 */
2434 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX)
2435 cpi->cpi_maxeax = CPI_MAXEAX_MAX;
2436 if (cpi->cpi_maxeax < 1)
2437 goto pass1_done;
2438
2439 cp = &cpi->cpi_std[1];
2440 cp->cp_eax = 1;
2441 (void) __cpuid_insn(cp);
2442
2443 /*
2444 * Extract identifying constants for easy access.
2445 */
2446 cpi->cpi_model = CPI_MODEL(cpi);
2447 cpi->cpi_family = CPI_FAMILY(cpi);
2448
2449 if (cpi->cpi_family == 0xf)
2450 cpi->cpi_family += CPI_FAMILY_XTD(cpi);
2451
2452 /*
2453 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf.
2454 * Intel, and presumably everyone else, uses model == 0xf, as
2455 * one would expect (max value means possible overflow). Sigh.
2456 */
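/*
 * For example, an Intel part with leaf 1 %eax = 0x506e3 has base family 0x6,
 * extended model 0x5 and base model 0xe, giving a displayed model of 0x5e;
 * the family stays 0x6 because the extended family is only folded in when
 * the base family is 0xf.
 */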
2457
2458 switch (cpi->cpi_vendor) {
2459 case X86_VENDOR_Intel:
2460 if (IS_EXTENDED_MODEL_INTEL(cpi))
2461 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
2462 break;
2463 case X86_VENDOR_AMD:
2464 if (CPI_FAMILY(cpi) == 0xf)
2465 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
2466 break;
2467 default:
2468 if (cpi->cpi_model == 0xf)
2469 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
2470 break;
2471 }
2472
2473 cpi->cpi_step = CPI_STEP(cpi);
2474 cpi->cpi_brandid = CPI_BRANDID(cpi);
2475
2476 /*
2477 * *default* assumptions:
2478 * - believe %edx feature word
2479 * - ignore %ecx feature word
2480 * - 32-bit virtual and physical addressing
2481 */
2482 mask_edx = 0xffffffff;
2483 mask_ecx = 0;
2484
2485 cpi->cpi_pabits = cpi->cpi_vabits = 32;
2486
2487 switch (cpi->cpi_vendor) {
2488 case X86_VENDOR_Intel:
2489 if (cpi->cpi_family == 5)
2490 x86_type = X86_TYPE_P5;
2491 else if (IS_LEGACY_P6(cpi)) {
2492 x86_type = X86_TYPE_P6;
2493 pentiumpro_bug4046376 = 1;
2494 /*
2495 * Clear the SEP bit when it was set erroneously
2496 */
2497 if (cpi->cpi_model < 3 && cpi->cpi_step < 3)
2498 cp->cp_edx &= ~CPUID_INTC_EDX_SEP;
2499 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) {
2500 x86_type = X86_TYPE_P4;
2501 /*
2502 * We don't currently depend on any of the %ecx
2503 * features until Prescott, so we'll only check
2504 * this from P4 onwards. We might want to revisit
2505 * that idea later.
2506 */
2507 mask_ecx = 0xffffffff;
2508 } else if (cpi->cpi_family > 0xf)
2509 mask_ecx = 0xffffffff;
2510 /*
2511 * We don't support MONITOR/MWAIT if leaf 5 is not available
2512 * to obtain the monitor linesize.
2513 */
2514 if (cpi->cpi_maxeax < 5)
2515 mask_ecx &= ~CPUID_INTC_ECX_MON;
2516 break;
2517 case X86_VENDOR_IntelClone:
2518 default:
2519 break;
2520 case X86_VENDOR_AMD:
2521 #if defined(OPTERON_ERRATUM_108)
2522 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) {
2523 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0;
2524 cpi->cpi_model = 0xc;
2525 } else
2526 #endif
2527 if (cpi->cpi_family == 5) {
2528 /*
2529 * AMD K5 and K6
2530 *
2531 * These CPUs have an incomplete implementation
2532 * of MCA/MCE which we mask away.
2533 */
2534 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA);
2535
2536 /*
2537 * Model 0 uses the wrong (APIC) bit
2538 * to indicate PGE. Fix it here.
2539 */
2540 if (cpi->cpi_model == 0) {
2541 if (cp->cp_edx & 0x200) {
2542 cp->cp_edx &= ~0x200;
2543 cp->cp_edx |= CPUID_INTC_EDX_PGE;
2544 }
2545 }
2546
2547 /*
2548 * Early models had problems w/ MMX; disable.
2549 */
2550 if (cpi->cpi_model < 6)
2551 mask_edx &= ~CPUID_INTC_EDX_MMX;
2552 }
2553
2554 /*
2555 * For newer families, SSE3 and CX16, at least, are valid;
2556 * enable all
2557 */
2558 if (cpi->cpi_family >= 0xf)
2559 mask_ecx = 0xffffffff;
2560 /*
2561 * We don't support MONITOR/MWAIT if leaf 5 is not available
2562 * to obtain the monitor linesize.
2563 */
2564 if (cpi->cpi_maxeax < 5)
2565 mask_ecx &= ~CPUID_INTC_ECX_MON;
2566
2567 #if !defined(__xpv)
2568 /*
2569 * AMD has not historically used MWAIT in the CPU's idle loop.
2570 * Pre-family-10h Opterons do not have the MWAIT instruction. We
2571 * know for certain that in at least family 17h, per AMD, mwait
2572 * is preferred. Families in-between are less certain.
2573 */
2574 if (cpi->cpi_family < 0x17) {
2575 idle_cpu_prefer_mwait = 0;
2576 }
2577 #endif
2578
2579 break;
2580 case X86_VENDOR_TM:
2581 /*
2582 * workaround the NT workaround in CMS 4.1
2583 */
2584 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 &&
2585 (cpi->cpi_step == 2 || cpi->cpi_step == 3))
2586 cp->cp_edx |= CPUID_INTC_EDX_CX8;
2587 break;
2588 case X86_VENDOR_Centaur:
2589 /*
2590 * workaround the NT workarounds again
2591 */
2592 if (cpi->cpi_family == 6)
2593 cp->cp_edx |= CPUID_INTC_EDX_CX8;
2594 break;
2595 case X86_VENDOR_Cyrix:
2596 /*
2597 * We rely heavily on the probing in locore
2598 * to actually figure out what parts, if any,
2599 * of the Cyrix cpuid instruction to believe.
2600 */
2601 switch (x86_type) {
2602 case X86_TYPE_CYRIX_486:
2603 mask_edx = 0;
2604 break;
2605 case X86_TYPE_CYRIX_6x86:
2606 mask_edx = 0;
2607 break;
2608 case X86_TYPE_CYRIX_6x86L:
2609 mask_edx =
2610 CPUID_INTC_EDX_DE |
2611 CPUID_INTC_EDX_CX8;
2612 break;
2613 case X86_TYPE_CYRIX_6x86MX:
2614 mask_edx =
2615 CPUID_INTC_EDX_DE |
2616 CPUID_INTC_EDX_MSR |
2617 CPUID_INTC_EDX_CX8 |
2618 CPUID_INTC_EDX_PGE |
2619 CPUID_INTC_EDX_CMOV |
2620 CPUID_INTC_EDX_MMX;
2621 break;
2622 case X86_TYPE_CYRIX_GXm:
2623 mask_edx =
2624 CPUID_INTC_EDX_MSR |
2625 CPUID_INTC_EDX_CX8 |
2626 CPUID_INTC_EDX_CMOV |
2627 CPUID_INTC_EDX_MMX;
2628 break;
2629 case X86_TYPE_CYRIX_MediaGX:
2630 break;
2631 case X86_TYPE_CYRIX_MII:
2632 case X86_TYPE_VIA_CYRIX_III:
2633 mask_edx =
2634 CPUID_INTC_EDX_DE |
2635 CPUID_INTC_EDX_TSC |
2636 CPUID_INTC_EDX_MSR |
2637 CPUID_INTC_EDX_CX8 |
2638 CPUID_INTC_EDX_PGE |
2639 CPUID_INTC_EDX_CMOV |
2640 CPUID_INTC_EDX_MMX;
2641 break;
2642 default:
2643 break;
2644 }
2645 break;
2646 }
2647
2648 #if defined(__xpv)
2649 /*
2650 * Do not support MONITOR/MWAIT under a hypervisor
2651 */
2652 mask_ecx &= ~CPUID_INTC_ECX_MON;
2653 /*
2654 * Do not support XSAVE under a hypervisor for now
2655 */
2656 xsave_force_disable = B_TRUE;
2657
2658 #endif /* __xpv */
2659
2660 if (xsave_force_disable) {
2661 mask_ecx &= ~CPUID_INTC_ECX_XSAVE;
2662 mask_ecx &= ~CPUID_INTC_ECX_AVX;
2663 mask_ecx &= ~CPUID_INTC_ECX_F16C;
2664 mask_ecx &= ~CPUID_INTC_ECX_FMA;
2665 }
2666
2667 /*
2668 * Now we've figured out the masks that determine
2669 * which bits we choose to believe, apply the masks
2670 * to the feature words, then map the kernel's view
2671 * of these feature words into its feature word.
2672 */
2673 cp->cp_edx &= mask_edx;
2674 cp->cp_ecx &= mask_ecx;
2675
2676 /*
2677 * apply any platform restrictions (we don't call this
2678 * immediately after __cpuid_insn here, because we need the
2679 * workarounds applied above first)
2680 */
2681 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp);
2682
2683 /*
2684 * In addition to ecx and edx, Intel and AMD are storing a bunch of
2685 * instruction set extensions in leaf 7's ebx, ecx, and edx.
2686 */
2687 if (cpi->cpi_maxeax >= 7) {
2688 struct cpuid_regs *ecp;
2689 ecp = &cpi->cpi_std[7];
2690 ecp->cp_eax = 7;
2691 ecp->cp_ecx = 0;
2692 (void) __cpuid_insn(ecp);
2693
2694 /*
2695 * If XSAVE has been disabled, just ignore all of the
2696 * extended-save-area dependent flags here.
2697 */
2698 if (xsave_force_disable) {
2699 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_BMI1;
2700 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_BMI2;
2701 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_AVX2;
2702 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_MPX;
2703 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_ALL_AVX512;
2704 ecp->cp_ecx &= ~CPUID_INTC_ECX_7_0_ALL_AVX512;
2705 ecp->cp_edx &= ~CPUID_INTC_EDX_7_0_ALL_AVX512;
2706 }
2707
2708 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_SMEP)
2709 add_x86_feature(featureset, X86FSET_SMEP);
2710
2711 /*
2712 * We check disable_smap here in addition to in startup_smap()
2713 * to ensure CPUs that aren't the boot CPU don't accidentally
2714 * include it in the feature set and thus generate a mismatched
2715 * x86 feature set across CPUs.
2716 */
2717 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_SMAP &&
2718 disable_smap == 0)
2719 add_x86_feature(featureset, X86FSET_SMAP);
2720
2721 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_RDSEED)
2722 add_x86_feature(featureset, X86FSET_RDSEED);
2723
2724 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_ADX)
2725 add_x86_feature(featureset, X86FSET_ADX);
2726
2727 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_FSGSBASE)
2728 add_x86_feature(featureset, X86FSET_FSGSBASE);
2729
2730 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_CLFLUSHOPT)
2731 add_x86_feature(featureset, X86FSET_CLFLUSHOPT);
2732
2733 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
2734 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_INVPCID)
2735 add_x86_feature(featureset, X86FSET_INVPCID);
2736
2737 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_MPX)
2738 add_x86_feature(featureset, X86FSET_MPX);
2739
2740 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_CLWB)
2741 add_x86_feature(featureset, X86FSET_CLWB);
2742 }
2743 }
2744
2745 /*
2746 * fold in overrides from the "eeprom" mechanism
2747 */
2748 cp->cp_edx |= cpuid_feature_edx_include;
2749 cp->cp_edx &= ~cpuid_feature_edx_exclude;
2750
2751 cp->cp_ecx |= cpuid_feature_ecx_include;
2752 cp->cp_ecx &= ~cpuid_feature_ecx_exclude;
2753
2754 if (cp->cp_edx & CPUID_INTC_EDX_PSE) {
2755 add_x86_feature(featureset, X86FSET_LARGEPAGE);
2756 }
2757 if (cp->cp_edx & CPUID_INTC_EDX_TSC) {
2758 add_x86_feature(featureset, X86FSET_TSC);
2759 }
2760 if (cp->cp_edx & CPUID_INTC_EDX_MSR) {
2761 add_x86_feature(featureset, X86FSET_MSR);
2762 }
2763 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) {
2764 add_x86_feature(featureset, X86FSET_MTRR);
2765 }
2766 if (cp->cp_edx & CPUID_INTC_EDX_PGE) {
2767 add_x86_feature(featureset, X86FSET_PGE);
2768 }
2769 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) {
2770 add_x86_feature(featureset, X86FSET_CMOV);
2771 }
2772 if (cp->cp_edx & CPUID_INTC_EDX_MMX) {
2773 add_x86_feature(featureset, X86FSET_MMX);
2774 }
2775 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 &&
2776 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) {
2777 add_x86_feature(featureset, X86FSET_MCA);
2778 }
2779 if (cp->cp_edx & CPUID_INTC_EDX_PAE) {
2780 add_x86_feature(featureset, X86FSET_PAE);
2781 }
2782 if (cp->cp_edx & CPUID_INTC_EDX_CX8) {
2783 add_x86_feature(featureset, X86FSET_CX8);
2784 }
2785 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) {
2786 add_x86_feature(featureset, X86FSET_CX16);
2787 }
2788 if (cp->cp_edx & CPUID_INTC_EDX_PAT) {
2789 add_x86_feature(featureset, X86FSET_PAT);
2790 }
2791 if (cp->cp_edx & CPUID_INTC_EDX_SEP) {
2792 add_x86_feature(featureset, X86FSET_SEP);
2793 }
2794 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) {
2795 /*
2796 * In our implementation, fxsave/fxrstor
2797 * are prerequisites before we'll even
2798 * try and do SSE things.
2799 */
2800 if (cp->cp_edx & CPUID_INTC_EDX_SSE) {
2801 add_x86_feature(featureset, X86FSET_SSE);
2802 }
2803 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) {
2804 add_x86_feature(featureset, X86FSET_SSE2);
2805 }
2806 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) {
2807 add_x86_feature(featureset, X86FSET_SSE3);
2808 }
2809 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) {
2810 add_x86_feature(featureset, X86FSET_SSSE3);
2811 }
2812 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) {
2813 add_x86_feature(featureset, X86FSET_SSE4_1);
2814 }
2815 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) {
2816 add_x86_feature(featureset, X86FSET_SSE4_2);
2817 }
2818 if (cp->cp_ecx & CPUID_INTC_ECX_AES) {
2819 add_x86_feature(featureset, X86FSET_AES);
2820 }
2821 if (cp->cp_ecx & CPUID_INTC_ECX_PCLMULQDQ) {
2822 add_x86_feature(featureset, X86FSET_PCLMULQDQ);
2823 }
2824
2825 if (cpi->cpi_std[7].cp_ebx & CPUID_INTC_EBX_7_0_SHA)
2826 add_x86_feature(featureset, X86FSET_SHA);
2827
2828 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_UMIP)
2829 add_x86_feature(featureset, X86FSET_UMIP);
2830 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_PKU)
2831 add_x86_feature(featureset, X86FSET_PKU);
2832 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_OSPKE)
2833 add_x86_feature(featureset, X86FSET_OSPKE);
2834
2835 if (cp->cp_ecx & CPUID_INTC_ECX_XSAVE) {
2836 add_x86_feature(featureset, X86FSET_XSAVE);
2837
2838 /* We only test AVX & AVX512 when there is XSAVE */
2839
2840 if (cp->cp_ecx & CPUID_INTC_ECX_AVX) {
2841 add_x86_feature(featureset,
2842 X86FSET_AVX);
2843
2844 /*
2845 * Intel says we can't check these without also
2846 * checking AVX.
2847 */
2848 if (cp->cp_ecx & CPUID_INTC_ECX_F16C)
2849 add_x86_feature(featureset,
2850 X86FSET_F16C);
2851
2852 if (cp->cp_ecx & CPUID_INTC_ECX_FMA)
2853 add_x86_feature(featureset,
2854 X86FSET_FMA);
2855
2856 if (cpi->cpi_std[7].cp_ebx &
2857 CPUID_INTC_EBX_7_0_BMI1)
2858 add_x86_feature(featureset,
2859 X86FSET_BMI1);
2860
2861 if (cpi->cpi_std[7].cp_ebx &
2862 CPUID_INTC_EBX_7_0_BMI2)
2863 add_x86_feature(featureset,
2864 X86FSET_BMI2);
2865
2866 if (cpi->cpi_std[7].cp_ebx &
2867 CPUID_INTC_EBX_7_0_AVX2)
2868 add_x86_feature(featureset,
2869 X86FSET_AVX2);
2870 }
2871
2872 if (cpi->cpi_vendor == X86_VENDOR_Intel &&
2873 (cpi->cpi_std[7].cp_ebx &
2874 CPUID_INTC_EBX_7_0_AVX512F) != 0) {
2875 add_x86_feature(featureset, X86FSET_AVX512F);
2876
2877 if (cpi->cpi_std[7].cp_ebx &
2878 CPUID_INTC_EBX_7_0_AVX512DQ)
2879 add_x86_feature(featureset,
2880 X86FSET_AVX512DQ);
2881 if (cpi->cpi_std[7].cp_ebx &
2882 CPUID_INTC_EBX_7_0_AVX512IFMA)
2883 add_x86_feature(featureset,
2884 X86FSET_AVX512FMA);
2885 if (cpi->cpi_std[7].cp_ebx &
2886 CPUID_INTC_EBX_7_0_AVX512PF)
2887 add_x86_feature(featureset,
2888 X86FSET_AVX512PF);
2889 if (cpi->cpi_std[7].cp_ebx &
2890 CPUID_INTC_EBX_7_0_AVX512ER)
2891 add_x86_feature(featureset,
2892 X86FSET_AVX512ER);
2893 if (cpi->cpi_std[7].cp_ebx &
2894 CPUID_INTC_EBX_7_0_AVX512CD)
2895 add_x86_feature(featureset,
2896 X86FSET_AVX512CD);
2897 if (cpi->cpi_std[7].cp_ebx &
2898 CPUID_INTC_EBX_7_0_AVX512BW)
2899 add_x86_feature(featureset,
2900 X86FSET_AVX512BW);
2901 if (cpi->cpi_std[7].cp_ebx &
2902 CPUID_INTC_EBX_7_0_AVX512VL)
2903 add_x86_feature(featureset,
2904 X86FSET_AVX512VL);
2905
2906 if (cpi->cpi_std[7].cp_ecx &
2907 CPUID_INTC_ECX_7_0_AVX512VBMI)
2908 add_x86_feature(featureset,
2909 X86FSET_AVX512VBMI);
2910 if (cpi->cpi_std[7].cp_ecx &
2911 CPUID_INTC_ECX_7_0_AVX512VNNI)
2912 add_x86_feature(featureset,
2913 X86FSET_AVX512VNNI);
2914 if (cpi->cpi_std[7].cp_ecx &
2915 CPUID_INTC_ECX_7_0_AVX512VPOPCDQ)
2916 add_x86_feature(featureset,
2917 X86FSET_AVX512VPOPCDQ);
2918
2919 if (cpi->cpi_std[7].cp_edx &
2920 CPUID_INTC_EDX_7_0_AVX5124NNIW)
2921 add_x86_feature(featureset,
2922 X86FSET_AVX512NNIW);
2923 if (cpi->cpi_std[7].cp_edx &
2924 CPUID_INTC_EDX_7_0_AVX5124FMAPS)
2925 add_x86_feature(featureset,
2926 X86FSET_AVX512FMAPS);
2927 }
2928 }
2929 }
2930
2931 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
2932 if (cp->cp_ecx & CPUID_INTC_ECX_PCID) {
2933 add_x86_feature(featureset, X86FSET_PCID);
2934 }
2935 }
2936
2937 if (cp->cp_ecx & CPUID_INTC_ECX_X2APIC) {
2938 add_x86_feature(featureset, X86FSET_X2APIC);
2939 }
2940 if (cp->cp_edx & CPUID_INTC_EDX_DE) {
2941 add_x86_feature(featureset, X86FSET_DE);
2942 }
2943 #if !defined(__xpv)
2944 if (cp->cp_ecx & CPUID_INTC_ECX_MON) {
2945
2946 /*
2947  * We require the CLFLUSH instruction for the erratum workaround
2948  * needed to use MONITOR/MWAIT.
2949 */
2950 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
2951 cpi->cpi_mwait.support |= MWAIT_SUPPORT;
2952 add_x86_feature(featureset, X86FSET_MWAIT);
2953 } else {
2954 extern int idle_cpu_assert_cflush_monitor;
2955
2956 /*
2957 * All processors we are aware of which have
2958 * MONITOR/MWAIT also have CLFLUSH.
2959 */
2960 if (idle_cpu_assert_cflush_monitor) {
2961 ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) &&
2962 (cp->cp_edx & CPUID_INTC_EDX_CLFSH));
2963 }
2964 }
2965 }
2966 #endif /* __xpv */
2967
2968 if (cp->cp_ecx & CPUID_INTC_ECX_VMX) {
2969 add_x86_feature(featureset, X86FSET_VMX);
2970 }
2971
2972 if (cp->cp_ecx & CPUID_INTC_ECX_RDRAND)
2973 add_x86_feature(featureset, X86FSET_RDRAND);
2974
2975 /*
2976  * Only needed the first time; the rest of the cpus will follow suit.
2977  * We only capture this for the boot cpu.
2978 */
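/*
 * Leaf 1 %ebx[15:8] reports the CLFLUSH line size in 8-byte units, so the
 * common value of 8 corresponds to a 64-byte flush line.
 */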
2979 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
2980 add_x86_feature(featureset, X86FSET_CLFSH);
2981 x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8);
2982 }
2983 if (is_x86_feature(featureset, X86FSET_PAE))
2984 cpi->cpi_pabits = 36;
2985
2986 if (cpi->cpi_maxeax >= 0xD && !xsave_force_disable) {
2987 struct cpuid_regs r, *ecp;
2988
2989 ecp = &r;
2990 ecp->cp_eax = 0xD;
2991 ecp->cp_ecx = 1;
2992 ecp->cp_edx = ecp->cp_ebx = 0;
2993 (void) __cpuid_insn(ecp);
2994
2995 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVEOPT)
2996 add_x86_feature(featureset, X86FSET_XSAVEOPT);
2997 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVEC)
2998 add_x86_feature(featureset, X86FSET_XSAVEC);
2999 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVES)
3000 add_x86_feature(featureset, X86FSET_XSAVES);
3001 }
3002
3003 /*
3004 * Work on the "extended" feature information, doing
3005 * some basic initialization for cpuid_pass2()
3006 */
3007 xcpuid = 0;
3008 switch (cpi->cpi_vendor) {
3009 case X86_VENDOR_Intel:
3010 /*
3011 * On KVM we know we will have proper support for extended
3012 * cpuid.
3013 */
3014 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf ||
3015 (get_hwenv() == HW_KVM && cpi->cpi_family == 6 &&
3016 (cpi->cpi_model == 6 || cpi->cpi_model == 2)))
3017 xcpuid++;
3018 break;
3019 case X86_VENDOR_AMD:
3020 if (cpi->cpi_family > 5 ||
3021 (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
3022 xcpuid++;
3023 break;
3024 case X86_VENDOR_Cyrix:
3025 /*
3026 * Only these Cyrix CPUs are -known- to support
3027 * extended cpuid operations.
3028 */
3029 if (x86_type == X86_TYPE_VIA_CYRIX_III ||
3030 x86_type == X86_TYPE_CYRIX_GXm)
3031 xcpuid++;
3032 break;
3033 case X86_VENDOR_Centaur:
3034 case X86_VENDOR_TM:
3035 default:
3036 xcpuid++;
3037 break;
3038 }
3039
3040 if (xcpuid) {
3041 cp = &cpi->cpi_extd[0];
3042 cp->cp_eax = CPUID_LEAF_EXT_0;
3043 cpi->cpi_xmaxeax = __cpuid_insn(cp);
3044 }
3045
3046 if (cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) {
3047
3048 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX)
3049 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX;
3050
3051 switch (cpi->cpi_vendor) {
3052 case X86_VENDOR_Intel:
3053 case X86_VENDOR_AMD:
3054 if (cpi->cpi_xmaxeax < 0x80000001)
3055 break;
3056 cp = &cpi->cpi_extd[1];
3057 cp->cp_eax = 0x80000001;
3058 (void) __cpuid_insn(cp);
3059
3060 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
3061 cpi->cpi_family == 5 &&
3062 cpi->cpi_model == 6 &&
3063 cpi->cpi_step == 6) {
3064 /*
3065  * K6 model 6 uses bit 10 to indicate SYSC.
3066 * Later models use bit 11. Fix it here.
3067 */
3068 if (cp->cp_edx & 0x400) {
3069 cp->cp_edx &= ~0x400;
3070 cp->cp_edx |= CPUID_AMD_EDX_SYSC;
3071 }
3072 }
3073
3074 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp);
3075
3076 /*
3077 * Compute the additions to the kernel's feature word.
3078 */
3079 if (cp->cp_edx & CPUID_AMD_EDX_NX) {
3080 add_x86_feature(featureset, X86FSET_NX);
3081 }
3082
3083 /*
3084  * Regardless of whether or not we boot 64-bit,
3085 * we should have a way to identify whether
3086 * the CPU is capable of running 64-bit.
3087 */
3088 if (cp->cp_edx & CPUID_AMD_EDX_LM) {
3089 add_x86_feature(featureset, X86FSET_64);
3090 }
3091
3092 /* 1 GB large page - enable only for 64 bit kernel */
3093 if (cp->cp_edx & CPUID_AMD_EDX_1GPG) {
3094 add_x86_feature(featureset, X86FSET_1GPG);
3095 }
3096
3097 if ((cpi->cpi_vendor == X86_VENDOR_AMD) &&
3098 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) &&
3099 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) {
3100 add_x86_feature(featureset, X86FSET_SSE4A);
3101 }
3102
3103 /*
3104 * It's really tricky to support syscall/sysret in
3105 * the i386 kernel; we rely on sysenter/sysexit
3106 * instead. In the amd64 kernel, things are -way-
3107 * better.
3108 */
3109 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) {
3110 add_x86_feature(featureset, X86FSET_ASYSC);
3111 }
3112
3113 /*
3114 * While we're thinking about system calls, note
3115 * that AMD processors don't support sysenter
3116 * in long mode at all, so don't try to program them.
3117 */
3118 if (x86_vendor == X86_VENDOR_AMD) {
3119 remove_x86_feature(featureset, X86FSET_SEP);
3120 }
3121
3122 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) {
3123 add_x86_feature(featureset, X86FSET_TSCP);
3124 }
3125
3126 if (cp->cp_ecx & CPUID_AMD_ECX_SVM) {
3127 add_x86_feature(featureset, X86FSET_SVM);
3128 }
3129
3130 if (cp->cp_ecx & CPUID_AMD_ECX_TOPOEXT) {
3131 add_x86_feature(featureset, X86FSET_TOPOEXT);
3132 }
3133
3134 if (cp->cp_ecx & CPUID_AMD_ECX_XOP) {
3135 add_x86_feature(featureset, X86FSET_XOP);
3136 }
3137
3138 if (cp->cp_ecx & CPUID_AMD_ECX_FMA4) {
3139 add_x86_feature(featureset, X86FSET_FMA4);
3140 }
3141
3142 if (cp->cp_ecx & CPUID_AMD_ECX_TBM) {
3143 add_x86_feature(featureset, X86FSET_TBM);
3144 }
3145
3146 if (cp->cp_ecx & CPUID_AMD_ECX_MONITORX) {
3147 add_x86_feature(featureset, X86FSET_MONITORX);
3148 }
3149 break;
3150 default:
3151 break;
3152 }
3153
3154 /*
3155 * Get CPUID data about processor cores and hyperthreads.
3156 */
3157 switch (cpi->cpi_vendor) {
3158 case X86_VENDOR_Intel:
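/*
 * Cache deterministic cache parameters (leaf 4, %ecx == 0); on
 * Intel, %eax[31:26] of this leaf also encodes the maximum number
 * of addressable core IDs per package (minus one).
 */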
3159 if (cpi->cpi_maxeax >= 4) {
3160 cp = &cpi->cpi_std[4];
3161 cp->cp_eax = 4;
3162 cp->cp_ecx = 0;
3163 (void) __cpuid_insn(cp);
3164 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp);
3165 }
3166 /*FALLTHROUGH*/
3167 case X86_VENDOR_AMD:
3168 if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8)
3169 break;
3170 cp = &cpi->cpi_extd[8];
3171 cp->cp_eax = CPUID_LEAF_EXT_8;
3172 (void) __cpuid_insn(cp);
3173 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8,
3174 cp);
3175
3176 /*
3177 * AMD uses ebx for some extended functions.
3178 */
3179 if (cpi->cpi_vendor == X86_VENDOR_AMD) {
3180 /*
3181 * While we're here, check for the AMD "Error
3182 * Pointer Zero/Restore" feature. This can be
3183  * used to set up the FP save handlers
3184 * appropriately.
3185 */
3186 if (cp->cp_ebx & CPUID_AMD_EBX_ERR_PTR_ZERO) {
3187 cpi->cpi_fp_amd_save = 0;
3188 } else {
3189 cpi->cpi_fp_amd_save = 1;
3190 }
3191
3192 if (cp->cp_ebx & CPUID_AMD_EBX_CLZERO) {
3193 add_x86_feature(featureset,
3194 X86FSET_CLZERO);
3195 }
3196 }
3197
3198 /*
3199 * Virtual and physical address limits from
3200 * cpuid override previously guessed values.
3201 */
3202 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0);
3203 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8);
3204 break;
3205 default:
3206 break;
3207 }
3208
3209 /*
3210 * Get CPUID data about TSC Invariance in Deep C-State.
3211 */
3212 switch (cpi->cpi_vendor) {
3213 case X86_VENDOR_Intel:
3214 case X86_VENDOR_AMD:
3215 if (cpi->cpi_maxeax >= 7) {
3216 cp = &cpi->cpi_extd[7];
3217 cp->cp_eax = 0x80000007;
3218 cp->cp_ecx = 0;
3219 (void) __cpuid_insn(cp);
3220 }
3221 break;
3222 default:
3223 break;
3224 }
3225 }
3226
3227 cpuid_pass1_topology(cpu, featureset);
3228
3229 /*
3230 * Synthesize chip "revision" and socket type
3231 */
3232 cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family,
3233 cpi->cpi_model, cpi->cpi_step);
3234 cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor,
3235 cpi->cpi_family, cpi->cpi_model, cpi->cpi_step);
3236 cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family,
3237 cpi->cpi_model, cpi->cpi_step);
3238
3239 if (cpi->cpi_vendor == X86_VENDOR_AMD) {
3240 if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8 &&
3241 cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_ERR_PTR_ZERO) {
3242 /* Special handling for AMD FP not necessary. */
3243 cpi->cpi_fp_amd_save = 0;
3244 } else {
3245 cpi->cpi_fp_amd_save = 1;
3246 }
3247 }
3248
3249 /*
3250 * Check the processor leaves that are used for security features.
3251 */
3252 cpuid_scan_security(cpu, featureset);
3253
3254 pass1_done:
3255 cpi->cpi_pass = 1;
3256 }
3257
3258 /*
3259 * Make copies of the cpuid table entries we depend on, in
3260 * part for ease of parsing now, in part so that we have only
3261 * one place to correct any of it, in part for ease of
3262 * later export to userland, and in part so we can look at
3263 * this stuff in a crash dump.
3264 */
3265
3266 /*ARGSUSED*/
3267 void
3268 cpuid_pass2(cpu_t *cpu)
3269 {
3270 uint_t n, nmax;
3271 int i;
3272 struct cpuid_regs *cp;
3273 uint8_t *dp;
3274 uint32_t *iptr;
3275 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
3276
3277 ASSERT(cpi->cpi_pass == 1);
3278
3279 if (cpi->cpi_maxeax < 1)
3280 goto pass2_done;
3281
3282 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD)
3283 nmax = NMAX_CPI_STD;
3284 /*
3285 * (We already handled n == 0 and n == 1 in pass 1)
3286 */
3287 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) {
3288 cp->cp_eax = n;
3289
3290 /*
3291 * n == 7 was handled in pass 1
3292 */
3293 if (n == 7)
3294 continue;
3295
3296 /*
3297 * CPUID function 4 expects %ecx to be initialized
3298 * with an index which indicates which cache to return
3299 * information about. The OS is expected to call function 4
3300 * with %ecx set to 0, 1, 2, ... until it returns with
3301 * EAX[4:0] set to 0, which indicates there are no more
3302 * caches.
3303 *
3304 * Here, populate cpi_std[4] with the information returned by
3305 * function 4 when %ecx == 0, and do the rest in cpuid_pass3()
3306 * when dynamic memory allocation becomes available.
3307 *
3308 * Note: we need to explicitly initialize %ecx here, since
3309 * function 4 may have been previously invoked.
3310 */
3311 if (n == 4)
3312 cp->cp_ecx = 0;
3313
3314 (void) __cpuid_insn(cp);
3315 platform_cpuid_mangle(cpi->cpi_vendor, n, cp);
3316 switch (n) {
3317 case 2:
3318 /*
3319 * "the lower 8 bits of the %eax register
3320 * contain a value that identifies the number
3321 * of times the cpuid [instruction] has to be
3322 * executed to obtain a complete image of the
3323 * processor's caching systems."
3324 *
3325 * How *do* they make this stuff up?
3326 */
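/*
 * Each iteration of leaf 2 returns 16 descriptor bytes (four
 * 4-byte registers), hence the multiplication by sizeof (*cp).
 */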
3327 cpi->cpi_ncache = sizeof (*cp) *
3328 BITX(cp->cp_eax, 7, 0);
3329 if (cpi->cpi_ncache == 0)
3330 break;
3331 cpi->cpi_ncache--; /* skip count byte */
3332
3333 /*
3334  * Well, for now, rather than attempting to implement
3335  * this slightly dubious algorithm, we just look
3336  * at the first 15 descriptor bytes.
3337 */
3338 if (cpi->cpi_ncache > (sizeof (*cp) - 1))
3339 cpi->cpi_ncache = sizeof (*cp) - 1;
3340
3341 dp = cpi->cpi_cacheinfo;
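/*
 * Bit 31 of each register is clear when that register holds
 * valid descriptor bytes; the low byte of %eax is the
 * iteration count, so it is skipped.
 */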
3342 if (BITX(cp->cp_eax, 31, 31) == 0) {
3343 uint8_t *p = (void *)&cp->cp_eax;
3344 for (i = 1; i < 4; i++)
3345 if (p[i] != 0)
3346 *dp++ = p[i];
3347 }
3348 if (BITX(cp->cp_ebx, 31, 31) == 0) {
3349 uint8_t *p = (void *)&cp->cp_ebx;
3350 for (i = 0; i < 4; i++)
3351 if (p[i] != 0)
3352 *dp++ = p[i];
3353 }
3354 if (BITX(cp->cp_ecx, 31, 31) == 0) {
3355 uint8_t *p = (void *)&cp->cp_ecx;
3356 for (i = 0; i < 4; i++)
3357 if (p[i] != 0)
3358 *dp++ = p[i];
3359 }
3360 if (BITX(cp->cp_edx, 31, 31) == 0) {
3361 uint8_t *p = (void *)&cp->cp_edx;
3362 for (i = 0; i < 4; i++)
3363 if (p[i] != 0)
3364 *dp++ = p[i];
3365 }
3366 break;
3367
3368 case 3: /* Processor serial number, if PSN supported */
3369 break;
3370
3371 case 4: /* Deterministic cache parameters */
3372 break;
3373
3374 case 5: /* Monitor/Mwait parameters */
3375 {
3376 size_t mwait_size;
3377
3378 /*
3379  * Check cpi_mwait.support, which was set in cpuid_pass1().
3380 */
3381 if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT))
3382 break;
3383
3384 /*
3385  * Protect ourselves from an insane mwait line size.
3386 * Workaround for incomplete hardware emulator(s).
3387 */
3388 mwait_size = (size_t)MWAIT_SIZE_MAX(cpi);
3389 if (mwait_size < sizeof (uint32_t) ||
3390 !ISP2(mwait_size)) {
3391 #if DEBUG
3392 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait "
3393 "size %ld", cpu->cpu_id, (long)mwait_size);
3394 #endif
3395 break;
3396 }
3397
3398 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi);
3399 cpi->cpi_mwait.mon_max = mwait_size;
3400 if (MWAIT_EXTENSION(cpi)) {
3401 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS;
3402 if (MWAIT_INT_ENABLE(cpi))
3403 cpi->cpi_mwait.support |=
3404 MWAIT_ECX_INT_ENABLE;
3405 }
3406 break;
3407 }
3408 default:
3409 break;
3410 }
3411 }
3412
3413 /*
3414 * XSAVE enumeration
3415 */
3416 if (cpi->cpi_maxeax >= 0xD) {
3417 struct cpuid_regs regs;
3418 boolean_t cpuid_d_valid = B_TRUE;
3419
3420 cp = &regs;
3421 cp->cp_eax = 0xD;
3422 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
3423
3424 (void) __cpuid_insn(cp);
3425
3426 /*
3427 * Sanity checks for debug
3428 */
3429 if ((cp->cp_eax & XFEATURE_LEGACY_FP) == 0 ||
3430 (cp->cp_eax & XFEATURE_SSE) == 0) {
3431 cpuid_d_valid = B_FALSE;
3432 }
3433
3434 cpi->cpi_xsave.xsav_hw_features_low = cp->cp_eax;
3435 cpi->cpi_xsave.xsav_hw_features_high = cp->cp_edx;
3436 cpi->cpi_xsave.xsav_max_size = cp->cp_ecx;
3437
3438 /*
3439 * If the hw supports AVX, get the size and offset in the save
3440 * area for the ymm state.
3441 */
3442 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX) {
3443 cp->cp_eax = 0xD;
3444 cp->cp_ecx = 2;
3445 cp->cp_edx = cp->cp_ebx = 0;
3446
3447 (void) __cpuid_insn(cp);
3448
3449 if (cp->cp_ebx != CPUID_LEAFD_2_YMM_OFFSET ||
3450 cp->cp_eax != CPUID_LEAFD_2_YMM_SIZE) {
3451 cpuid_d_valid = B_FALSE;
3452 }
3453
3454 cpi->cpi_xsave.ymm_size = cp->cp_eax;
3455 cpi->cpi_xsave.ymm_offset = cp->cp_ebx;
3456 }
3457
3458 /*
3459 * If the hw supports MPX, get the size and offset in the
3460 * save area for BNDREGS and BNDCSR.
3461 */
3462 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_MPX) {
3463 cp->cp_eax = 0xD;
3464 cp->cp_ecx = 3;
3465 cp->cp_edx = cp->cp_ebx = 0;
3466
3467 (void) __cpuid_insn(cp);
3468
3469 cpi->cpi_xsave.bndregs_size = cp->cp_eax;
3470 cpi->cpi_xsave.bndregs_offset = cp->cp_ebx;
3471
3472 cp->cp_eax = 0xD;
3473 cp->cp_ecx = 4;
3474 cp->cp_edx = cp->cp_ebx = 0;
3475
3476 (void) __cpuid_insn(cp);
3477
3478 cpi->cpi_xsave.bndcsr_size = cp->cp_eax;
3479 cpi->cpi_xsave.bndcsr_offset = cp->cp_ebx;
3480 }
3481
3482 /*
3483 * If the hw supports AVX512, get the size and offset in the
3484 * save area for the opmask registers and zmm state.
3485 */
3486 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX512) {
3487 cp->cp_eax = 0xD;
3488 cp->cp_ecx = 5;
3489 cp->cp_edx = cp->cp_ebx = 0;
3490
3491 (void) __cpuid_insn(cp);
3492
3493 cpi->cpi_xsave.opmask_size = cp->cp_eax;
3494 cpi->cpi_xsave.opmask_offset = cp->cp_ebx;
3495
3496 cp->cp_eax = 0xD;
3497 cp->cp_ecx = 6;
3498 cp->cp_edx = cp->cp_ebx = 0;
3499
3500 (void) __cpuid_insn(cp);
3501
3502 cpi->cpi_xsave.zmmlo_size = cp->cp_eax;
3503 cpi->cpi_xsave.zmmlo_offset = cp->cp_ebx;
3504
3505 cp->cp_eax = 0xD;
3506 cp->cp_ecx = 7;
3507 cp->cp_edx = cp->cp_ebx = 0;
3508
3509 (void) __cpuid_insn(cp);
3510
3511 cpi->cpi_xsave.zmmhi_size = cp->cp_eax;
3512 cpi->cpi_xsave.zmmhi_offset = cp->cp_ebx;
3513 }
3514
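/*
 * Settle on the size of the XSAVE area: zero when XSAVE is not
 * usable, otherwise the maximum size reported by CPUID.0xD when
 * that data looks sane.
 */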
3515 if (is_x86_feature(x86_featureset, X86FSET_XSAVE) == 0) {
3516 xsave_state_size = 0;
3517 } else if (cpuid_d_valid) {
3518 xsave_state_size = cpi->cpi_xsave.xsav_max_size;
3519 } else {
3520 /* Broken CPUID 0xD, probably in HVM */
3521 cmn_err(CE_WARN, "cpu%d: CPUID.0xD returns invalid "
3522 "value: hw_low = %d, hw_high = %d, xsave_size = %d"
3523 ", ymm_size = %d, ymm_offset = %d\n",
3524 cpu->cpu_id, cpi->cpi_xsave.xsav_hw_features_low,
3525 cpi->cpi_xsave.xsav_hw_features_high,
3526 (int)cpi->cpi_xsave.xsav_max_size,
3527 (int)cpi->cpi_xsave.ymm_size,
3528 (int)cpi->cpi_xsave.ymm_offset);
3529
3530 if (xsave_state_size != 0) {
3531 /*
3532 * This must be a non-boot CPU. We cannot
3533  * continue, because the boot CPU has already
3534 * enabled XSAVE.
3535 */
3536 ASSERT(cpu->cpu_id != 0);
3537 cmn_err(CE_PANIC, "cpu%d: we have already "
3538 "enabled XSAVE on boot cpu, cannot "
3539 "continue.", cpu->cpu_id);
3540 } else {
3541 /*
3542 * If we reached here on the boot CPU, it's also
3543 * almost certain that we'll reach here on the
3544  * non-boot CPUs. When we're here on the boot CPU
3545  * we should disable the feature; on a non-boot
3546  * CPU we need to confirm that we already have.
3547 */
3548 if (cpu->cpu_id == 0) {
3549 remove_x86_feature(x86_featureset,
3550 X86FSET_XSAVE);
3551 remove_x86_feature(x86_featureset,
3552 X86FSET_AVX);
3553 remove_x86_feature(x86_featureset,
3554 X86FSET_F16C);
3555 remove_x86_feature(x86_featureset,
3556 X86FSET_BMI1);
3557 remove_x86_feature(x86_featureset,
3558 X86FSET_BMI2);
3559 remove_x86_feature(x86_featureset,
3560 X86FSET_FMA);
3561 remove_x86_feature(x86_featureset,
3562 X86FSET_AVX2);
3563 remove_x86_feature(x86_featureset,
3564 X86FSET_MPX);
3565 remove_x86_feature(x86_featureset,
3566 X86FSET_AVX512F);
3567 remove_x86_feature(x86_featureset,
3568 X86FSET_AVX512DQ);
3569 remove_x86_feature(x86_featureset,
3570 X86FSET_AVX512PF);
3571 remove_x86_feature(x86_featureset,
3572 X86FSET_AVX512ER);
3573 remove_x86_feature(x86_featureset,
3574 X86FSET_AVX512CD);
3575 remove_x86_feature(x86_featureset,
3576 X86FSET_AVX512BW);
3577 remove_x86_feature(x86_featureset,
3578 X86FSET_AVX512VL);
3579 remove_x86_feature(x86_featureset,
3580 X86FSET_AVX512FMA);
3581 remove_x86_feature(x86_featureset,
3582 X86FSET_AVX512VBMI);
3583 remove_x86_feature(x86_featureset,
3584 X86FSET_AVX512VNNI);
3585 remove_x86_feature(x86_featureset,
3586 X86FSET_AVX512VPOPCDQ);
3587 remove_x86_feature(x86_featureset,
3588 X86FSET_AVX512NNIW);
3589 remove_x86_feature(x86_featureset,
3590 X86FSET_AVX512FMAPS);
3591
3592 CPI_FEATURES_ECX(cpi) &=
3593 ~CPUID_INTC_ECX_XSAVE;
3594 CPI_FEATURES_ECX(cpi) &=
3595 ~CPUID_INTC_ECX_AVX;
3596 CPI_FEATURES_ECX(cpi) &=
3597 ~CPUID_INTC_ECX_F16C;
3598 CPI_FEATURES_ECX(cpi) &=
3599 ~CPUID_INTC_ECX_FMA;
3600 CPI_FEATURES_7_0_EBX(cpi) &=
3601 ~CPUID_INTC_EBX_7_0_BMI1;
3602 CPI_FEATURES_7_0_EBX(cpi) &=
3603 ~CPUID_INTC_EBX_7_0_BMI2;
3604 CPI_FEATURES_7_0_EBX(cpi) &=
3605 ~CPUID_INTC_EBX_7_0_AVX2;
3606 CPI_FEATURES_7_0_EBX(cpi) &=
3607 ~CPUID_INTC_EBX_7_0_MPX;
3608 CPI_FEATURES_7_0_EBX(cpi) &=
3609 ~CPUID_INTC_EBX_7_0_ALL_AVX512;
3610
3611 CPI_FEATURES_7_0_ECX(cpi) &=
3612 ~CPUID_INTC_ECX_7_0_ALL_AVX512;
3613
3614 CPI_FEATURES_7_0_EDX(cpi) &=
3615 ~CPUID_INTC_EDX_7_0_ALL_AVX512;
3616
3617 xsave_force_disable = B_TRUE;
3618 } else {
3619 VERIFY(is_x86_feature(x86_featureset,
3620 X86FSET_XSAVE) == B_FALSE);
3621 }
3622 }
3623 }
3624 }
3625
3626
3627 if ((cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) == 0)
3628 goto pass2_done;
3629
3630 if ((nmax = cpi->cpi_xmaxeax - CPUID_LEAF_EXT_0 + 1) > NMAX_CPI_EXTD)
3631 nmax = NMAX_CPI_EXTD;
3632 /*
3633 * Copy the extended properties, fixing them as we go.
3634 * (We already handled n == 0 and n == 1 in pass 1)
3635 */
3636 iptr = (void *)cpi->cpi_brandstr;
3637 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) {
3638 cp->cp_eax = CPUID_LEAF_EXT_0 + n;
3639 (void) __cpuid_insn(cp);
3640 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_0 + n,
3641 cp);
3642 switch (n) {
3643 case 2:
3644 case 3:
3645 case 4:
3646 /*
3647 * Extract the brand string
3648 */
3649 *iptr++ = cp->cp_eax;
3650 *iptr++ = cp->cp_ebx;
3651 *iptr++ = cp->cp_ecx;
3652 *iptr++ = cp->cp_edx;
3653 break;
3654 case 5:
3655 switch (cpi->cpi_vendor) {
3656 case X86_VENDOR_AMD:
3657 /*
3658 * The Athlon and Duron were the first
3659 * parts to report the sizes of the
3660  * TLB for large pages. For earlier parts,
3661  * we don't trust the data.
3662 */
3663 if (cpi->cpi_family < 6 ||
3664 (cpi->cpi_family == 6 &&
3665 cpi->cpi_model < 1))
3666 cp->cp_eax = 0;
3667 break;
3668 default:
3669 break;
3670 }
3671 break;
3672 case 6:
3673 switch (cpi->cpi_vendor) {
3674 case X86_VENDOR_AMD:
3675 /*
3676 * The Athlon and Duron were the first
3677  * AMD parts with L2 TLBs.
3678 * Before then, don't trust the data.
3679 */
3680 if (cpi->cpi_family < 6 ||
3681 (cpi->cpi_family == 6 &&
3682 cpi->cpi_model < 1))
3683 cp->cp_eax = cp->cp_ebx = 0;
3684 /*
3685 * AMD Duron rev A0 reports L2
3686 * cache size incorrectly as 1K
3687 * when it is really 64K
3688 */
3689 if (cpi->cpi_family == 6 &&
3690 cpi->cpi_model == 3 &&
3691 cpi->cpi_step == 0) {
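/* force %ecx[31:16], the L2 size in KB, to 64 */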
3692 cp->cp_ecx &= 0xffff;
3693 cp->cp_ecx |= 0x400000;
3694 }
3695 break;
3696 case X86_VENDOR_Cyrix: /* VIA C3 */
3697 /*
3698 * VIA C3 processors are a bit messed
3699 * up w.r.t. encoding cache sizes in %ecx
3700 */
3701 if (cpi->cpi_family != 6)
3702 break;
3703 /*
3704 * model 7 and 8 were incorrectly encoded
3705 *
3706 * xxx is model 8 really broken?
3707 */
3708 if (cpi->cpi_model == 7 ||
3709 cpi->cpi_model == 8)
3710 cp->cp_ecx =
3711 BITX(cp->cp_ecx, 31, 24) << 16 |
3712 BITX(cp->cp_ecx, 23, 16) << 12 |
3713 BITX(cp->cp_ecx, 15, 8) << 8 |
3714 BITX(cp->cp_ecx, 7, 0);
3715 /*
3716 * model 9 stepping 1 has wrong associativity
3717 */
3718 if (cpi->cpi_model == 9 && cpi->cpi_step == 1)
3719 cp->cp_ecx |= 8 << 12;
3720 break;
3721 case X86_VENDOR_Intel:
3722 /*
3723 * Extended L2 Cache features function.
3724 * First appeared on Prescott.
3725 */
3726 default:
3727 break;
3728 }
3729 break;
3730 default:
3731 break;
3732 }
3733 }
3734
3735 pass2_done:
3736 cpi->cpi_pass = 2;
3737 }
3738
3739 static const char *
3740 intel_cpubrand(const struct cpuid_info *cpi)
3741 {
3742 int i;
3743
3744 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
3745 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
3746 return ("i486");
3747
3748 switch (cpi->cpi_family) {
3749 case 5:
3750 return ("Intel Pentium(r)");
3751 case 6:
3752 switch (cpi->cpi_model) {
3753 uint_t celeron, xeon;
3754 const struct cpuid_regs *cp;
3755 case 0:
3756 case 1:
3757 case 2:
3758 return ("Intel Pentium(r) Pro");
3759 case 3:
3760 case 4:
3761 return ("Intel Pentium(r) II");
3762 case 6:
3763 return ("Intel Celeron(r)");
3764 case 5:
3765 case 7:
3766 celeron = xeon = 0;
3767 cp = &cpi->cpi_std[2]; /* cache info */
3768
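/*
 * Scan the leaf 2 cache descriptors: 0x40 (no L2 cache)
 * suggests a Celeron, 0x44-0x45 (large L2) suggests a Xeon.
 */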
3769 for (i = 1; i < 4; i++) {
3770 uint_t tmp;
3771
3772 tmp = (cp->cp_eax >> (8 * i)) & 0xff;
3773 if (tmp == 0x40)
3774 celeron++;
3775 if (tmp >= 0x44 && tmp <= 0x45)
3776 xeon++;
3777 }
3778
3779 for (i = 0; i < 2; i++) {
3780 uint_t tmp;
3781
3782 tmp = (cp->cp_ebx >> (8 * i)) & 0xff;
3783 if (tmp == 0x40)
3784 celeron++;
3785 else if (tmp >= 0x44 && tmp <= 0x45)
3786 xeon++;
3787 }
3788
3789 for (i = 0; i < 4; i++) {
3790 uint_t tmp;
3791
3792 tmp = (cp->cp_ecx >> (8 * i)) & 0xff;
3793 if (tmp == 0x40)
3794 celeron++;
3795 else if (tmp >= 0x44 && tmp <= 0x45)
3796 xeon++;
3797 }
3798
3799 for (i = 0; i < 4; i++) {
3800 uint_t tmp;
3801
3802 tmp = (cp->cp_edx >> (8 * i)) & 0xff;
3803 if (tmp == 0x40)
3804 celeron++;
3805 else if (tmp >= 0x44 && tmp <= 0x45)
3806 xeon++;
3807 }
3808
3809 if (celeron)
3810 return ("Intel Celeron(r)");
3811 if (xeon)
3812 return (cpi->cpi_model == 5 ?
3813 "Intel Pentium(r) II Xeon(tm)" :
3814 "Intel Pentium(r) III Xeon(tm)");
3815 return (cpi->cpi_model == 5 ?
3816 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" :
3817 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)");
3818 default:
3819 break;
3820 }
3821 default:
3822 break;
3823 }
3824
3825 /* BrandID is present if the field is nonzero */
3826 if (cpi->cpi_brandid != 0) {
3827 static const struct {
3828 uint_t bt_bid;
3829 const char *bt_str;
3830 } brand_tbl[] = {
3831 { 0x1, "Intel(r) Celeron(r)" },
3832 { 0x2, "Intel(r) Pentium(r) III" },
3833 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" },
3834 { 0x4, "Intel(r) Pentium(r) III" },
3835 { 0x6, "Mobile Intel(r) Pentium(r) III" },
3836 { 0x7, "Mobile Intel(r) Celeron(r)" },
3837 { 0x8, "Intel(r) Pentium(r) 4" },
3838 { 0x9, "Intel(r) Pentium(r) 4" },
3839 { 0xa, "Intel(r) Celeron(r)" },
3840 { 0xb, "Intel(r) Xeon(tm)" },
3841 { 0xc, "Intel(r) Xeon(tm) MP" },
3842 { 0xe, "Mobile Intel(r) Pentium(r) 4" },
3843 { 0xf, "Mobile Intel(r) Celeron(r)" },
3844 { 0x11, "Mobile Genuine Intel(r)" },
3845 { 0x12, "Intel(r) Celeron(r) M" },
3846 { 0x13, "Mobile Intel(r) Celeron(r)" },
3847 { 0x14, "Intel(r) Celeron(r)" },
3848 { 0x15, "Mobile Genuine Intel(r)" },
3849 { 0x16, "Intel(r) Pentium(r) M" },
3850 { 0x17, "Mobile Intel(r) Celeron(r)" }
3851 };
3852 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]);
3853 uint_t sgn;
3854
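/* pack family/model/stepping into a signature for the checks below */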
3855 sgn = (cpi->cpi_family << 8) |
3856 (cpi->cpi_model << 4) | cpi->cpi_step;
3857
3858 for (i = 0; i < btblmax; i++)
3859 if (brand_tbl[i].bt_bid == cpi->cpi_brandid)
3860 break;
3861 if (i < btblmax) {
3862 if (sgn == 0x6b1 && cpi->cpi_brandid == 3)
3863 return ("Intel(r) Celeron(r)");
3864 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb)
3865 return ("Intel(r) Xeon(tm) MP");
3866 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe)
3867 return ("Intel(r) Xeon(tm)");
3868 return (brand_tbl[i].bt_str);
3869 }
3870 }
3871
3872 return (NULL);
3873 }
3874
3875 static const char *
3876 amd_cpubrand(const struct cpuid_info *cpi)
3877 {
3878 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
3879 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
3880 return ("i486 compatible");
3881
3882 switch (cpi->cpi_family) {
3883 case 5:
3884 switch (cpi->cpi_model) {
3885 case 0:
3886 case 1:
3887 case 2:
3888 case 3:
3889 case 4:
3890 case 5:
3891 return ("AMD-K5(r)");
3892 case 6:
3893 case 7:
3894 return ("AMD-K6(r)");
3895 case 8:
3896 return ("AMD-K6(r)-2");
3897 case 9:
3898 return ("AMD-K6(r)-III");
3899 default:
3900 return ("AMD (family 5)");
3901 }
3902 case 6:
3903 switch (cpi->cpi_model) {
3904 case 1:
3905 return ("AMD-K7(tm)");
3906 case 0:
3907 case 2:
3908 case 4:
3909 return ("AMD Athlon(tm)");
3910 case 3:
3911 case 7:
3912 return ("AMD Duron(tm)");
3913 case 6:
3914 case 8:
3915 case 10:
3916 /*
3917 * Use the L2 cache size to distinguish
3918 */
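/* leaf 0x80000006 %ecx[31:16] is the L2 size in KB */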
3919 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ?
3920 "AMD Athlon(tm)" : "AMD Duron(tm)");
3921 default:
3922 return ("AMD (family 6)");
3923 }
3924 default:
3925 break;
3926 }
3927
3928 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 &&
3929 cpi->cpi_brandid != 0) {
3930 switch (BITX(cpi->cpi_brandid, 7, 5)) {
3931 case 3:
3932 return ("AMD Opteron(tm) UP 1xx");
3933 case 4:
3934 return ("AMD Opteron(tm) DP 2xx");
3935 case 5:
3936 return ("AMD Opteron(tm) MP 8xx");
3937 default:
3938 return ("AMD Opteron(tm)");
3939 }
3940 }
3941
3942 return (NULL);
3943 }
3944
3945 static const char *
3946 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type)
3947 {
3948 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
3949 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 ||
3950 type == X86_TYPE_CYRIX_486)
3951 return ("i486 compatible");
3952
3953 switch (type) {
3954 case X86_TYPE_CYRIX_6x86:
3955 return ("Cyrix 6x86");
3956 case X86_TYPE_CYRIX_6x86L:
3957 return ("Cyrix 6x86L");
3958 case X86_TYPE_CYRIX_6x86MX:
3959 return ("Cyrix 6x86MX");
3960 case X86_TYPE_CYRIX_GXm:
3961 return ("Cyrix GXm");
3962 case X86_TYPE_CYRIX_MediaGX:
3963 return ("Cyrix MediaGX");
3964 case X86_TYPE_CYRIX_MII:
3965 return ("Cyrix M2");
3966 case X86_TYPE_VIA_CYRIX_III:
3967 return ("VIA Cyrix M3");
3968 default:
3969 /*
3970 * Have another wild guess ..
3971 */
3972 if (cpi->cpi_family == 4 && cpi->cpi_model == 9)
3973 return ("Cyrix 5x86");
3974 else if (cpi->cpi_family == 5) {
3975 switch (cpi->cpi_model) {
3976 case 2:
3977 return ("Cyrix 6x86"); /* Cyrix M1 */
3978 case 4:
3979 return ("Cyrix MediaGX");
3980 default:
3981 break;
3982 }
3983 } else if (cpi->cpi_family == 6) {
3984 switch (cpi->cpi_model) {
3985 case 0:
3986 return ("Cyrix 6x86MX"); /* Cyrix M2? */
3987 case 5:
3988 case 6:
3989 case 7:
3990 case 8:
3991 case 9:
3992 return ("VIA C3");
3993 default:
3994 break;
3995 }
3996 }
3997 break;
3998 }
3999 return (NULL);
4000 }
4001
4002 /*
4003  * This only gets called in the case that the CPU's extended
4004  * feature brand string leaves (0x80000002, 0x80000003, 0x80000004)
4005  * aren't available, or contain null bytes for some reason.
4006 */
4007 static void
4008 fabricate_brandstr(struct cpuid_info *cpi)
4009 {
4010 const char *brand = NULL;
4011
4012 switch (cpi->cpi_vendor) {
4013 case X86_VENDOR_Intel:
4014 brand = intel_cpubrand(cpi);
4015 break;
4016 case X86_VENDOR_AMD:
4017 brand = amd_cpubrand(cpi);
4018 break;
4019 case X86_VENDOR_Cyrix:
4020 brand = cyrix_cpubrand(cpi, x86_type);
4021 break;
4022 case X86_VENDOR_NexGen:
4023 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
4024 brand = "NexGen Nx586";
4025 break;
4026 case X86_VENDOR_Centaur:
4027 if (cpi->cpi_family == 5)
4028 switch (cpi->cpi_model) {
4029 case 4:
4030 brand = "Centaur C6";
4031 break;
4032 case 8:
4033 brand = "Centaur C2";
4034 break;
4035 case 9:
4036 brand = "Centaur C3";
4037 break;
4038 default:
4039 break;
4040 }
4041 break;
4042 case X86_VENDOR_Rise:
4043 if (cpi->cpi_family == 5 &&
4044 (cpi->cpi_model == 0 || cpi->cpi_model == 2))
4045 brand = "Rise mP6";
4046 break;
4047 case X86_VENDOR_SiS:
4048 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
4049 brand = "SiS 55x";
4050 break;
4051 case X86_VENDOR_TM:
4052 if (cpi->cpi_family == 5 && cpi->cpi_model == 4)
4053 brand = "Transmeta Crusoe TM3x00 or TM5x00";
4054 break;
4055 case X86_VENDOR_NSC:
4056 case X86_VENDOR_UMC:
4057 default:
4058 break;
4059 }
4060 if (brand) {
4061 (void) strcpy((char *)cpi->cpi_brandstr, brand);
4062 return;
4063 }
4064
4065 /*
4066 * If all else fails ...
4067 */
4068 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr),
4069 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family,
4070 cpi->cpi_model, cpi->cpi_step);
4071 }
4072
4073 /*
4074 * This routine is called just after kernel memory allocation
4075 * becomes available on cpu0, and as part of mp_startup() on
4076 * the other cpus.
4077 *
4078 * Fixup the brand string, and collect any information from cpuid
4079 * that requires dynamically allocated storage to represent.
4080 */
4081 /*ARGSUSED*/
4082 void
4083 cpuid_pass3(cpu_t *cpu)
4084 {
4085 int i, max, shft, level, size;
4086 struct cpuid_regs regs;
4087 struct cpuid_regs *cp;
4088 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4089
4090 ASSERT(cpi->cpi_pass == 2);
4091
4092 /*
4093 * Deterministic cache parameters
4094 *
4095 * Intel uses leaf 0x4 for this, while AMD uses leaf 0x8000001d. The
4096 * values that are present are currently defined to be the same. This
4097 * means we can use the same logic to parse it as long as we use the
4098 * appropriate leaf to get the data. If you're updating this, make sure
4099 * you're careful about which vendor supports which aspect.
4100 *
4101 * Take this opportunity to detect the number of threads sharing the
4102 * last level cache, and construct a corresponding cache id. The
4103 * respective cpuid_info members are initialized to the default case of
4104 * "no last level cache sharing".
4105 */
4106 cpi->cpi_ncpu_shr_last_cache = 1;
4107 cpi->cpi_last_lvl_cacheid = cpu->cpu_id;
4108
4109 if ((cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) ||
4110 (cpi->cpi_vendor == X86_VENDOR_AMD &&
4111 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1d &&
4112 is_x86_feature(x86_featureset, X86FSET_TOPOEXT))) {
4113 uint32_t leaf;
4114
4115 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
4116 leaf = 4;
4117 } else {
4118 leaf = CPUID_LEAF_EXT_1d;
4119 }
4120
4121 /*
4122 * Find the # of elements (size) returned by the leaf and along
4123 * the way detect last level cache sharing details.
4124 */
4125 bzero(&regs, sizeof (regs));
4126 cp = &regs;
4127 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) {
4128 cp->cp_eax = leaf;
4129 cp->cp_ecx = i;
4130
4131 (void) __cpuid_insn(cp);
4132
4133 if (CPI_CACHE_TYPE(cp) == 0)
4134 break;
4135 level = CPI_CACHE_LVL(cp);
4136 if (level > max) {
4137 max = level;
4138 cpi->cpi_ncpu_shr_last_cache =
4139 CPI_NTHR_SHR_CACHE(cp) + 1;
4140 }
4141 }
4142 cpi->cpi_cache_leaf_size = size = i;
4143
4144 /*
4145 * Allocate the cpi_cache_leaves array. The first element
4146 * references the regs for the corresponding leaf with %ecx set
4147 * to 0. This was gathered in cpuid_pass2().
4148 */
4149 if (size > 0) {
4150 cpi->cpi_cache_leaves =
4151 kmem_alloc(size * sizeof (cp), KM_SLEEP);
4152 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
4153 cpi->cpi_cache_leaves[0] = &cpi->cpi_std[4];
4154 } else {
4155 cpi->cpi_cache_leaves[0] = &cpi->cpi_extd[0x1d];
4156 }
4157
4158 /*
4159 * Allocate storage to hold the additional regs
4160 * for the leaf, %ecx == 1 .. cpi_cache_leaf_size.
4161 *
4162 * The regs for the leaf, %ecx == 0 has already
4163 * been allocated as indicated above.
4164 */
4165 for (i = 1; i < size; i++) {
4166 cp = cpi->cpi_cache_leaves[i] =
4167 kmem_zalloc(sizeof (regs), KM_SLEEP);
4168 cp->cp_eax = leaf;
4169 cp->cp_ecx = i;
4170
4171 (void) __cpuid_insn(cp);
4172 }
4173 }
4174 /*
4175 * Determine the number of bits needed to represent
4176 * the number of CPUs sharing the last level cache.
4177 *
4178 * Shift off that number of bits from the APIC id to
4179 * derive the cache id.
4180 */
4181 shft = 0;
4182 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1)
4183 shft++;
4184 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft;
4185 }
4186
4187 /*
4188 * Now fixup the brand string
4189 */
4190 if ((cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) == 0) {
4191 fabricate_brandstr(cpi);
4192 } else {
4193
4194 /*
4195 * If we successfully extracted a brand string from the cpuid
4196 * instruction, clean it up by removing leading spaces and
4197 * similar junk.
4198 */
4199 if (cpi->cpi_brandstr[0]) {
4200 size_t maxlen = sizeof (cpi->cpi_brandstr);
4201 char *src, *dst;
4202
4203 dst = src = (char *)cpi->cpi_brandstr;
4204 src[maxlen - 1] = '\0';
4205 /*
4206 * strip leading spaces
4207 */
4208 while (*src == ' ')
4209 src++;
4210 /*
4211  * Remove any "Genuine" or "Authentic" prefixes
4212 */
4213 if (strncmp(src, "Genuine ", 8) == 0)
4214 src += 8;
4215 if (strncmp(src, "Authentic ", 10) == 0)
4216 src += 10;
4217
4218 /*
4219 * Now do an in-place copy.
4220 * Map (R) to (r) and (TM) to (tm).
4221 * The era of teletypes is long gone, and there's
4222 * -really- no need to shout.
4223 */
4224 while (*src != '\0') {
4225 if (src[0] == '(') {
4226 if (strncmp(src + 1, "R)", 2) == 0) {
4227 (void) strncpy(dst, "(r)", 3);
4228 src += 3;
4229 dst += 3;
4230 continue;
4231 }
4232 if (strncmp(src + 1, "TM)", 3) == 0) {
4233 (void) strncpy(dst, "(tm)", 4);
4234 src += 4;
4235 dst += 4;
4236 continue;
4237 }
4238 }
4239 *dst++ = *src++;
4240 }
4241 *dst = '\0';
4242
4243 /*
4244 * Finally, remove any trailing spaces
4245 */
4246 while (--dst > cpi->cpi_brandstr)
4247 if (*dst == ' ')
4248 *dst = '\0';
4249 else
4250 break;
4251 } else
4252 fabricate_brandstr(cpi);
4253 }
4254 cpi->cpi_pass = 3;
4255 }
4256
4257 /*
4258 * This routine is called out of bind_hwcap() much later in the life
4259 * of the kernel (post_startup()). The job of this routine is to resolve
4260 * the hardware feature support and kernel support for those features into
4261 * what we're actually going to tell applications via the aux vector.
4262 */
4263 void
4264 cpuid_pass4(cpu_t *cpu, uint_t *hwcap_out)
4265 {
4266 struct cpuid_info *cpi;
4267 uint_t hwcap_flags = 0, hwcap_flags_2 = 0;
4268
4269 if (cpu == NULL)
4270 cpu = CPU;
4271 cpi = cpu->cpu_m.mcpu_cpi;
4272
4273 ASSERT(cpi->cpi_pass == 3);
4274
4275 if (cpi->cpi_maxeax >= 1) {
4276 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES];
4277 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES];
4278 uint32_t *ebx = &cpi->cpi_support[STD_EBX_FEATURES];
4279
4280 *edx = CPI_FEATURES_EDX(cpi);
4281 *ecx = CPI_FEATURES_ECX(cpi);
4282 *ebx = CPI_FEATURES_7_0_EBX(cpi);
4283
4284 /*
4285 * [these require explicit kernel support]
4286 */
4287 if (!is_x86_feature(x86_featureset, X86FSET_SEP))
4288 *edx &= ~CPUID_INTC_EDX_SEP;
4289
4290 if (!is_x86_feature(x86_featureset, X86FSET_SSE))
4291 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE);
4292 if (!is_x86_feature(x86_featureset, X86FSET_SSE2))
4293 *edx &= ~CPUID_INTC_EDX_SSE2;
4294
4295 if (!is_x86_feature(x86_featureset, X86FSET_HTT))
4296 *edx &= ~CPUID_INTC_EDX_HTT;
4297
4298 if (!is_x86_feature(x86_featureset, X86FSET_SSE3))
4299 *ecx &= ~CPUID_INTC_ECX_SSE3;
4300
4301 if (!is_x86_feature(x86_featureset, X86FSET_SSSE3))
4302 *ecx &= ~CPUID_INTC_ECX_SSSE3;
4303 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_1))
4304 *ecx &= ~CPUID_INTC_ECX_SSE4_1;
4305 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_2))
4306 *ecx &= ~CPUID_INTC_ECX_SSE4_2;
4307 if (!is_x86_feature(x86_featureset, X86FSET_AES))
4308 *ecx &= ~CPUID_INTC_ECX_AES;
4309 if (!is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ))
4310 *ecx &= ~CPUID_INTC_ECX_PCLMULQDQ;
4311 if (!is_x86_feature(x86_featureset, X86FSET_XSAVE))
4312 *ecx &= ~(CPUID_INTC_ECX_XSAVE |
4313 CPUID_INTC_ECX_OSXSAVE);
4314 if (!is_x86_feature(x86_featureset, X86FSET_AVX))
4315 *ecx &= ~CPUID_INTC_ECX_AVX;
4316 if (!is_x86_feature(x86_featureset, X86FSET_F16C))
4317 *ecx &= ~CPUID_INTC_ECX_F16C;
4318 if (!is_x86_feature(x86_featureset, X86FSET_FMA))
4319 *ecx &= ~CPUID_INTC_ECX_FMA;
4320 if (!is_x86_feature(x86_featureset, X86FSET_BMI1))
4321 *ebx &= ~CPUID_INTC_EBX_7_0_BMI1;
4322 if (!is_x86_feature(x86_featureset, X86FSET_BMI2))
4323 *ebx &= ~CPUID_INTC_EBX_7_0_BMI2;
4324 if (!is_x86_feature(x86_featureset, X86FSET_AVX2))
4325 *ebx &= ~CPUID_INTC_EBX_7_0_AVX2;
4326 if (!is_x86_feature(x86_featureset, X86FSET_RDSEED))
4327 *ebx &= ~CPUID_INTC_EBX_7_0_RDSEED;
4328 if (!is_x86_feature(x86_featureset, X86FSET_ADX))
4329 *ebx &= ~CPUID_INTC_EBX_7_0_ADX;
4330
4331 /*
4332 * [no explicit support required beyond x87 fp context]
4333 */
4334 if (!fpu_exists)
4335 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX);
4336
4337 /*
4338 * Now map the supported feature vector to things that we
4339 * think userland will care about.
4340 */
4341 if (*edx & CPUID_INTC_EDX_SEP)
4342 hwcap_flags |= AV_386_SEP;
4343 if (*edx & CPUID_INTC_EDX_SSE)
4344 hwcap_flags |= AV_386_FXSR | AV_386_SSE;
4345 if (*edx & CPUID_INTC_EDX_SSE2)
4346 hwcap_flags |= AV_386_SSE2;
4347 if (*ecx & CPUID_INTC_ECX_SSE3)
4348 hwcap_flags |= AV_386_SSE3;
4349 if (*ecx & CPUID_INTC_ECX_SSSE3)
4350 hwcap_flags |= AV_386_SSSE3;
4351 if (*ecx & CPUID_INTC_ECX_SSE4_1)
4352 hwcap_flags |= AV_386_SSE4_1;
4353 if (*ecx & CPUID_INTC_ECX_SSE4_2)
4354 hwcap_flags |= AV_386_SSE4_2;
4355 if (*ecx & CPUID_INTC_ECX_MOVBE)
4356 hwcap_flags |= AV_386_MOVBE;
4357 if (*ecx & CPUID_INTC_ECX_AES)
4358 hwcap_flags |= AV_386_AES;
4359 if (*ecx & CPUID_INTC_ECX_PCLMULQDQ)
4360 hwcap_flags |= AV_386_PCLMULQDQ;
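/*
 * Only advertise XSAVE (and the AVX family below) when the OS
 * has enabled it as well; OSXSAVE reflects CR4.OSXSAVE.
 */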
4361 if ((*ecx & CPUID_INTC_ECX_XSAVE) &&
4362 (*ecx & CPUID_INTC_ECX_OSXSAVE)) {
4363 hwcap_flags |= AV_386_XSAVE;
4364
4365 if (*ecx & CPUID_INTC_ECX_AVX) {
4366 uint32_t *ecx_7 = &CPI_FEATURES_7_0_ECX(cpi);
4367 uint32_t *edx_7 = &CPI_FEATURES_7_0_EDX(cpi);
4368
4369 hwcap_flags |= AV_386_AVX;
4370 if (*ecx & CPUID_INTC_ECX_F16C)
4371 hwcap_flags_2 |= AV_386_2_F16C;
4372 if (*ecx & CPUID_INTC_ECX_FMA)
4373 hwcap_flags_2 |= AV_386_2_FMA;
4374
4375 if (*ebx & CPUID_INTC_EBX_7_0_BMI1)
4376 hwcap_flags_2 |= AV_386_2_BMI1;
4377 if (*ebx & CPUID_INTC_EBX_7_0_BMI2)
4378 hwcap_flags_2 |= AV_386_2_BMI2;
4379 if (*ebx & CPUID_INTC_EBX_7_0_AVX2)
4380 hwcap_flags_2 |= AV_386_2_AVX2;
4381 if (*ebx & CPUID_INTC_EBX_7_0_AVX512F)
4382 hwcap_flags_2 |= AV_386_2_AVX512F;
4383 if (*ebx & CPUID_INTC_EBX_7_0_AVX512DQ)
4384 hwcap_flags_2 |= AV_386_2_AVX512DQ;
4385 if (*ebx & CPUID_INTC_EBX_7_0_AVX512IFMA)
4386 hwcap_flags_2 |= AV_386_2_AVX512IFMA;
4387 if (*ebx & CPUID_INTC_EBX_7_0_AVX512PF)
4388 hwcap_flags_2 |= AV_386_2_AVX512PF;
4389 if (*ebx & CPUID_INTC_EBX_7_0_AVX512ER)
4390 hwcap_flags_2 |= AV_386_2_AVX512ER;
4391 if (*ebx & CPUID_INTC_EBX_7_0_AVX512CD)
4392 hwcap_flags_2 |= AV_386_2_AVX512CD;
4393 if (*ebx & CPUID_INTC_EBX_7_0_AVX512BW)
4394 hwcap_flags_2 |= AV_386_2_AVX512BW;
4395 if (*ebx & CPUID_INTC_EBX_7_0_AVX512VL)
4396 hwcap_flags_2 |= AV_386_2_AVX512VL;
4397
4398 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VBMI)
4399 hwcap_flags_2 |= AV_386_2_AVX512VBMI;
4400 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VNNI)
4401 hwcap_flags_2 |= AV_386_2_AVX512_VNNI;
4402 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VPOPCDQ)
4403 hwcap_flags_2 |= AV_386_2_AVX512VPOPCDQ;
4404
4405 if (*edx_7 & CPUID_INTC_EDX_7_0_AVX5124NNIW)
4406 hwcap_flags_2 |= AV_386_2_AVX512_4NNIW;
4407 if (*edx_7 & CPUID_INTC_EDX_7_0_AVX5124FMAPS)
4408 hwcap_flags_2 |= AV_386_2_AVX512_4FMAPS;
4409 }
4410 }
4411 if (*ecx & CPUID_INTC_ECX_VMX)
4412 hwcap_flags |= AV_386_VMX;
4413 if (*ecx & CPUID_INTC_ECX_POPCNT)
4414 hwcap_flags |= AV_386_POPCNT;
4415 if (*edx & CPUID_INTC_EDX_FPU)
4416 hwcap_flags |= AV_386_FPU;
4417 if (*edx & CPUID_INTC_EDX_MMX)
4418 hwcap_flags |= AV_386_MMX;
4419
4420 if (*edx & CPUID_INTC_EDX_TSC)
4421 hwcap_flags |= AV_386_TSC;
4422 if (*edx & CPUID_INTC_EDX_CX8)
4423 hwcap_flags |= AV_386_CX8;
4424 if (*edx & CPUID_INTC_EDX_CMOV)
4425 hwcap_flags |= AV_386_CMOV;
4426 if (*ecx & CPUID_INTC_ECX_CX16)
4427 hwcap_flags |= AV_386_CX16;
4428
4429 if (*ecx & CPUID_INTC_ECX_RDRAND)
4430 hwcap_flags_2 |= AV_386_2_RDRAND;
4431 if (*ebx & CPUID_INTC_EBX_7_0_ADX)
4432 hwcap_flags_2 |= AV_386_2_ADX;
4433 if (*ebx & CPUID_INTC_EBX_7_0_RDSEED)
4434 hwcap_flags_2 |= AV_386_2_RDSEED;
4435 if (*ebx & CPUID_INTC_EBX_7_0_SHA)
4436 hwcap_flags_2 |= AV_386_2_SHA;
4437 if (*ebx & CPUID_INTC_EBX_7_0_FSGSBASE)
4438 hwcap_flags_2 |= AV_386_2_FSGSBASE;
4439 if (*ebx & CPUID_INTC_EBX_7_0_CLWB)
4440 hwcap_flags_2 |= AV_386_2_CLWB;
4441 if (*ebx & CPUID_INTC_EBX_7_0_CLFLUSHOPT)
4442 hwcap_flags_2 |= AV_386_2_CLFLUSHOPT;
4443
4444 }
4445 /*
4446  * Check a few miscellaneous features.
4447 */
4448 if (is_x86_feature(x86_featureset, X86FSET_CLZERO))
4449 hwcap_flags_2 |= AV_386_2_CLZERO;
4450
4451 if (cpi->cpi_xmaxeax < 0x80000001)
4452 goto pass4_done;
4453
4454 switch (cpi->cpi_vendor) {
4455 struct cpuid_regs cp;
4456 uint32_t *edx, *ecx;
4457
4458 case X86_VENDOR_Intel:
4459 /*
4460  * Seems like Intel duplicated what was necessary
4461  * here to make the initial crop of 64-bit OSes work.
4462 * Hopefully, those are the only "extended" bits
4463 * they'll add.
4464 */
4465 /*FALLTHROUGH*/
4466
4467 case X86_VENDOR_AMD:
4468 edx = &cpi->cpi_support[AMD_EDX_FEATURES];
4469 ecx = &cpi->cpi_support[AMD_ECX_FEATURES];
4470
4471 *edx = CPI_FEATURES_XTD_EDX(cpi);
4472 *ecx = CPI_FEATURES_XTD_ECX(cpi);
4473
4474 /*
4475 * [these features require explicit kernel support]
4476 */
4477 switch (cpi->cpi_vendor) {
4478 case X86_VENDOR_Intel:
4479 if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
4480 *edx &= ~CPUID_AMD_EDX_TSCP;
4481 break;
4482
4483 case X86_VENDOR_AMD:
4484 if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
4485 *edx &= ~CPUID_AMD_EDX_TSCP;
4486 if (!is_x86_feature(x86_featureset, X86FSET_SSE4A))
4487 *ecx &= ~CPUID_AMD_ECX_SSE4A;
4488 break;
4489
4490 default:
4491 break;
4492 }
4493
4494 /*
4495 * [no explicit support required beyond
4496 * x87 fp context and exception handlers]
4497 */
4498 if (!fpu_exists)
4499 *edx &= ~(CPUID_AMD_EDX_MMXamd |
4500 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx);
4501
4502 if (!is_x86_feature(x86_featureset, X86FSET_NX))
4503 *edx &= ~CPUID_AMD_EDX_NX;
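/* a 32-bit kernel never advertises long mode support */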
4504 #if !defined(__amd64)
4505 *edx &= ~CPUID_AMD_EDX_LM;
4506 #endif
4507 /*
4508 * Now map the supported feature vector to
4509 * things that we think userland will care about.
4510 */
4511 #if defined(__amd64)
4512 if (*edx & CPUID_AMD_EDX_SYSC)
4513 hwcap_flags |= AV_386_AMD_SYSC;
4514 #endif
4515 if (*edx & CPUID_AMD_EDX_MMXamd)
4516 hwcap_flags |= AV_386_AMD_MMX;
4517 if (*edx & CPUID_AMD_EDX_3DNow)
4518 hwcap_flags |= AV_386_AMD_3DNow;
4519 if (*edx & CPUID_AMD_EDX_3DNowx)
4520 hwcap_flags |= AV_386_AMD_3DNowx;
4521 if (*ecx & CPUID_AMD_ECX_SVM)
4522 hwcap_flags |= AV_386_AMD_SVM;
4523
4524 switch (cpi->cpi_vendor) {
4525 case X86_VENDOR_AMD:
4526 if (*edx & CPUID_AMD_EDX_TSCP)
4527 hwcap_flags |= AV_386_TSCP;
4528 if (*ecx & CPUID_AMD_ECX_AHF64)
4529 hwcap_flags |= AV_386_AHF;
4530 if (*ecx & CPUID_AMD_ECX_SSE4A)
4531 hwcap_flags |= AV_386_AMD_SSE4A;
4532 if (*ecx & CPUID_AMD_ECX_LZCNT)
4533 hwcap_flags |= AV_386_AMD_LZCNT;
4534 if (*ecx & CPUID_AMD_ECX_MONITORX)
4535 hwcap_flags_2 |= AV_386_2_MONITORX;
4536 break;
4537
4538 case X86_VENDOR_Intel:
4539 if (*edx & CPUID_AMD_EDX_TSCP)
4540 hwcap_flags |= AV_386_TSCP;
4541 /*
4542 * Aarrgh.
4543 * Intel uses a different bit in the same word.
4544 */
4545 if (*ecx & CPUID_INTC_ECX_AHF64)
4546 hwcap_flags |= AV_386_AHF;
4547 break;
4548
4549 default:
4550 break;
4551 }
4552 break;
4553
4554 case X86_VENDOR_TM:
4555 cp.cp_eax = 0x80860001;
4556 (void) __cpuid_insn(&cp);
4557 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx;
4558 break;
4559
4560 default:
4561 break;
4562 }
4563
4564 pass4_done:
4565 cpi->cpi_pass = 4;
4566 if (hwcap_out != NULL) {
4567 hwcap_out[0] = hwcap_flags;
4568 hwcap_out[1] = hwcap_flags_2;
4569 }
4570 }
4571
4572
4573 /*
4574 * Simulate the cpuid instruction using the data we previously
4575 * captured about this CPU. We try our best to return the truth
4576 * about the hardware, independently of kernel support.
4577 */
4578 uint32_t
4579 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp)
4580 {
4581 struct cpuid_info *cpi;
4582 struct cpuid_regs *xcp;
4583
4584 if (cpu == NULL)
4585 cpu = CPU;
4586 cpi = cpu->cpu_m.mcpu_cpi;
4587
4588 ASSERT(cpuid_checkpass(cpu, 3));
4589
4590 /*
4591 * CPUID data is cached in two separate places: cpi_std for standard
4592  * CPUID leaves, and cpi_extd for extended CPUID leaves.
4593 */
4594 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD) {
4595 xcp = &cpi->cpi_std[cp->cp_eax];
4596 } else if (cp->cp_eax >= CPUID_LEAF_EXT_0 &&
4597 cp->cp_eax <= cpi->cpi_xmaxeax &&
4598 cp->cp_eax < CPUID_LEAF_EXT_0 + NMAX_CPI_EXTD) {
4599 xcp = &cpi->cpi_extd[cp->cp_eax - CPUID_LEAF_EXT_0];
4600 } else {
4601 /*
4602 * The caller is asking for data from an input parameter which
4603 * the kernel has not cached. In this case we go fetch from
4604 * the hardware and return the data directly to the user.
4605 */
4606 return (__cpuid_insn(cp));
4607 }
4608
4609 cp->cp_eax = xcp->cp_eax;
4610 cp->cp_ebx = xcp->cp_ebx;
4611 cp->cp_ecx = xcp->cp_ecx;
4612 cp->cp_edx = xcp->cp_edx;
4613 return (cp->cp_eax);
4614 }
4615
4616 int
4617 cpuid_checkpass(cpu_t *cpu, int pass)
4618 {
4619 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL &&
4620 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass);
4621 }
4622
4623 int
4624 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n)
4625 {
4626 ASSERT(cpuid_checkpass(cpu, 3));
4627
4628 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr));
4629 }
4630
4631 int
4632 cpuid_is_cmt(cpu_t *cpu)
4633 {
4634 if (cpu == NULL)
4635 cpu = CPU;
4636
4637 ASSERT(cpuid_checkpass(cpu, 1));
4638
4639 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0);
4640 }
4641
4642 /*
4643 * AMD and Intel both implement the 64-bit variant of the syscall
4644 * instruction (syscallq), so if there's -any- support for syscall,
4645 * cpuid currently says "yes, we support this".
4646 *
4647 * However, Intel decided to -not- implement the 32-bit variant of the
4648 * syscall instruction, so we provide a predicate to allow our caller
4649 * to test that subtlety here.
4650 *
4651 * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor,
4652 * even in the case where the hardware would in fact support it.
4653 */
4654 /*ARGSUSED*/
4655 int
4656 cpuid_syscall32_insn(cpu_t *cpu)
4657 {
4658 ASSERT(cpuid_checkpass((cpu == NULL ? CPU : cpu), 1));
4659
4660 #if !defined(__xpv)
4661 if (cpu == NULL)
4662 cpu = CPU;
4663
4664 /*CSTYLED*/
4665 {
4666 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4667
4668 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
4669 cpi->cpi_xmaxeax >= 0x80000001 &&
4670 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC))
4671 return (1);
4672 }
4673 #endif
4674 return (0);
4675 }
4676
4677 int
4678 cpuid_getidstr(cpu_t *cpu, char *s, size_t n)
4679 {
4680 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4681
4682 static const char fmt[] =
4683 "x86 (%s %X family %d model %d step %d clock %d MHz)";
4684 static const char fmt_ht[] =
4685 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)";
4686
4687 ASSERT(cpuid_checkpass(cpu, 1));
4688
4689 if (cpuid_is_cmt(cpu))
4690 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid,
4691 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
4692 cpi->cpi_family, cpi->cpi_model,
4693 cpi->cpi_step, cpu->cpu_type_info.pi_clock));
4694 return (snprintf(s, n, fmt,
4695 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
4696 cpi->cpi_family, cpi->cpi_model,
4697 cpi->cpi_step, cpu->cpu_type_info.pi_clock));
4698 }
4699
4700 const char *
4701 cpuid_getvendorstr(cpu_t *cpu)
4702 {
4703 ASSERT(cpuid_checkpass(cpu, 1));
4704 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr);
4705 }
4706
4707 uint_t
4708 cpuid_getvendor(cpu_t *cpu)
4709 {
4710 ASSERT(cpuid_checkpass(cpu, 1));
4711 return (cpu->cpu_m.mcpu_cpi->cpi_vendor);
4712 }
4713
4714 uint_t
4715 cpuid_getfamily(cpu_t *cpu)
4716 {
4717 ASSERT(cpuid_checkpass(cpu, 1));
4718 return (cpu->cpu_m.mcpu_cpi->cpi_family);
4719 }
4720
4721 uint_t
4722 cpuid_getmodel(cpu_t *cpu)
4723 {
4724 ASSERT(cpuid_checkpass(cpu, 1));
4725 return (cpu->cpu_m.mcpu_cpi->cpi_model);
4726 }
4727
4728 uint_t
4729 cpuid_get_ncpu_per_chip(cpu_t *cpu)
4730 {
4731 ASSERT(cpuid_checkpass(cpu, 1));
4732 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip);
4733 }
4734
4735 uint_t
4736 cpuid_get_ncore_per_chip(cpu_t *cpu)
4737 {
4738 ASSERT(cpuid_checkpass(cpu, 1));
4739 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip);
4740 }
4741
4742 uint_t
4743 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu)
4744 {
4745 ASSERT(cpuid_checkpass(cpu, 2));
4746 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache);
4747 }
4748
4749 id_t
4750 cpuid_get_last_lvl_cacheid(cpu_t *cpu)
4751 {
4752 ASSERT(cpuid_checkpass(cpu, 2));
4753 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
4754 }
4755
4756 uint_t
4757 cpuid_getstep(cpu_t *cpu)
4758 {
4759 ASSERT(cpuid_checkpass(cpu, 1));
4760 return (cpu->cpu_m.mcpu_cpi->cpi_step);
4761 }
4762
4763 uint_t
4764 cpuid_getsig(struct cpu *cpu)
4765 {
4766 ASSERT(cpuid_checkpass(cpu, 1));
4767 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax);
4768 }
4769
4770 uint32_t
4771 cpuid_getchiprev(struct cpu *cpu)
4772 {
4773 ASSERT(cpuid_checkpass(cpu, 1));
4774 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev);
4775 }
4776
4777 const char *
4778 cpuid_getchiprevstr(struct cpu *cpu)
4779 {
4780 ASSERT(cpuid_checkpass(cpu, 1));
4781 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr);
4782 }
4783
4784 uint32_t
4785 cpuid_getsockettype(struct cpu *cpu)
4786 {
4787 ASSERT(cpuid_checkpass(cpu, 1));
4788 return (cpu->cpu_m.mcpu_cpi->cpi_socket);
4789 }
4790
4791 const char *
4792 cpuid_getsocketstr(cpu_t *cpu)
4793 {
4794 static const char *socketstr = NULL;
4795 struct cpuid_info *cpi;
4796
4797 ASSERT(cpuid_checkpass(cpu, 1));
4798 cpi = cpu->cpu_m.mcpu_cpi;
4799
4800 /* Assume that socket types are the same across the system */
4801 if (socketstr == NULL)
4802 socketstr = _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family,
4803 cpi->cpi_model, cpi->cpi_step);
4804
4805
4806 return (socketstr);
4807 }
4808
4809 int
4810 cpuid_get_chipid(cpu_t *cpu)
4811 {
4812 ASSERT(cpuid_checkpass(cpu, 1));
4813
4814 if (cpuid_is_cmt(cpu))
4815 return (cpu->cpu_m.mcpu_cpi->cpi_chipid);
4816 return (cpu->cpu_id);
4817 }
4818
4819 id_t
4820 cpuid_get_coreid(cpu_t *cpu)
4821 {
4822 ASSERT(cpuid_checkpass(cpu, 1));
4823 return (cpu->cpu_m.mcpu_cpi->cpi_coreid);
4824 }
4825
4826 int
4827 cpuid_get_pkgcoreid(cpu_t *cpu)
4828 {
4829 ASSERT(cpuid_checkpass(cpu, 1));
4830 return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid);
4831 }
4832
4833 int
4834 cpuid_get_clogid(cpu_t *cpu)
4835 {
4836 ASSERT(cpuid_checkpass(cpu, 1));
4837 return (cpu->cpu_m.mcpu_cpi->cpi_clogid);
4838 }
4839
4840 int
4841 cpuid_get_cacheid(cpu_t *cpu)
4842 {
4843 ASSERT(cpuid_checkpass(cpu, 1));
4844 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
4845 }
4846
4847 uint_t
4848 cpuid_get_procnodeid(cpu_t *cpu)
4849 {
4850 ASSERT(cpuid_checkpass(cpu, 1));
4851 return (cpu->cpu_m.mcpu_cpi->cpi_procnodeid);
4852 }
4853
4854 uint_t
4855 cpuid_get_procnodes_per_pkg(cpu_t *cpu)
4856 {
4857 ASSERT(cpuid_checkpass(cpu, 1));
4858 return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg);
4859 }
4860
4861 uint_t
4862 cpuid_get_compunitid(cpu_t *cpu)
4863 {
4864 ASSERT(cpuid_checkpass(cpu, 1));
4865 return (cpu->cpu_m.mcpu_cpi->cpi_compunitid);
4866 }
4867
4868 uint_t
4869 cpuid_get_cores_per_compunit(cpu_t *cpu)
4870 {
4871 ASSERT(cpuid_checkpass(cpu, 1));
4872 return (cpu->cpu_m.mcpu_cpi->cpi_cores_per_compunit);
4873 }
4874
4875 /*ARGSUSED*/
4876 int
4877 cpuid_have_cr8access(cpu_t *cpu)
4878 {
4879 #if defined(__amd64)
4880 return (1);
4881 #else
4882 struct cpuid_info *cpi;
4883
4884 ASSERT(cpu != NULL);
4885 cpi = cpu->cpu_m.mcpu_cpi;
4886 if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 &&
4887 (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0)
4888 return (1);
4889 return (0);
4890 #endif
4891 }
4892
4893 uint32_t
4894 cpuid_get_apicid(cpu_t *cpu)
4895 {
4896 ASSERT(cpuid_checkpass(cpu, 1));
4897 if (cpu->cpu_m.mcpu_cpi->cpi_maxeax < 1) {
4898 return (UINT32_MAX);
4899 } else {
4900 return (cpu->cpu_m.mcpu_cpi->cpi_apicid);
4901 }
4902 }
4903
4904 void
4905 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits)
4906 {
4907 struct cpuid_info *cpi;
4908
4909 if (cpu == NULL)
4910 cpu = CPU;
4911 cpi = cpu->cpu_m.mcpu_cpi;
4912
4913 ASSERT(cpuid_checkpass(cpu, 1));
4914
4915 if (pabits)
4916 *pabits = cpi->cpi_pabits;
4917 if (vabits)
4918 *vabits = cpi->cpi_vabits;
4919 }
4920
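/*
 * Return the size of the XSAVE state area, never smaller than the
 * base xsave_state structure.
 */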
4921 size_t
4922 cpuid_get_xsave_size()
4923 {
4924 return (MAX(cpuid_info0.cpi_xsave.xsav_max_size,
4925 sizeof (struct xsave_state)));
4926 }
4927
4928 /*
4929 * Return true if the CPUs on this system require 'pointer clearing' for the
4930 * floating point error pointer exception handling. In the past, this has been
4931 * true for all AMD K7 & K8 CPUs, although newer AMD CPUs have been changed to
4932 * behave the same as Intel. This is checked via the CPUID_AMD_EBX_ERR_PTR_ZERO
4933 * feature bit and is reflected in the cpi_fp_amd_save member.
4934 */
4935 boolean_t
4936 cpuid_need_fp_excp_handling()
4937 {
4938 return (cpuid_info0.cpi_vendor == X86_VENDOR_AMD &&
4939 cpuid_info0.cpi_fp_amd_save != 0);
4940 }
4941
4942 /*
4943 * Returns the number of data TLB entries for a corresponding
4944 * pagesize. If it can't be computed, or isn't known, the
4945 * routine returns zero. If you ask about an architecturally
4946 * impossible pagesize, the routine will panic (so that the
4947  * hat implementor knows that things are inconsistent).
4948 */
4949 uint_t
4950 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize)
4951 {
4952 struct cpuid_info *cpi;
4953 uint_t dtlb_nent = 0;
4954
4955 if (cpu == NULL)
4956 cpu = CPU;
4957 cpi = cpu->cpu_m.mcpu_cpi;
4958
4959 ASSERT(cpuid_checkpass(cpu, 1));
4960
4961 /*
4962 * Check the L2 TLB info
4963 */
4964 if (cpi->cpi_xmaxeax >= 0x80000006) {
4965 struct cpuid_regs *cp = &cpi->cpi_extd[6];
4966
4967 switch (pagesize) {
4968
4969 case 4 * 1024:
4970 /*
4971 * All zero in the top 16 bits of the register
4972 * indicates a unified TLB. Size is in low 16 bits.
4973 */
4974 if ((cp->cp_ebx & 0xffff0000) == 0)
4975 dtlb_nent = cp->cp_ebx & 0x0000ffff;
4976 else
4977 dtlb_nent = BITX(cp->cp_ebx, 27, 16);
4978 break;
4979
4980 case 2 * 1024 * 1024:
4981 if ((cp->cp_eax & 0xffff0000) == 0)
4982 dtlb_nent = cp->cp_eax & 0x0000ffff;
4983 else
4984 dtlb_nent = BITX(cp->cp_eax, 27, 16);
4985 break;
4986
4987 default:
4988 panic("unknown L2 pagesize");
4989 /*NOTREACHED*/
4990 }
4991 }
4992
4993 if (dtlb_nent != 0)
4994 return (dtlb_nent);
4995
4996 /*
4997 * No L2 TLB support for this size, try L1.
4998 */
4999 if (cpi->cpi_xmaxeax >= 0x80000005) {
5000 struct cpuid_regs *cp = &cpi->cpi_extd[5];
5001
5002 switch (pagesize) {
5003 case 4 * 1024:
5004 dtlb_nent = BITX(cp->cp_ebx, 23, 16);
5005 break;
5006 case 2 * 1024 * 1024:
5007 dtlb_nent = BITX(cp->cp_eax, 23, 16);
5008 break;
5009 default:
5010 panic("unknown L1 d-TLB pagesize");
5011 /*NOTREACHED*/
5012 }
5013 }
5014
5015 return (dtlb_nent);
5016 }
5017
5018 /*
5019 * Return 0 if the erratum is not present or not applicable, positive
5020 * if it is, and negative if the status of the erratum is unknown.
5021 *
5022 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm)
5023 * Processors" #25759, Rev 3.57, August 2005
5024 */
5025 int
5026 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum)
5027 {
5028 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
5029 uint_t eax;
5030
5031 /*
5032 * Bail out if this CPU isn't an AMD CPU, or if it's
5033 * a legacy (32-bit) AMD CPU.
5034 */
5035 if (cpi->cpi_vendor != X86_VENDOR_AMD ||
5036 cpi->cpi_family == 4 || cpi->cpi_family == 5 ||
5037 cpi->cpi_family == 6) {
5038 return (0);
5039 }
5040
5041 eax = cpi->cpi_std[1].cp_eax;
5042
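/*
 * These macros match specific chip revisions by the raw
 * family/model/stepping signature from leaf 1 %eax.
 */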
5043 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50)
5044 #define SH_B3(eax) (eax == 0xf51)
5045 #define B(eax) (SH_B0(eax) || SH_B3(eax))
5046
5047 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58)
5048
5049 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a)
5050 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0)
5051 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2)
5052 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax))
5053
5054 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70)
5055 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0)
5056 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0)
5057 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax))
5058
5059 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70)
5060 #define JH_E1(eax) (eax == 0x20f10) /* JH8_E0 had 0x20f30 */
5061 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0)
5062 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71)
5063 #define BH_E4(eax) (eax == 0x20fb1)
5064 #define SH_E5(eax) (eax == 0x20f42)
5065 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2)
5066 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32)
5067 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \
5068 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \
5069 DH_E6(eax) || JH_E6(eax))
5070
5071 #define DR_AX(eax) (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02)
5072 #define DR_B0(eax) (eax == 0x100f20)
5073 #define DR_B1(eax) (eax == 0x100f21)
5074 #define DR_BA(eax) (eax == 0x100f2a)
5075 #define DR_B2(eax) (eax == 0x100f22)
5076 #define DR_B3(eax) (eax == 0x100f23)
5077 #define RB_C0(eax) (eax == 0x100f40)
5078
5079 switch (erratum) {
5080 case 1:
5081 return (cpi->cpi_family < 0x10);
5082 case 51: /* what does the asterisk mean? */
5083 return (B(eax) || SH_C0(eax) || CG(eax));
5084 case 52:
5085 return (B(eax));
5086 case 57:
5087 return (cpi->cpi_family <= 0x11);
5088 case 58:
5089 return (B(eax));
5090 case 60:
5091 return (cpi->cpi_family <= 0x11);
5092 case 61:
5093 case 62:
5094 case 63:
5095 case 64:
5096 case 65:
5097 case 66:
5098 case 68:
5099 case 69:
5100 case 70:
5101 case 71:
5102 return (B(eax));
5103 case 72:
5104 return (SH_B0(eax));
5105 case 74:
5106 return (B(eax));
5107 case 75:
5108 return (cpi->cpi_family < 0x10);
5109 case 76:
5110 return (B(eax));
5111 case 77:
5112 return (cpi->cpi_family <= 0x11);
5113 case 78:
5114 return (B(eax) || SH_C0(eax));
5115 case 79:
5116 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
5117 case 80:
5118 case 81:
5119 case 82:
5120 return (B(eax));
5121 case 83:
5122 return (B(eax) || SH_C0(eax) || CG(eax));
5123 case 85:
5124 return (cpi->cpi_family < 0x10);
5125 case 86:
5126 return (SH_C0(eax) || CG(eax));
5127 case 88:
5128 #if !defined(__amd64)
5129 return (0);
5130 #else
5131 return (B(eax) || SH_C0(eax));
5132 #endif
5133 case 89:
5134 return (cpi->cpi_family < 0x10);
5135 case 90:
5136 return (B(eax) || SH_C0(eax) || CG(eax));
5137 case 91:
5138 case 92:
5139 return (B(eax) || SH_C0(eax));
5140 case 93:
5141 return (SH_C0(eax));
5142 case 94:
5143 return (B(eax) || SH_C0(eax) || CG(eax));
5144 case 95:
5145 #if !defined(__amd64)
5146 return (0);
5147 #else
5148 return (B(eax) || SH_C0(eax));
5149 #endif
5150 case 96:
5151 return (B(eax) || SH_C0(eax) || CG(eax));
5152 case 97:
5153 case 98:
5154 return (SH_C0(eax) || CG(eax));
5155 case 99:
5156 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5157 case 100:
5158 return (B(eax) || SH_C0(eax));
5159 case 101:
5160 case 103:
5161 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5162 case 104:
5163 return (SH_C0(eax) || CG(eax) || D0(eax));
5164 case 105:
5165 case 106:
5166 case 107:
5167 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5168 case 108:
5169 return (DH_CG(eax));
5170 case 109:
5171 return (SH_C0(eax) || CG(eax) || D0(eax));
5172 case 110:
5173 return (D0(eax) || EX(eax));
5174 case 111:
5175 return (CG(eax));
5176 case 112:
5177 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
5178 case 113:
5179 return (eax == 0x20fc0);
5180 case 114:
5181 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
5182 case 115:
5183 return (SH_E0(eax) || JH_E1(eax));
5184 case 116:
5185 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
5186 case 117:
5187 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5188 case 118:
5189 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) ||
5190 JH_E6(eax));
5191 case 121:
5192 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
5193 case 122:
5194 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11);
5195 case 123:
5196 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax));
5197 case 131:
5198 return (cpi->cpi_family < 0x10);
5199 case 6336786:
5200
5201 /*
5202 * Test for AdvPowerMgmtInfo.TscPStateInvariant
5203 * if this is a K8 family or newer processor. We're testing for
5204 * this 'erratum' to determine whether or not we have a constant
5205 * TSC.
5206 *
5207 * Our current fix for this is to disable the C1-Clock ramping.
5208 * However, this doesn't work on newer processor families nor
5209 * does it work when virtualized as those devices don't exist.
5210 */
5211 if (cpi->cpi_family >= 0x12 || get_hwenv() != HW_NATIVE) {
5212 return (0);
5213 }
5214
5215 if (CPI_FAMILY(cpi) == 0xf) {
5216 struct cpuid_regs regs;
5217 regs.cp_eax = 0x80000007;
			(void) __cpuid_insn(&regs);
5219 return (!(regs.cp_edx & 0x100));
5220 }
5221 return (0);
5222 case 6323525:
5223 /*
5224 * This erratum (K8 #147) is not present on family 10 and newer.
5225 */
5226 if (cpi->cpi_family >= 0x10) {
5227 return (0);
5228 }
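		/*
		 * The expression below reconstructs the display family and
		 * model from %eax (adding in the extended family bits and
		 * splicing in the extended model bits) and checks for
		 * family 0xf parts with a model below 0x40, i.e. K8
		 * revisions prior to rev F.
		 */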
5229 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) |
5230 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40);
5231
5232 case 6671130:
		/*
		 * Check for processors (pre-Shanghai) that do not provide
		 * optimal management of 1GB PTEs in their TLB.
		 */
5237 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4);
5238
5239 case 298:
5240 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) ||
5241 DR_B2(eax) || RB_C0(eax));
5242
5243 case 721:
5244 #if defined(__amd64)
5245 return (cpi->cpi_family == 0x10 || cpi->cpi_family == 0x12);
5246 #else
5247 return (0);
5248 #endif
5249
5250 default:
5251 return (-1);
5252
5253 }
5254 }
5255
5256 /*
5257 * Determine if specified erratum is present via OSVW (OS Visible Workaround).
5258 * Return 1 if erratum is present, 0 if not present and -1 if indeterminate.
5259 */
5260 int
5261 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum)
5262 {
5263 struct cpuid_info *cpi;
5264 uint_t osvwid;
5265 static int osvwfeature = -1;
5266 uint64_t osvwlength;
5267
5268
5269 cpi = cpu->cpu_m.mcpu_cpi;
5270
5271 /* confirm OSVW supported */
5272 if (osvwfeature == -1) {
5273 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW;
5274 } else {
5275 /* assert that osvw feature setting is consistent on all cpus */
5276 ASSERT(osvwfeature ==
5277 (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW));
5278 }
5279 if (!osvwfeature)
5280 return (-1);
5281
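	/*
	 * The OSVW ID length MSR reports how many erratum status bits the
	 * BIOS has initialized; IDs at or beyond this length are unknown.
	 */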
5282 osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK;
5283
5284 switch (erratum) {
5285 case 298: /* osvwid is 0 */
5286 osvwid = 0;
5287 if (osvwlength <= (uint64_t)osvwid) {
5288 /* osvwid 0 is unknown */
5289 return (-1);
5290 }
5291
5292 /*
5293 * Check the OSVW STATUS MSR to determine the state
5294 * of the erratum where:
5295 * 0 - fixed by HW
5296 * 1 - BIOS has applied the workaround when BIOS
5297 * workaround is available. (Or for other errata,
5298 * OS workaround is required.)
5299 * For a value of 1, caller will confirm that the
5300 * erratum 298 workaround has indeed been applied by BIOS.
5301 *
5302 * A 1 may be set in cpus that have a HW fix
5303 * in a mixed cpu system. Regarding erratum 298:
5304 * In a multiprocessor platform, the workaround above
5305 * should be applied to all processors regardless of
5306 * silicon revision when an affected processor is
5307 * present.
5308 */
5309
5310 return (rdmsr(MSR_AMD_OSVW_STATUS +
5311 (osvwid / OSVW_ID_CNT_PER_MSR)) &
5312 (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR)));
5313
5314 default:
5315 return (-1);
5316 }
5317 }
5318
5319 static const char assoc_str[] = "associativity";
5320 static const char line_str[] = "line-size";
5321 static const char size_str[] = "size";
5322
5323 static void
5324 add_cache_prop(dev_info_t *devi, const char *label, const char *type,
5325 uint32_t val)
5326 {
5327 char buf[128];
5328
5329 /*
5330 * ndi_prop_update_int() is used because it is desirable for
5331 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set.
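	 *
	 * The resulting property is named "<label>-<type>"; for example, a
	 * label of "l2-cache" and a type of "size" yield an integer property
	 * named "l2-cache-size".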
5332 */
5333 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf))
5334 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val);
5335 }
5336
5337 /*
5338 * Intel-style cache/tlb description
5339 *
5340 * Standard cpuid level 2 gives a randomly ordered
5341 * selection of tags that index into a table that describes
5342 * cache and tlb properties.
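 *
 * For example, a descriptor byte of 0x2c maps (via the table below) to a
 * 32KB, 8-way set associative L1 data cache with 64-byte lines.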
5343 */
5344
5345 static const char l1_icache_str[] = "l1-icache";
5346 static const char l1_dcache_str[] = "l1-dcache";
5347 static const char l2_cache_str[] = "l2-cache";
5348 static const char l3_cache_str[] = "l3-cache";
5349 static const char itlb4k_str[] = "itlb-4K";
5350 static const char dtlb4k_str[] = "dtlb-4K";
5351 static const char itlb2M_str[] = "itlb-2M";
5352 static const char itlb4M_str[] = "itlb-4M";
5353 static const char dtlb4M_str[] = "dtlb-4M";
5354 static const char dtlb24_str[] = "dtlb0-2M-4M";
5355 static const char itlb424_str[] = "itlb-4K-2M-4M";
5356 static const char itlb24_str[] = "itlb-2M-4M";
5357 static const char dtlb44_str[] = "dtlb-4K-4M";
5358 static const char sl1_dcache_str[] = "sectored-l1-dcache";
5359 static const char sl2_cache_str[] = "sectored-l2-cache";
5360 static const char itrace_str[] = "itrace-cache";
5361 static const char sl3_cache_str[] = "sectored-l3-cache";
5362 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k";
5363
5364 static const struct cachetab {
5365 uint8_t ct_code;
5366 uint8_t ct_assoc;
5367 uint16_t ct_line_size;
5368 size_t ct_size;
5369 const char *ct_label;
5370 } intel_ctab[] = {
5371 /*
5372 * maintain descending order!
5373 *
5374 * Codes ignored - Reason
5375 * ----------------------
5376 * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache
5377 * f0H/f1H - Currently we do not interpret prefetch size by design
5378 */
5379 { 0xe4, 16, 64, 8*1024*1024, l3_cache_str},
5380 { 0xe3, 16, 64, 4*1024*1024, l3_cache_str},
5381 { 0xe2, 16, 64, 2*1024*1024, l3_cache_str},
5382 { 0xde, 12, 64, 6*1024*1024, l3_cache_str},
5383 { 0xdd, 12, 64, 3*1024*1024, l3_cache_str},
5384 { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str},
5385 { 0xd8, 8, 64, 4*1024*1024, l3_cache_str},
5386 { 0xd7, 8, 64, 2*1024*1024, l3_cache_str},
5387 { 0xd6, 8, 64, 1*1024*1024, l3_cache_str},
5388 { 0xd2, 4, 64, 2*1024*1024, l3_cache_str},
5389 { 0xd1, 4, 64, 1*1024*1024, l3_cache_str},
5390 { 0xd0, 4, 64, 512*1024, l3_cache_str},
5391 { 0xca, 4, 0, 512, sh_l2_tlb4k_str},
5392 { 0xc0, 4, 0, 8, dtlb44_str },
5393 { 0xba, 4, 0, 64, dtlb4k_str },
5394 { 0xb4, 4, 0, 256, dtlb4k_str },
5395 { 0xb3, 4, 0, 128, dtlb4k_str },
5396 { 0xb2, 4, 0, 64, itlb4k_str },
5397 { 0xb0, 4, 0, 128, itlb4k_str },
5398 { 0x87, 8, 64, 1024*1024, l2_cache_str},
5399 { 0x86, 4, 64, 512*1024, l2_cache_str},
5400 { 0x85, 8, 32, 2*1024*1024, l2_cache_str},
5401 { 0x84, 8, 32, 1024*1024, l2_cache_str},
5402 { 0x83, 8, 32, 512*1024, l2_cache_str},
5403 { 0x82, 8, 32, 256*1024, l2_cache_str},
5404 { 0x80, 8, 64, 512*1024, l2_cache_str},
5405 { 0x7f, 2, 64, 512*1024, l2_cache_str},
5406 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str},
5407 { 0x7c, 8, 64, 1024*1024, sl2_cache_str},
5408 { 0x7b, 8, 64, 512*1024, sl2_cache_str},
5409 { 0x7a, 8, 64, 256*1024, sl2_cache_str},
5410 { 0x79, 8, 64, 128*1024, sl2_cache_str},
5411 { 0x78, 8, 64, 1024*1024, l2_cache_str},
5412 { 0x73, 8, 0, 64*1024, itrace_str},
5413 { 0x72, 8, 0, 32*1024, itrace_str},
5414 { 0x71, 8, 0, 16*1024, itrace_str},
5415 { 0x70, 8, 0, 12*1024, itrace_str},
5416 { 0x68, 4, 64, 32*1024, sl1_dcache_str},
5417 { 0x67, 4, 64, 16*1024, sl1_dcache_str},
5418 { 0x66, 4, 64, 8*1024, sl1_dcache_str},
5419 { 0x60, 8, 64, 16*1024, sl1_dcache_str},
5420 { 0x5d, 0, 0, 256, dtlb44_str},
5421 { 0x5c, 0, 0, 128, dtlb44_str},
5422 { 0x5b, 0, 0, 64, dtlb44_str},
5423 { 0x5a, 4, 0, 32, dtlb24_str},
5424 { 0x59, 0, 0, 16, dtlb4k_str},
5425 { 0x57, 4, 0, 16, dtlb4k_str},
5426 { 0x56, 4, 0, 16, dtlb4M_str},
5427 { 0x55, 0, 0, 7, itlb24_str},
5428 { 0x52, 0, 0, 256, itlb424_str},
5429 { 0x51, 0, 0, 128, itlb424_str},
5430 { 0x50, 0, 0, 64, itlb424_str},
5431 { 0x4f, 0, 0, 32, itlb4k_str},
5432 { 0x4e, 24, 64, 6*1024*1024, l2_cache_str},
5433 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str},
5434 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str},
5435 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str},
5436 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str},
5437 { 0x49, 16, 64, 4*1024*1024, l3_cache_str},
5438 { 0x48, 12, 64, 3*1024*1024, l2_cache_str},
5439 { 0x47, 8, 64, 8*1024*1024, l3_cache_str},
5440 { 0x46, 4, 64, 4*1024*1024, l3_cache_str},
5441 { 0x45, 4, 32, 2*1024*1024, l2_cache_str},
5442 { 0x44, 4, 32, 1024*1024, l2_cache_str},
5443 { 0x43, 4, 32, 512*1024, l2_cache_str},
5444 { 0x42, 4, 32, 256*1024, l2_cache_str},
5445 { 0x41, 4, 32, 128*1024, l2_cache_str},
5446 { 0x3e, 4, 64, 512*1024, sl2_cache_str},
5447 { 0x3d, 6, 64, 384*1024, sl2_cache_str},
5448 { 0x3c, 4, 64, 256*1024, sl2_cache_str},
5449 { 0x3b, 2, 64, 128*1024, sl2_cache_str},
5450 { 0x3a, 6, 64, 192*1024, sl2_cache_str},
5451 { 0x39, 4, 64, 128*1024, sl2_cache_str},
5452 { 0x30, 8, 64, 32*1024, l1_icache_str},
5453 { 0x2c, 8, 64, 32*1024, l1_dcache_str},
5454 { 0x29, 8, 64, 4096*1024, sl3_cache_str},
5455 { 0x25, 8, 64, 2048*1024, sl3_cache_str},
5456 { 0x23, 8, 64, 1024*1024, sl3_cache_str},
5457 { 0x22, 4, 64, 512*1024, sl3_cache_str},
5458 { 0x0e, 6, 64, 24*1024, l1_dcache_str},
5459 { 0x0d, 4, 32, 16*1024, l1_dcache_str},
5460 { 0x0c, 4, 32, 16*1024, l1_dcache_str},
5461 { 0x0b, 4, 0, 4, itlb4M_str},
5462 { 0x0a, 2, 32, 8*1024, l1_dcache_str},
5463 { 0x08, 4, 32, 16*1024, l1_icache_str},
5464 { 0x06, 4, 32, 8*1024, l1_icache_str},
5465 { 0x05, 4, 0, 32, dtlb4M_str},
5466 { 0x04, 4, 0, 8, dtlb4M_str},
5467 { 0x03, 4, 0, 64, dtlb4k_str},
5468 { 0x02, 4, 0, 2, itlb4M_str},
5469 { 0x01, 4, 0, 32, itlb4k_str},
5470 { 0 }
5471 };
5472
5473 static const struct cachetab cyrix_ctab[] = {
5474 { 0x70, 4, 0, 32, "tlb-4K" },
5475 { 0x80, 4, 16, 16*1024, "l1-cache" },
5476 { 0 }
5477 };
5478
5479 /*
5480 * Search a cache table for a matching entry
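 *
 * The table is kept in descending ct_code order; the scan stops at the
 * first entry whose code is not greater than the one sought and matches
 * only on equality.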
5481 */
5482 static const struct cachetab *
5483 find_cacheent(const struct cachetab *ct, uint_t code)
5484 {
5485 if (code != 0) {
5486 for (; ct->ct_code != 0; ct++)
5487 if (ct->ct_code <= code)
5488 break;
5489 if (ct->ct_code == code)
5490 return (ct);
5491 }
5492 return (NULL);
5493 }
5494
5495 /*
5496 * Populate cachetab entry with L2 or L3 cache-information using
5497 * cpuid function 4. This function is called from intel_walk_cacheinfo()
5498 * when descriptor 0x49 is encountered. It returns 0 if no such cache
5499 * information is found.
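 *
 * The reported size is computed as (ways * partitions * line size * sets);
 * leaf 4 reports each of these fields as the value minus one, so each is
 * adjusted by one below.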
5500 */
5501 static int
5502 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi)
5503 {
5504 uint32_t level, i;
5505 int ret = 0;
5506
5507 for (i = 0; i < cpi->cpi_cache_leaf_size; i++) {
5508 level = CPI_CACHE_LVL(cpi->cpi_cache_leaves[i]);
5509
5510 if (level == 2 || level == 3) {
5511 ct->ct_assoc =
5512 CPI_CACHE_WAYS(cpi->cpi_cache_leaves[i]) + 1;
5513 ct->ct_line_size =
5514 CPI_CACHE_COH_LN_SZ(cpi->cpi_cache_leaves[i]) + 1;
5515 ct->ct_size = ct->ct_assoc *
5516 (CPI_CACHE_PARTS(cpi->cpi_cache_leaves[i]) + 1) *
5517 ct->ct_line_size *
5518 (cpi->cpi_cache_leaves[i]->cp_ecx + 1);
5519
5520 if (level == 2) {
5521 ct->ct_label = l2_cache_str;
5522 } else if (level == 3) {
5523 ct->ct_label = l3_cache_str;
5524 }
5525 ret = 1;
5526 }
5527 }
5528
5529 return (ret);
5530 }
5531
5532 /*
 * Walk the cacheinfo descriptor, applying 'func' to every valid element.
5534 * The walk is terminated if the walker returns non-zero.
5535 */
5536 static void
5537 intel_walk_cacheinfo(struct cpuid_info *cpi,
5538 void *arg, int (*func)(void *, const struct cachetab *))
5539 {
5540 const struct cachetab *ct;
5541 struct cachetab des_49_ct, des_b1_ct;
5542 uint8_t *dp;
5543 int i;
5544
5545 if ((dp = cpi->cpi_cacheinfo) == NULL)
5546 return;
5547 for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
5548 /*
5549 * For overloaded descriptor 0x49 we use cpuid function 4
5550 * if supported by the current processor, to create
5551 * cache information.
5552 * For overloaded descriptor 0xb1 we use X86_PAE flag
5553 * to disambiguate the cache information.
5554 */
5555 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 &&
5556 intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) {
5557 ct = &des_49_ct;
5558 } else if (*dp == 0xb1) {
5559 des_b1_ct.ct_code = 0xb1;
5560 des_b1_ct.ct_assoc = 4;
5561 des_b1_ct.ct_line_size = 0;
5562 if (is_x86_feature(x86_featureset, X86FSET_PAE)) {
5563 des_b1_ct.ct_size = 8;
5564 des_b1_ct.ct_label = itlb2M_str;
5565 } else {
5566 des_b1_ct.ct_size = 4;
5567 des_b1_ct.ct_label = itlb4M_str;
5568 }
5569 ct = &des_b1_ct;
5570 } else {
5571 if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) {
5572 continue;
5573 }
5574 }
5575
5576 if (func(arg, ct) != 0) {
5577 break;
5578 }
5579 }
5580 }
5581
5582 /*
5583 * (Like the Intel one, except for Cyrix CPUs)
5584 */
5585 static void
5586 cyrix_walk_cacheinfo(struct cpuid_info *cpi,
5587 void *arg, int (*func)(void *, const struct cachetab *))
5588 {
5589 const struct cachetab *ct;
5590 uint8_t *dp;
5591 int i;
5592
5593 if ((dp = cpi->cpi_cacheinfo) == NULL)
5594 return;
5595 for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
5596 /*
5597 * Search Cyrix-specific descriptor table first ..
5598 */
5599 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) {
5600 if (func(arg, ct) != 0)
5601 break;
5602 continue;
5603 }
5604 /*
5605 * .. else fall back to the Intel one
5606 */
5607 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) {
5608 if (func(arg, ct) != 0)
5609 break;
5610 continue;
5611 }
5612 }
5613 }
5614
5615 /*
5616 * A cacheinfo walker that adds associativity, line-size, and size properties
5617 * to the devinfo node it is passed as an argument.
5618 */
5619 static int
5620 add_cacheent_props(void *arg, const struct cachetab *ct)
5621 {
5622 dev_info_t *devi = arg;
5623
5624 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc);
5625 if (ct->ct_line_size != 0)
5626 add_cache_prop(devi, ct->ct_label, line_str,
5627 ct->ct_line_size);
5628 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size);
5629 return (0);
5630 }
5631
5632
5633 static const char fully_assoc[] = "fully-associative?";
5634
5635 /*
5636 * AMD style cache/tlb description
5637 *
5638 * Extended functions 5 and 6 directly describe properties of
5639 * tlbs and various cache levels.
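 *
 * For example, %ecx of extended function 5 describes the L1 data cache:
 * bits 31:24 hold the size in KB, 23:16 the associativity, 15:8 the number
 * of lines per tag and 7:0 the line size in bytes.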
5640 */
5641 static void
5642 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc)
5643 {
5644 switch (assoc) {
5645 case 0: /* reserved; ignore */
5646 break;
5647 default:
5648 add_cache_prop(devi, label, assoc_str, assoc);
5649 break;
5650 case 0xff:
5651 add_cache_prop(devi, label, fully_assoc, 1);
5652 break;
5653 }
5654 }
5655
5656 static void
5657 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
5658 {
5659 if (size == 0)
5660 return;
5661 add_cache_prop(devi, label, size_str, size);
5662 add_amd_assoc(devi, label, assoc);
5663 }
5664
5665 static void
5666 add_amd_cache(dev_info_t *devi, const char *label,
5667 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
5668 {
5669 if (size == 0 || line_size == 0)
5670 return;
5671 add_amd_assoc(devi, label, assoc);
5672 /*
5673 * Most AMD parts have a sectored cache. Multiple cache lines are
5674 * associated with each tag. A sector consists of all cache lines
5675 * associated with a tag. For example, the AMD K6-III has a sector
5676 * size of 2 cache lines per tag.
5677 */
5678 if (lines_per_tag != 0)
5679 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
5680 add_cache_prop(devi, label, line_str, line_size);
5681 add_cache_prop(devi, label, size_str, size * 1024);
5682 }
5683
5684 static void
5685 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc)
5686 {
5687 switch (assoc) {
5688 case 0: /* off */
5689 break;
5690 case 1:
5691 case 2:
5692 case 4:
5693 add_cache_prop(devi, label, assoc_str, assoc);
5694 break;
5695 case 6:
5696 add_cache_prop(devi, label, assoc_str, 8);
5697 break;
5698 case 8:
5699 add_cache_prop(devi, label, assoc_str, 16);
5700 break;
5701 case 0xf:
5702 add_cache_prop(devi, label, fully_assoc, 1);
5703 break;
5704 default: /* reserved; ignore */
5705 break;
5706 }
5707 }
5708
5709 static void
5710 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
5711 {
5712 if (size == 0 || assoc == 0)
5713 return;
5714 add_amd_l2_assoc(devi, label, assoc);
5715 add_cache_prop(devi, label, size_str, size);
5716 }
5717
5718 static void
5719 add_amd_l2_cache(dev_info_t *devi, const char *label,
5720 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
5721 {
5722 if (size == 0 || assoc == 0 || line_size == 0)
5723 return;
5724 add_amd_l2_assoc(devi, label, assoc);
5725 if (lines_per_tag != 0)
5726 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
5727 add_cache_prop(devi, label, line_str, line_size);
5728 add_cache_prop(devi, label, size_str, size * 1024);
5729 }
5730
5731 static void
5732 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi)
5733 {
5734 struct cpuid_regs *cp;
5735
5736 if (cpi->cpi_xmaxeax < 0x80000005)
5737 return;
5738 cp = &cpi->cpi_extd[5];
5739
5740 /*
5741 * 4M/2M L1 TLB configuration
5742 *
5743 * We report the size for 2M pages because AMD uses two
5744 * TLB entries for one 4M page.
5745 */
5746 add_amd_tlb(devi, "dtlb-2M",
5747 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16));
5748 add_amd_tlb(devi, "itlb-2M",
5749 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0));
5750
5751 /*
5752 * 4K L1 TLB configuration
5753 */
5754
5755 switch (cpi->cpi_vendor) {
5756 uint_t nentries;
5757 case X86_VENDOR_TM:
5758 if (cpi->cpi_family >= 5) {
5759 /*
5760 * Crusoe processors have 256 TLB entries, but
			 * the cpuid data format constrains them to
			 * reporting only 255 of them.
5763 */
5764 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255)
5765 nentries = 256;
5766 /*
5767 * Crusoe processors also have a unified TLB
5768 */
5769 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24),
5770 nentries);
5771 break;
5772 }
5773 /*FALLTHROUGH*/
5774 default:
		add_amd_tlb(devi, dtlb4k_str,
		    BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16));
		add_amd_tlb(devi, itlb4k_str,
		    BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0));
5779 break;
5780 }
5781
5782 /*
5783 * data L1 cache configuration
5784 */
5785
5786 add_amd_cache(devi, l1_dcache_str,
5787 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16),
5788 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0));
5789
5790 /*
5791 * code L1 cache configuration
5792 */
5793
5794 add_amd_cache(devi, l1_icache_str,
5795 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16),
5796 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0));
5797
5798 if (cpi->cpi_xmaxeax < 0x80000006)
5799 return;
5800 cp = &cpi->cpi_extd[6];
5801
5802 /* Check for a unified L2 TLB for large pages */
5803
5804 if (BITX(cp->cp_eax, 31, 16) == 0)
5805 add_amd_l2_tlb(devi, "l2-tlb-2M",
5806 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
5807 else {
5808 add_amd_l2_tlb(devi, "l2-dtlb-2M",
5809 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
5810 add_amd_l2_tlb(devi, "l2-itlb-2M",
5811 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
5812 }
5813
5814 /* Check for a unified L2 TLB for 4K pages */
5815
5816 if (BITX(cp->cp_ebx, 31, 16) == 0) {
		add_amd_l2_tlb(devi, "l2-tlb-4K",
		    BITX(cp->cp_ebx, 15, 12), BITX(cp->cp_ebx, 11, 0));
	} else {
		add_amd_l2_tlb(devi, "l2-dtlb-4K",
		    BITX(cp->cp_ebx, 31, 28), BITX(cp->cp_ebx, 27, 16));
		add_amd_l2_tlb(devi, "l2-itlb-4K",
		    BITX(cp->cp_ebx, 15, 12), BITX(cp->cp_ebx, 11, 0));
5824 }
5825
5826 add_amd_l2_cache(devi, l2_cache_str,
5827 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12),
5828 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0));
5829 }
5830
5831 /*
 * There are two basic ways that the x86 world describes its cache
 * and tlb architecture - Intel's way and AMD's way.
 *
 * Return which flavor of cache architecture we should use.
5836 */
5837 static int
5838 x86_which_cacheinfo(struct cpuid_info *cpi)
5839 {
5840 switch (cpi->cpi_vendor) {
5841 case X86_VENDOR_Intel:
5842 if (cpi->cpi_maxeax >= 2)
5843 return (X86_VENDOR_Intel);
5844 break;
5845 case X86_VENDOR_AMD:
5846 /*
5847 * The K5 model 1 was the first part from AMD that reported
5848 * cache sizes via extended cpuid functions.
5849 */
5850 if (cpi->cpi_family > 5 ||
5851 (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
5852 return (X86_VENDOR_AMD);
5853 break;
5854 case X86_VENDOR_TM:
5855 if (cpi->cpi_family >= 5)
5856 return (X86_VENDOR_AMD);
5857 /*FALLTHROUGH*/
5858 default:
5859 /*
5860 * If they have extended CPU data for 0x80000005
5861 * then we assume they have AMD-format cache
5862 * information.
5863 *
5864 * If not, and the vendor happens to be Cyrix,
		 * then try our Cyrix-specific handler.
5866 *
5867 * If we're not Cyrix, then assume we're using Intel's
5868 * table-driven format instead.
5869 */
5870 if (cpi->cpi_xmaxeax >= 0x80000005)
5871 return (X86_VENDOR_AMD);
5872 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix)
5873 return (X86_VENDOR_Cyrix);
5874 else if (cpi->cpi_maxeax >= 2)
5875 return (X86_VENDOR_Intel);
5876 break;
5877 }
5878 return (-1);
5879 }
5880
5881 void
5882 cpuid_set_cpu_properties(void *dip, processorid_t cpu_id,
5883 struct cpuid_info *cpi)
5884 {
5885 dev_info_t *cpu_devi;
5886 int create;
5887
5888 cpu_devi = (dev_info_t *)dip;
5889
5890 /* device_type */
5891 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
5892 "device_type", "cpu");
5893
5894 /* reg */
5895 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5896 "reg", cpu_id);
5897
5898 /* cpu-mhz, and clock-frequency */
5899 if (cpu_freq > 0) {
5900 long long mul;
5901
5902 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5903 "cpu-mhz", cpu_freq);
5904 if ((mul = cpu_freq * 1000000LL) <= INT_MAX)
5905 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5906 "clock-frequency", (int)mul);
5907 }
5908
5909 if (!is_x86_feature(x86_featureset, X86FSET_CPUID)) {
5910 return;
5911 }
5912
5913 /* vendor-id */
5914 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
5915 "vendor-id", cpi->cpi_vendorstr);
5916
5917 if (cpi->cpi_maxeax == 0) {
5918 return;
5919 }
5920
5921 /*
5922 * family, model, and step
5923 */
5924 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5925 "family", CPI_FAMILY(cpi));
5926 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5927 "cpu-model", CPI_MODEL(cpi));
5928 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5929 "stepping-id", CPI_STEP(cpi));
5930
5931 /* type */
5932 switch (cpi->cpi_vendor) {
5933 case X86_VENDOR_Intel:
5934 create = 1;
5935 break;
5936 default:
5937 create = 0;
5938 break;
5939 }
5940 if (create)
5941 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5942 "type", CPI_TYPE(cpi));
5943
5944 /* ext-family */
5945 switch (cpi->cpi_vendor) {
5946 case X86_VENDOR_Intel:
5947 case X86_VENDOR_AMD:
5948 create = cpi->cpi_family >= 0xf;
5949 break;
5950 default:
5951 create = 0;
5952 break;
5953 }
5954 if (create)
5955 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5956 "ext-family", CPI_FAMILY_XTD(cpi));
5957
5958 /* ext-model */
5959 switch (cpi->cpi_vendor) {
5960 case X86_VENDOR_Intel:
5961 create = IS_EXTENDED_MODEL_INTEL(cpi);
5962 break;
5963 case X86_VENDOR_AMD:
5964 create = CPI_FAMILY(cpi) == 0xf;
5965 break;
5966 default:
5967 create = 0;
5968 break;
5969 }
5970 if (create)
5971 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5972 "ext-model", CPI_MODEL_XTD(cpi));
5973
5974 /* generation */
5975 switch (cpi->cpi_vendor) {
5976 case X86_VENDOR_AMD:
5977 /*
5978 * AMD K5 model 1 was the first part to support this
5979 */
5980 create = cpi->cpi_xmaxeax >= 0x80000001;
5981 break;
5982 default:
5983 create = 0;
5984 break;
5985 }
5986 if (create)
5987 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5988 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8));
5989
5990 /* brand-id */
5991 switch (cpi->cpi_vendor) {
5992 case X86_VENDOR_Intel:
5993 /*
		 * brand id first appeared on Pentium III Xeon model 8
		 * and Celeron model 8 processors, and on Opteron.
5996 */
5997 create = cpi->cpi_family > 6 ||
5998 (cpi->cpi_family == 6 && cpi->cpi_model >= 8);
5999 break;
6000 case X86_VENDOR_AMD:
6001 create = cpi->cpi_family >= 0xf;
6002 break;
6003 default:
6004 create = 0;
6005 break;
6006 }
6007 if (create && cpi->cpi_brandid != 0) {
6008 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6009 "brand-id", cpi->cpi_brandid);
6010 }
6011
6012 /* chunks, and apic-id */
6013 switch (cpi->cpi_vendor) {
6014 /*
6015 * first available on Pentium IV and Opteron (K8)
6016 */
6017 case X86_VENDOR_Intel:
6018 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
6019 break;
6020 case X86_VENDOR_AMD:
6021 create = cpi->cpi_family >= 0xf;
6022 break;
6023 default:
6024 create = 0;
6025 break;
6026 }
6027 if (create) {
6028 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6029 "chunks", CPI_CHUNKS(cpi));
6030 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6031 "apic-id", cpi->cpi_apicid);
6032 if (cpi->cpi_chipid >= 0) {
6033 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6034 "chip#", cpi->cpi_chipid);
6035 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6036 "clog#", cpi->cpi_clogid);
6037 }
6038 }
6039
6040 /* cpuid-features */
6041 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6042 "cpuid-features", CPI_FEATURES_EDX(cpi));
6043
6044
6045 /* cpuid-features-ecx */
6046 switch (cpi->cpi_vendor) {
6047 case X86_VENDOR_Intel:
6048 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
6049 break;
6050 case X86_VENDOR_AMD:
6051 create = cpi->cpi_family >= 0xf;
6052 break;
6053 default:
6054 create = 0;
6055 break;
6056 }
6057 if (create)
6058 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6059 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi));
6060
6061 /* ext-cpuid-features */
6062 switch (cpi->cpi_vendor) {
6063 case X86_VENDOR_Intel:
6064 case X86_VENDOR_AMD:
6065 case X86_VENDOR_Cyrix:
6066 case X86_VENDOR_TM:
6067 case X86_VENDOR_Centaur:
6068 create = cpi->cpi_xmaxeax >= 0x80000001;
6069 break;
6070 default:
6071 create = 0;
6072 break;
6073 }
6074 if (create) {
6075 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6076 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi));
6077 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6078 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi));
6079 }
6080
6081 /*
6082 * Brand String first appeared in Intel Pentium IV, AMD K5
	 * model 1, and Cyrix GXm. On earlier models we try to
	 * simulate something similar .. so this string should always
	 * say -something- about the processor, however lame.
6086 */
6087 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
6088 "brand-string", cpi->cpi_brandstr);
6089
6090 /*
6091 * Finally, cache and tlb information
6092 */
6093 switch (x86_which_cacheinfo(cpi)) {
6094 case X86_VENDOR_Intel:
6095 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
6096 break;
6097 case X86_VENDOR_Cyrix:
6098 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
6099 break;
6100 case X86_VENDOR_AMD:
6101 amd_cache_info(cpi, cpu_devi);
6102 break;
6103 default:
6104 break;
6105 }
6106 }
6107
6108 struct l2info {
6109 int *l2i_csz;
6110 int *l2i_lsz;
6111 int *l2i_assoc;
6112 int l2i_ret;
6113 };
6114
6115 /*
6116 * A cacheinfo walker that fetches the size, line-size and associativity
6117 * of the L2 cache
6118 */
6119 static int
6120 intel_l2cinfo(void *arg, const struct cachetab *ct)
6121 {
6122 struct l2info *l2i = arg;
6123 int *ip;
6124
6125 if (ct->ct_label != l2_cache_str &&
6126 ct->ct_label != sl2_cache_str)
6127 return (0); /* not an L2 -- keep walking */
6128
6129 if ((ip = l2i->l2i_csz) != NULL)
6130 *ip = ct->ct_size;
6131 if ((ip = l2i->l2i_lsz) != NULL)
6132 *ip = ct->ct_line_size;
6133 if ((ip = l2i->l2i_assoc) != NULL)
6134 *ip = ct->ct_assoc;
6135 l2i->l2i_ret = ct->ct_size;
6136 return (1); /* was an L2 -- terminate walk */
6137 }
6138
6139 /*
6140 * AMD L2/L3 Cache and TLB Associativity Field Definition:
6141 *
6142 * Unlike the associativity for the L1 cache and tlb where the 8 bit
6143 * value is the associativity, the associativity for the L2 cache and
6144 * tlb is encoded in the following table. The 4 bit L2 value serves as
6145 * an index into the amd_afd[] array to determine the associativity.
6146 * -1 is undefined. 0 is fully associative.
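 *
 * For example, an encoded value of 6 indexes amd_afd[6] and denotes an
 * 8-way set associative cache.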
6147 */
6148
6149 static int amd_afd[] =
6150 {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0};
6151
6152 static void
6153 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i)
6154 {
6155 struct cpuid_regs *cp;
6156 uint_t size, assoc;
6157 int i;
6158 int *ip;
6159
6160 if (cpi->cpi_xmaxeax < 0x80000006)
6161 return;
6162 cp = &cpi->cpi_extd[6];
6163
6164 if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 &&
6165 (size = BITX(cp->cp_ecx, 31, 16)) != 0) {
6166 uint_t cachesz = size * 1024;
6167 assoc = amd_afd[i];
6168
6169 ASSERT(assoc != -1);
6170
6171 if ((ip = l2i->l2i_csz) != NULL)
6172 *ip = cachesz;
6173 if ((ip = l2i->l2i_lsz) != NULL)
6174 *ip = BITX(cp->cp_ecx, 7, 0);
6175 if ((ip = l2i->l2i_assoc) != NULL)
6176 *ip = assoc;
6177 l2i->l2i_ret = cachesz;
6178 }
6179 }
6180
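/*
 * Return the L2 cache size in bytes for this CPU, or -1 if it cannot be
 * determined, optionally filling in the size, line size and associativity
 * through the csz, lsz and assoc pointers.  A minimal usage sketch (the
 * caller shown here is hypothetical, not taken from this file):
 *
 *	int csz, lsz, assoc;
 *	if (getl2cacheinfo(CPU, &csz, &lsz, &assoc) > 0)
 *		... size copy or prefetch loops from lsz ...
 */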
6181 int
6182 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc)
6183 {
6184 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
6185 struct l2info __l2info, *l2i = &__l2info;
6186
6187 l2i->l2i_csz = csz;
6188 l2i->l2i_lsz = lsz;
6189 l2i->l2i_assoc = assoc;
6190 l2i->l2i_ret = -1;
6191
6192 switch (x86_which_cacheinfo(cpi)) {
6193 case X86_VENDOR_Intel:
6194 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
6195 break;
6196 case X86_VENDOR_Cyrix:
6197 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
6198 break;
6199 case X86_VENDOR_AMD:
6200 amd_l2cacheinfo(cpi, l2i);
6201 break;
6202 default:
6203 break;
6204 }
6205 return (l2i->l2i_ret);
6206 }
6207
6208 #if !defined(__xpv)
6209
6210 uint32_t *
6211 cpuid_mwait_alloc(cpu_t *cpu)
6212 {
6213 uint32_t *ret;
6214 size_t mwait_size;
6215
6216 ASSERT(cpuid_checkpass(CPU, 2));
6217
6218 mwait_size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max;
6219 if (mwait_size == 0)
6220 return (NULL);
6221
6222 /*
6223 * kmem_alloc() returns cache line size aligned data for mwait_size
6224 * allocations. mwait_size is currently cache line sized. Neither
	 * of these implementation details is guaranteed to be true in the
6226 * future.
6227 *
6228 * First try allocating mwait_size as kmem_alloc() currently returns
6229 * correctly aligned memory. If kmem_alloc() does not return
6230 * mwait_size aligned memory, then use mwait_size ROUNDUP.
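	 * Allocating twice mwait_size guarantees that an mwait_size-aligned
	 * region of mwait_size bytes lies somewhere within the buffer, which
	 * P2ROUNDUP then locates.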
6231 *
6232 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we
6233 * decide to free this memory.
6234 */
6235 ret = kmem_zalloc(mwait_size, KM_SLEEP);
6236 if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) {
6237 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
6238 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size;
6239 *ret = MWAIT_RUNNING;
6240 return (ret);
6241 } else {
6242 kmem_free(ret, mwait_size);
6243 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP);
6244 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
6245 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2;
6246 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size);
6247 *ret = MWAIT_RUNNING;
6248 return (ret);
6249 }
6250 }
6251
6252 void
6253 cpuid_mwait_free(cpu_t *cpu)
6254 {
6255 if (cpu->cpu_m.mcpu_cpi == NULL) {
6256 return;
6257 }
6258
6259 if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL &&
6260 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) {
6261 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual,
6262 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual);
6263 }
6264
6265 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL;
6266 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0;
6267 }
6268
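/*
 * Replace the tsc_read() entry point with the variant appropriate for this
 * processor by copying the pre-assembled routine text over it: rdtsc
 * preceded by mfence or lfence for serialization, rdtscp, or a stub when
 * no usable TSC is present.
 */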
6269 void
6270 patch_tsc_read(int flag)
6271 {
6272 size_t cnt;
6273
6274 switch (flag) {
6275 case TSC_NONE:
6276 cnt = &_no_rdtsc_end - &_no_rdtsc_start;
6277 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt);
6278 break;
6279 case TSC_RDTSC_MFENCE:
6280 cnt = &_tsc_mfence_end - &_tsc_mfence_start;
6281 (void) memcpy((void *)tsc_read,
6282 (void *)&_tsc_mfence_start, cnt);
6283 break;
6284 case TSC_RDTSC_LFENCE:
6285 cnt = &_tsc_lfence_end - &_tsc_lfence_start;
6286 (void) memcpy((void *)tsc_read,
6287 (void *)&_tsc_lfence_start, cnt);
6288 break;
6289 case TSC_TSCP:
6290 cnt = &_tscp_end - &_tscp_start;
6291 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt);
6292 break;
6293 default:
6294 /* Bail for unexpected TSC types. (TSC_NONE covers 0) */
		cmn_err(CE_PANIC, "Unrecognized TSC type: %d", flag);
6296 break;
6297 }
6298 tsc_type = flag;
6299 }
6300
6301 int
6302 cpuid_deep_cstates_supported(void)
6303 {
6304 struct cpuid_info *cpi;
6305 struct cpuid_regs regs;
6306
6307 ASSERT(cpuid_checkpass(CPU, 1));
6308
6309 cpi = CPU->cpu_m.mcpu_cpi;
6310
6311 if (!is_x86_feature(x86_featureset, X86FSET_CPUID))
6312 return (0);
6313
6314 switch (cpi->cpi_vendor) {
6315 case X86_VENDOR_Intel:
6316 if (cpi->cpi_xmaxeax < 0x80000007)
6317 return (0);
6318
6319 /*
		 * Does the TSC run at a constant rate in all ACPI C-states?
6321 */
6322 regs.cp_eax = 0x80000007;
		(void) __cpuid_insn(&regs);
6324 return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE);
6325
6326 default:
6327 return (0);
6328 }
6329 }
6330
6331 #endif /* !__xpv */
6332
6333 void
6334 post_startup_cpu_fixups(void)
6335 {
6336 #ifndef __xpv
6337 /*
6338 * Some AMD processors support C1E state. Entering this state will
6339 * cause the local APIC timer to stop, which we can't deal with at
6340 * this time.
6341 */
6342 if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) {
6343 on_trap_data_t otd;
6344 uint64_t reg;
6345
6346 if (!on_trap(&otd, OT_DATA_ACCESS)) {
6347 reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT);
6348 /* Disable C1E state if it is enabled by BIOS */
6349 if ((reg >> AMD_ACTONCMPHALT_SHIFT) &
6350 AMD_ACTONCMPHALT_MASK) {
6351 reg &= ~(AMD_ACTONCMPHALT_MASK <<
6352 AMD_ACTONCMPHALT_SHIFT);
6353 wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg);
6354 }
6355 }
6356 no_trap();
6357 }
6358 #endif /* !__xpv */
6359 }
6360
6361 void
6362 enable_pcid(void)
6363 {
6364 if (x86_use_pcid == -1)
6365 x86_use_pcid = is_x86_feature(x86_featureset, X86FSET_PCID);
6366
6367 if (x86_use_invpcid == -1) {
6368 x86_use_invpcid = is_x86_feature(x86_featureset,
6369 X86FSET_INVPCID);
6370 }
6371
6372 if (!x86_use_pcid)
6373 return;
6374
6375 /*
	 * Intel says that on setting PCIDE, the CPU immediately starts using
	 * the PCID bits; better make sure there's nothing there.
6378 */
6379 ASSERT((getcr3() & MMU_PAGEOFFSET) == PCID_NONE);
6380
6381 setcr4(getcr4() | CR4_PCIDE);
6382 }
6383
6384 /*
6385 * Setup necessary registers to enable XSAVE feature on this processor.
6386 * This function needs to be called early enough, so that no xsave/xrstor
6387 * ops will execute on the processor before the MSRs are properly set up.
6388 *
6389 * Current implementation has the following assumption:
6390 * - cpuid_pass1() is done, so that X86 features are known.
6391 * - fpu_probe() is done, so that fp_save_mech is chosen.
6392 */
6393 void
6394 xsave_setup_msr(cpu_t *cpu)
6395 {
6396 ASSERT(fp_save_mech == FP_XSAVE);
6397 ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
6398
6399 /* Enable OSXSAVE in CR4. */
6400 setcr4(getcr4() | CR4_OSXSAVE);
6401 /*
6402 * Update SW copy of ECX, so that /dev/cpu/self/cpuid will report
6403 * correct value.
6404 */
6405 cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_ecx |= CPUID_INTC_ECX_OSXSAVE;
6406 setup_xfem();
6407 }
6408
6409 /*
 * Starting with the Westmere processor, the local
6411 * APIC timer will continue running in all C-states,
6412 * including the deepest C-states.
6413 */
6414 int
6415 cpuid_arat_supported(void)
6416 {
6417 struct cpuid_info *cpi;
6418 struct cpuid_regs regs;
6419
6420 ASSERT(cpuid_checkpass(CPU, 1));
6421 ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
6422
6423 cpi = CPU->cpu_m.mcpu_cpi;
6424
6425 switch (cpi->cpi_vendor) {
6426 case X86_VENDOR_Intel:
6427 /*
6428 * Always-running Local APIC Timer is
6429 * indicated by CPUID.6.EAX[2].
6430 */
6431 if (cpi->cpi_maxeax >= 6) {
6432 regs.cp_eax = 6;
			(void) cpuid_insn(NULL, &regs);
6434 return (regs.cp_eax & CPUID_CSTATE_ARAT);
6435 } else {
6436 return (0);
6437 }
6438 default:
6439 return (0);
6440 }
6441 }
6442
6443 /*
6444 * Check support for Intel ENERGY_PERF_BIAS feature
6445 */
6446 int
6447 cpuid_iepb_supported(struct cpu *cp)
6448 {
6449 struct cpuid_info *cpi = cp->cpu_m.mcpu_cpi;
6450 struct cpuid_regs regs;
6451
6452 ASSERT(cpuid_checkpass(cp, 1));
6453
6454 if (!(is_x86_feature(x86_featureset, X86FSET_CPUID)) ||
6455 !(is_x86_feature(x86_featureset, X86FSET_MSR))) {
6456 return (0);
6457 }
6458
6459 /*
6460 * Intel ENERGY_PERF_BIAS MSR is indicated by
6461 * capability bit CPUID.6.ECX.3
6462 */
6463 if ((cpi->cpi_vendor != X86_VENDOR_Intel) || (cpi->cpi_maxeax < 6))
6464 return (0);
6465
6466 regs.cp_eax = 0x6;
	(void) cpuid_insn(NULL, &regs);
6468 return (regs.cp_ecx & CPUID_EPB_SUPPORT);
6469 }
6470
6471 /*
6472 * Check support for TSC deadline timer
6473 *
 * The TSC deadline timer provides a superior software programming
 * model over the local APIC timer, one that eliminates "time drift".
6476 * Instead of specifying a relative time, software specifies an
6477 * absolute time as the target at which the processor should
6478 * generate a timer event.
6479 */
6480 int
6481 cpuid_deadline_tsc_supported(void)
6482 {
6483 struct cpuid_info *cpi = CPU->cpu_m.mcpu_cpi;
6484 struct cpuid_regs regs;
6485
6486 ASSERT(cpuid_checkpass(CPU, 1));
6487 ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
6488
6489 switch (cpi->cpi_vendor) {
6490 case X86_VENDOR_Intel:
6491 if (cpi->cpi_maxeax >= 1) {
6492 regs.cp_eax = 1;
			(void) cpuid_insn(NULL, &regs);
6494 return (regs.cp_ecx & CPUID_DEADLINE_TSC);
6495 } else {
6496 return (0);
6497 }
6498 default:
6499 return (0);
6500 }
6501 }
6502
6503 #if defined(__amd64) && !defined(__xpv)
6504 /*
 * Patch in versions of bcopy for high performance Intel Nehalem (Nhm)
 * processors and later...
6507 */
6508 void
6509 patch_memops(uint_t vendor)
6510 {
6511 size_t cnt, i;
6512 caddr_t to, from;
6513
6514 if ((vendor == X86_VENDOR_Intel) &&
6515 is_x86_feature(x86_featureset, X86FSET_SSE4_2)) {
6516 cnt = &bcopy_patch_end - &bcopy_patch_start;
6517 to = &bcopy_ck_size;
6518 from = &bcopy_patch_start;
6519 for (i = 0; i < cnt; i++) {
6520 *to++ = *from++;
6521 }
6522 }
6523 }
6524 #endif /* __amd64 && !__xpv */
6525
6526 /*
6527 * We're being asked to tell the system how many bits are required to represent
6528 * the various thread and strand IDs. While it's tempting to derive this based
6529 * on the values in cpi_ncore_per_chip and cpi_ncpu_per_chip, that isn't quite
6530 * correct. Instead, this needs to be based on the number of bits that the APIC
6531 * allows for these different configurations. We only update these to a larger
6532 * value if we find one.
6533 */
6534 void
6535 cpuid_get_ext_topo(cpu_t *cpu, uint_t *core_nbits, uint_t *strand_nbits)
6536 {
6537 struct cpuid_info *cpi;
6538
6539 VERIFY(cpuid_checkpass(CPU, 1));
6540 cpi = cpu->cpu_m.mcpu_cpi;
6541
6542 if (cpi->cpi_ncore_bits > *core_nbits) {
6543 *core_nbits = cpi->cpi_ncore_bits;
6544 }
6545
6546 if (cpi->cpi_nthread_bits > *strand_nbits) {
6547 *strand_nbits = cpi->cpi_nthread_bits;
6548 }
6549 }
6550
6551 void
6552 cpuid_pass_ucode(cpu_t *cpu, uchar_t *fset)
6553 {
6554 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
6555 struct cpuid_regs cp;
6556
6557 /*
6558 * Reread the CPUID portions that we need for various security
6559 * information.
6560 */
6561 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
6562 /*
6563 * Check if we now have leaf 7 available to us.
6564 */
6565 if (cpi->cpi_maxeax < 7) {
6566 bzero(&cp, sizeof (cp));
6567 cp.cp_eax = 0;
6568 cpi->cpi_maxeax = __cpuid_insn(&cp);
6569 if (cpi->cpi_maxeax < 7)
6570 return;
6571 }
6572
6573 bzero(&cp, sizeof (cp));
6574 cp.cp_eax = 7;
6575 cp.cp_ecx = 0;
6576 (void) __cpuid_insn(&cp);
6577 cpi->cpi_std[7] = cp;
6578 } else if (cpi->cpi_vendor == X86_VENDOR_AMD) {
6579 /* No xcpuid support */
6580 if (cpi->cpi_family < 5 ||
6581 (cpi->cpi_family == 5 && cpi->cpi_model < 1))
6582 return;
6583
6584 if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8) {
6585 bzero(&cp, sizeof (cp));
6586 cp.cp_eax = CPUID_LEAF_EXT_0;
6587 cpi->cpi_xmaxeax = __cpuid_insn(&cp);
6588 if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8) {
6589 return;
6590 }
6591 }
6592
6593 bzero(&cp, sizeof (cp));
6594 cp.cp_eax = CPUID_LEAF_EXT_8;
6595 (void) __cpuid_insn(&cp);
6596 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8, &cp);
6597 cpi->cpi_extd[8] = cp;
6598 } else {
6599 /*
6600 * Nothing to do here. Return an empty set which has already
6601 * been zeroed for us.
6602 */
6603 return;
6604 }
6605 cpuid_scan_security(cpu, fset);
6606 }
6607
6608 /* ARGSUSED */
6609 static int
6610 cpuid_post_ucodeadm_xc(xc_arg_t arg0, xc_arg_t arg1, xc_arg_t arg2)
6611 {
6612 uchar_t *fset;
6613
6614 fset = (uchar_t *)(arg0 + sizeof (x86_featureset) * CPU->cpu_id);
6615 cpuid_pass_ucode(CPU, fset);
6616
6617 return (0);
6618 }
6619
6620 /*
 * After a microcode update where the version has changed, we need to
6622 * rescan CPUID. To do this we check every CPU to make sure that they have the
6623 * same microcode. Then we perform a cross call to all such CPUs. It's the
6624 * caller's job to make sure that no one else can end up doing an update while
6625 * this is going on.
6626 *
6627 * We assume that the system is microcode capable if we're called.
6628 */
6629 void
6630 cpuid_post_ucodeadm(void)
6631 {
6632 uint32_t rev;
6633 int i;
6634 struct cpu *cpu;
6635 cpuset_t cpuset;
6636 void *argdata;
6637 uchar_t *f0;
6638
6639 argdata = kmem_zalloc(sizeof (x86_featureset) * NCPU, KM_SLEEP);
6640
6641 mutex_enter(&cpu_lock);
6642 cpu = cpu_get(0);
6643 rev = cpu->cpu_m.mcpu_ucode_info->cui_rev;
6644 CPUSET_ONLY(cpuset, 0);
6645 for (i = 1; i < max_ncpus; i++) {
6646 if ((cpu = cpu_get(i)) == NULL)
6647 continue;
6648
6649 if (cpu->cpu_m.mcpu_ucode_info->cui_rev != rev) {
6650 panic("post microcode update CPU %d has differing "
6651 "microcode revision (%u) from CPU 0 (%u)",
6652 i, cpu->cpu_m.mcpu_ucode_info->cui_rev, rev);
6653 }
6654 CPUSET_ADD(cpuset, i);
6655 }
6656
6657 kpreempt_disable();
6658 xc_sync((xc_arg_t)argdata, 0, 0, CPUSET2BV(cpuset),
6659 cpuid_post_ucodeadm_xc);
6660 kpreempt_enable();
6661
6662 /*
6663 * OK, now look at each CPU and see if their feature sets are equal.
6664 */
6665 f0 = argdata;
6666 for (i = 1; i < max_ncpus; i++) {
6667 uchar_t *fset;
6668 if (!CPU_IN_SET(cpuset, i))
6669 continue;
6670
6671 fset = (uchar_t *)((uintptr_t)argdata +
6672 sizeof (x86_featureset) * i);
6673
6674 if (!compare_x86_featureset(f0, fset)) {
6675 panic("Post microcode update CPU %d has "
6676 "differing security feature (%p) set from CPU 0 "
6677 "(%p), not appending to feature set", i,
6678 (void *)fset, (void *)f0);
6679 }
6680 }
6681
6682 mutex_exit(&cpu_lock);
6683
6684 for (i = 0; i < NUM_X86_FEATURES; i++) {
6685 cmn_err(CE_CONT, "?post-ucode x86_feature: %s\n",
6686 x86_feature_names[i]);
6687 if (is_x86_feature(f0, i)) {
6688 add_x86_feature(x86_featureset, i);
6689 }
6690 }
6691 kmem_free(argdata, sizeof (x86_featureset) * NCPU);
6692 }