1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
24 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
25 * Copyright 2014 Josef "Jeff" Sipek <jeffpc@josefsipek.net>
26 */
27 /*
28 * Copyright (c) 2010, Intel Corporation.
29 * All rights reserved.
30 */
31 /*
32 * Portions Copyright 2009 Advanced Micro Devices, Inc.
33 */
34 /*
35 * Copyright 2019, Joyent, Inc.
36 */
37
38 /*
39 * CPU Identification logic
40 *
41 * The purpose of this file and its companion, cpuid_subr.c, is to help deal
42 * with the identification of CPUs, their features, and their topologies. More
43 * specifically, this file helps drive the following:
44 *
45 * 1. Enumeration of features of the processor which are used by the kernel to
46 * determine what features to enable or disable. These may be instruction set
47 * enhancements or features that we use.
48 *
49 * 2. Enumeration of instruction set architecture (ISA) additions that userland
50 * will be told about through the auxiliary vector.
51 *
52 * 3. Understanding the physical topology of the CPU such as the number of
53 * caches, how many cores it has, whether or not it supports simultaneous
54 * multi-threading (SMT), etc.
55 *
56 * ------------------------
57 * CPUID History and Basics
58 * ------------------------
59 *
60 * The cpuid instruction was added by Intel roughly around the time that the
61 * original Pentium was introduced. The purpose of cpuid was to tell in a
62 * programmatic fashion information about the CPU that previously was guessed
63 * at. For example, an important part of cpuid is that we can know what
64 * extensions to the ISA exist. If you use an invalid opcode you would get a
65 * #UD, so this method allows a program (whether a user program or the kernel)
66 * to determine what exists without crashing or getting a SIGILL. Of course,
67 * this was also during the era of the clones and the AMD Am5x86. The vendor
68 * name shows up first in cpuid for a reason.
69 *
70 * cpuid information is broken down into ranges called a 'leaf'. Each leaf puts
71 * unique values into the registers %eax, %ebx, %ecx, and %edx and each leaf has
72 * its own meaning. The different leaves are broken down into different regions:
73 *
74 * [ 0, 7fffffff ] This region is called the 'basic'
75 * region. This region is generally defined
76 * by Intel, though some of the original
77 * portions have different meanings based
78 * on the manufacturer. These days, Intel
79 * adds most new features to this region.
80 * AMD adds non-Intel compatible
81 * information in the third, extended
82 * region. Intel uses this for everything
83 * including ISA extensions, CPU
84 * features, cache information, topology,
85 * and more.
86 *
87 * There is a hole carved out of this
88 * region which is reserved for
89 * hypervisors.
90 *
91 * [ 40000000, 4fffffff ] This region, which is found in the
92 * middle of the previous region, is
93 * explicitly promised to never be used by
94 * CPUs. Instead, it is used by hypervisors
95 * to communicate information about
96 * themselves to the operating system. The
97 * values and details are unique for each
98 * hypervisor.
99 *
100 * [ 80000000, ffffffff ] This region is called the 'extended'
101 * region. Some of the low leaves mirror
102 * parts of the basic leaves. This region
103 * has generally been used by AMD for
104 * various extensions. For example, AMD-
105 * specific information about caches,
106 * features, and topology are found in this
107 * region.
108 *
109 * To request a given leaf, place the leaf number in %eax, zero %ebx, %ecx,
110 * and %edx, and then issue the cpuid instruction. At the first leaf in each of
111 * the ranges, one of the primary things returned is the maximum valid leaf in
112 * that range. This allows for discovery of what range of CPUID is valid.
113 *
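 * As a hedged sketch (using the struct cpuid_regs representation found later
 * in this file and the kernel's __cpuid_insn() helper; regs, max_basic, and
 * max_extended are illustrative locals), discovering the maximum valid leaves
 * looks roughly like this:
 *
 *	struct cpuid_regs regs;
 *
 *	regs.cp_eax = 0;
 *	regs.cp_ebx = regs.cp_ecx = regs.cp_edx = 0;
 *	(void) __cpuid_insn(&regs);
 *	max_basic = regs.cp_eax;
 *
 *	regs.cp_eax = 0x80000000;
 *	regs.cp_ebx = regs.cp_ecx = regs.cp_edx = 0;
 *	(void) __cpuid_insn(&regs);
 *	max_extended = regs.cp_eax;
 *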
114 * The CPUs have potentially surprising behavior when using an invalid leaf or
115 * unimplemented leaf. If the requested leaf is within the valid basic or
116 * extended range, but is unimplemented, then %eax, %ebx, %ecx, and %edx will be
117 * set to zero. However, if you specify a leaf that is outside of a valid range,
118 * then instead it will be filled with the last valid _basic_ leaf. For example,
119 * if the maximum basic value is on leaf 0x3, then issuing a cpuid for leaf 4 or
120 * an invalid extended leaf will return the information for leaf 3.
121 *
122 * Some leaves are broken down into sub-leaves. This means that the value
123 * depends on both the leaf asked for in %eax and a secondary register. For
124 * example, Intel uses the value in %ecx on leaf 7 to indicate a sub-leaf to get
125 * additional information. Or when getting topology information in leaf 0xb, the
126 * initial value in %ecx changes which level of the topology that you are
127 * getting information about.
128 *
129 * cpuid values are always kept to 32 bits regardless of whether or not the
130 * program is in 64-bit mode. When executing in 64-bit mode, the upper
131 * 32 bits of the registers are always set to zero so that the values are the
132 * same regardless of execution mode.
133 *
134 * ----------------------
135 * Identifying Processors
136 * ----------------------
137 *
138 * We can identify a processor in two steps. The first step looks at cpuid leaf
139 * 0. Leaf 0 contains the processor's vendor information. This is done by
140 * putting a 12 character string in %ebx, %edx, and %ecx (in that order). On
141 * AMD, it is 'AuthenticAMD' and on Intel it is 'GenuineIntel'.
142 *
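 * A rough sketch of how that string is assembled (vendorstr here stands in
 * for a 13-byte buffer; the real code stores the result in cpi_vendorstr):
 *
 *	regs.cp_eax = 0;
 *	regs.cp_ebx = regs.cp_ecx = regs.cp_edx = 0;
 *	(void) __cpuid_insn(&regs);
 *	bcopy(&regs.cp_ebx, &vendorstr[0], 4);
 *	bcopy(&regs.cp_edx, &vendorstr[4], 4);
 *	bcopy(&regs.cp_ecx, &vendorstr[8], 4);
 *	vendorstr[12] = '\0';
 *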
143 * From there, a processor is identified by a combination of three different
144 * values:
145 *
146 * 1. Family
147 * 2. Model
148 * 3. Stepping
149 *
150 * Each vendor uses the family and model to uniquely identify a processor. The
151 * way that family and model are changed depends on the vendor. For example,
152 * Intel has been using family 0x6 for almost all of their processors since the
153 * Pentium Pro/Pentium II era, often called the P6. The model is used to
154 * identify the exact processor. Different models are often used for the client
155 * (consumer) and server parts. Even though each processor often has major
156 * architectural differences, they still are considered the same family by
157 * Intel.
158 *
159 * On the other hand, each major AMD architecture generally has its own family.
160 * For example, the K8 is family 0xf, Bulldozer 0x15, and Zen 0x17. Within a
161 * family, the model number is used to help identify specific processors.
162 *
163 * The stepping is used to refer to a revision of a specific microprocessor. The
164 * term comes from equipment used to produce masks that are used to create
165 * integrated circuits.
166 *
167 * The information is present in leaf 1, %eax. In technical documentation you
168 * will see the terms extended model and extended family. The original family,
169 * model, and stepping fields were each 4 bits wide. When the base family is
170 * 0xf, the extended family field (previously reserved bits) is added to it to
171 * form the full family. Similarly, the extended model field is combined with
172 * the base model as its upper four bits on family 0xf (and 0x6 on Intel).
173 *
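 * Using the CPI_FAMILY, CPI_FAMILY_XTD, CPI_MODEL, and CPI_MODEL_XTD macros
 * defined later in this file, the combined values are computed roughly as
 * follows (a sketch; the real code also has vendor-specific checks):
 *
 *	family = CPI_FAMILY(cpi);
 *	if (family == 0xf)
 *		family += CPI_FAMILY_XTD(cpi);
 *
 *	model = CPI_MODEL(cpi);
 *	if (family == 0x6 || family >= 0xf)
 *		model += CPI_MODEL_XTD(cpi) << 4;
 *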
174 * When we process this information, we store the full family, model, and
175 * stepping in the struct cpuid_info members cpi_family, cpi_model, and
176 * cpi_step, respectively. Whenever you are performing comparisons with the
177 * family, model, and stepping, you should use these members and not the raw
178 * values from cpuid. If you must use the raw values from cpuid directly, you
179 * must make sure that you add the extended model and family to the base model
180 * and family.
181 *
182 * In general, we do not use information about the family, model, and stepping
183 * to determine whether or not a feature is present; that is generally driven by
184 * specific leaves. However, when something we care about on the processor is
185 * not considered 'architectural', meaning that it is specific to a set of
186 * processors and not promised in the architecture model to be consistent from
187 * generation to generation, then we will fall back on this information. The
188 * most common cases where this comes up are when we have to work around errata
189 * in the processor, are dealing with processor-specific features such as CPU
190 * performance counters, or we want to provide additional information for things
191 * such as fault management.
192 *
193 * While processors also do have a brand string, which is the name that people
194 * are familiar with when buying the processor, they are not meant for
195 * programmatic consumption. That is what the family, model, and stepping are
196 * for.
197 *
198 * ------------
199 * CPUID Passes
200 * ------------
201 *
202 * As part of performing feature detection, we break this into several different
203 * passes. The passes are as follows:
204 *
205 * Pass 0 This is a primordial pass done in locore.s to deal with
206 * Cyrix CPUs that don't support cpuid. The reality is that
207 * we likely don't run on them any more, but there is still
208 * logic for handling them.
209 *
210 * Pass 1 This is the primary pass and is responsible for doing a
211 * large number of different things:
212 *
213 * 1. Determining which vendor manufactured the CPU and
214 * determining the family, model, and stepping information.
215 *
216 * 2. Gathering a large number of feature flags to
217 * determine which features the CPU supports and which
218 * indicate things that we need to do other work in the OS
219 * to enable. Features detected this way are added to the
220 * x86_featureset which can be queried to
221 * determine what we should do. This includes processing
222 * all of the basic and extended CPU features that we care
223 * about.
224 *
225 * 3. Determining the CPU's topology. This includes
226 * information about how many cores and threads are present
227 * in the package. It also is responsible for figuring out
228 * which logical CPUs are potentially part of the same core
229 * and what other resources they might share. For more
230 * information see the 'Topology' section.
231 *
232 * 4. Determining the set of CPU security-specific features
233 * that we need to worry about and determine the
234 * appropriate set of workarounds.
235 *
236 * Pass 1 on the boot CPU occurs before KMDB is started.
237 *
238 * Pass 2 The second pass is done after startup(). Here, we check
239 * other miscellaneous features. Most of this is gathering
240 * additional basic and extended features that we'll use in
241 * later passes or for debugging support.
242 *
243 * Pass 3 The third pass occurs after the kernel memory allocator
244 * has been fully initialized. This gathers information
245 * where we might need dynamic memory available for our
246 * uses. This includes several varying width leaves that
247 * have cache information and the processor's brand string.
248 *
249 * Pass 4 The fourth and final normal pass is performed after the
250 * kernel has brought most everything online. This is
251 * invoked from post_startup(). In this pass, we go through
252 * the set of features that we have enabled and turn that
253 * into the hardware auxiliary vector features that
254 * userland receives. This is used by userland, primarily
255 * by the run-time link-editor (RTLD), though userland
256 * software could also refer to it directly.
257 *
258 * Microcode After a microcode update, we do a selective rescan of
259 * the cpuid leaves to determine what features have
260 * changed. Microcode updates can provide more details
261 * about security related features to deal with issues like
262 * Spectre and L1TF. On occasion, vendors have violated
263 * their contract and removed bits. However, we don't try
264 * to detect that because that puts us in a situation that
265 * we really can't deal with. As such, the only thing we
266 * rescan are security related features today. See
267 * cpuid_pass_ucode().
268 *
269 * All of the passes (except pass 0) are run on all CPUs. However, for the most
270 * part we only care about what the boot CPU says about this information and use
271 * the other CPUs as a rough guide to sanity check that we have the same feature
272 * set.
273 *
274 * We do not support running multiple logical CPUs with disjoint, let alone
275 * different, feature sets.
276 *
277 * ------------------
278 * Processor Topology
279 * ------------------
280 *
281 * One of the important things that we need to do is to understand the topology
282 * of the underlying processor. When we say topology in this case, we're trying
283 * to understand the relationship between the logical CPUs that the operating
284 * system sees and the underlying physical layout. Different logical CPUs may
285 * share different resources which can have important consequences for the
286 * performance of the system. For example, they may share caches, execution
287 * units, and more.
288 *
289 * The topology of the processor changes from generation to generation and
290 * vendor to vendor. Along with that, different vendors use different
291 * terminology, and the operating system itself uses occasionally overlapping
292 * terminology. It's important to understand what this topology looks like so
293 * one can understand the different things that we try to calculate and
294 * determine.
295 *
296 * To get started, let's talk about a little bit of terminology that we've used
297 * so far, is used throughout this file, and is fairly generic across multiple
298 * vendors:
299 *
300 * CPU
301 * A central processing unit (CPU) refers to a logical and/or virtual
302 * entity that the operating system can execute instructions on. The
303 * underlying resources for this CPU may be shared between multiple
304 * entities; however, to the operating system it is a discrete unit.
305 *
306 * PROCESSOR and PACKAGE
307 *
308 * Generally, when we use the term 'processor' on its own, we are referring
309 * to the physical entity that one buys and plugs into a board. However,
310 * because processor has been overloaded and one might see it used to mean
311 * multiple different levels, we will instead use the term 'package' for
312 * the rest of this file. The term package comes from the electrical
313 * engineering side and refers to the physical entity that encloses the
314 * electronics inside. Strictly speaking the package can contain more than
315 * just the CPU, for example, on many processors it may also have what's
316 * called an 'integrated graphics processing unit (GPU)'. Because the
317 * package can encapsulate multiple units, it is the largest physical unit
318 * that we refer to.
319 *
320 * SOCKET
321 *
322 * A socket refers to a unit on a system board (generally the motherboard)
323 * that can receive a package. A single package, or processor, is plugged
324 * into a single socket. A system may have multiple sockets. Often times,
325 * the term socket is used interchangeably with package and refers to the
326 * electrical component that is plugged in, and not the receptacle itself.
327 *
328 * CORE
329 *
330 * A core refers to the physical instantiation of a CPU, generally, with a
331 * full set of hardware resources available to it. A package may contain
332 * multiple cores inside of it or it may just have a single one. A
333 * processor with more than one core is often referred to as 'multi-core'.
334 * In illumos, we will use the feature X86FSET_CMP to refer to a system
335 * that has 'multi-core' processors.
336 *
337 * A core may expose a single logical CPU to the operating system, or it
338 * may expose multiple CPUs, which we call threads, defined below.
339 *
340 * Some resources may still be shared by cores in the same package. For
341 * example, many processors will share the level 3 cache between cores.
342 * Some AMD generations share hardware resources between cores. For more
343 * information on that see the section 'AMD Topology'.
344 *
345 * THREAD and STRAND
346 *
347 * In this file, generally a thread refers to a hardware resource and not
348 * the operating system's logical abstraction. A thread is always exposed
349 * as an independent logical CPU to the operating system. A thread belongs
350 * to a specific core. A core may have more than one thread. When that is
351 * the case, the threads that are part of the same core are often referred
352 * to as 'siblings'.
353 *
354 * When multiple threads exist, this is generally referred to as
355 * simultaneous multi-threading (SMT). When Intel introduced this in their
356 * processors they called it hyper-threading (HT). When multiple threads
357 * are active in a core, they split the resources of the core. For example,
358 * two threads may share the same set of hardware execution units.
359 *
360 * The operating system often uses the term 'strand' to refer to a thread.
361 * This helps disambiguate it from the software concept.
362 *
363 * CHIP
364 *
365 * Unfortunately, the term 'chip' is dramatically overloaded. In its most
366 * basic meaning, it is used to refer to a single integrated circuit, which
367 * may or may not be the only thing in the package. In illumos, when you
368 * see the term 'chip' it is almost always referring to the same thing as
369 * the 'package'. However, many vendors may use chip to refer to one of
370 * many integrated circuits that have been placed in the package. As an
371 * example, see the subsequent definition.
372 *
373 * To try and keep things consistent, we will only use chip when referring
374 * to the entire integrated circuit package, with the exception of the
375 * definition of multi-chip module (because it is in the name) and use the
376 * term 'die' when we want the more general, potential sub-component
377 * definition.
378 *
379 * DIE
380 *
381 * A die refers to an integrated circuit. Inside of the package there may
382 * be a single die or multiple dies. This is sometimes called a 'chip' in
383 * vendor's parlance, but in this file, we use the term die to refer to a
384 * subcomponent.
385 *
386 * MULTI-CHIP MODULE
387 *
388 * A multi-chip module (MCM) refers to putting multiple distinct chips that
389 * are connected together in the same package. When a multi-chip design is
390 * used, generally each chip is manufactured independently and then joined
391 * together in the package. For example, on AMD's Zen microarchitecture
392 * (family 0x17), the package contains several dies (the second meaning of
393 * chip from above) that are connected together.
394 *
395 * CACHE
396 *
397 * A cache is a part of the processor that maintains copies of recently
398 * accessed memory. Caches are split into levels and then into types.
399 * Commonly there are one to three levels, called level one, two, and
400 * three. The lower the level, the smaller it is, the closer it is to the
401 * execution units of the CPU, and the faster it is to access. The layout
402 * and design of the cache come in many different flavors, consult other
403 * resources for a discussion of those.
404 *
405 * Caches are generally split into two types, the instruction and data
406 * cache. The caches contain what their names suggest, the instruction
407 * cache has executable program text, while the data cache has all other
408 * memory that the processor accesses. As of this writing, data is kept
409 * coherent between all of the caches on x86, so if one modifies program
410 * text before it is executed, that will be in the data cache, and the
411 * instruction cache will be synchronized with that change when the
412 * processor actually executes those instructions. This coherency also
413 * covers the fact that data could show up in multiple caches.
414 *
415 * Generally, the lowest level caches are specific to a core. However, the
416 * last level cache is shared between some number of cores. The number of
417 * CPUs sharing this last level cache is important. This has implications
418 * for the choices that the scheduler makes, as accessing memory that might
419 * be in a remote cache after thread migration can be quite expensive.
420 *
421 * Sometimes, the word cache is abbreviated with a '$', because in US
422 * English the word cache is pronounced the same as cash. So L1D$ refers to
423 * the L1 data cache, and L2$ would be the L2 cache. This will not be used
424 * in the rest of this theory statement for clarity.
425 *
426 * MEMORY CONTROLLER
427 *
428 * The memory controller is a component that provides access to DRAM. Each
429 * memory controller can access a set number of DRAM channels. Each channel
430 * can have a number of DIMMs (sticks of memory) associated with it. A
431 * given package may have more than one memory controller. The association
432 * of the memory controller to a group of cores is important as it is
433 * cheaper to access memory on the controller that you are associated with.
434 *
435 * NUMA
436 *
437 * NUMA, or non-uniform memory access, describes a way that systems are
438 * built. On x86, any processor core can address all of the memory in the
439 * system. However, when using multiple sockets or possibly within a
440 * multi-chip module, some of that memory is physically closer and some of
441 * it is further. Memory that is further away is more expensive to access.
442 * Consider the following image of multiple sockets with memory:
443 *
444 * +--------+ +--------+
445 * | DIMM A | +----------+ +----------+ | DIMM D |
446 * +--------+-+ | | | | +-+------+-+
447 * | DIMM B |=======| Socket 0 |======| Socket 1 |=======| DIMM E |
448 * +--------+-+ | | | | +-+------+-+
449 * | DIMM C | +----------+ +----------+ | DIMM F |
450 * +--------+ +--------+
451 *
452 * In this example, Socket 0 is closer to DIMMs A-C while Socket 1 is
453 * closer to DIMMs D-F. This means that it is cheaper for socket 0 to
454 * access DIMMs A-C and more expensive to access D-F as it has to go
455 * through Socket 1 to get there. The inverse is true for Socket 1. DIMMs
456 * D-F are cheaper than A-C. While the socket form is the most common, when
457 * using multi-chip modules, this can also sometimes occur. For another
458 * example of this that's more involved, see the AMD topology section.
459 *
460 *
461 * Intel Topology
462 * --------------
463 *
464 * Most Intel processors since Nehalem (as of this writing the current gen
465 * is Skylake / Cannon Lake) follow a fairly similar pattern. The CPU portion of
466 * the package is a single monolithic die. MCMs currently aren't used. Most
467 * parts have three levels of caches, with the L3 cache being shared between
468 * all of the cores on the package. The L1/L2 cache is generally specific to
469 * an individual core. The following image shows at a simplified level what
470 * this looks like. The memory controller is commonly part of something called
471 * the 'Uncore', which used to be separate physical chips that were not a part of
472 * the package, but are now part of the same chip.
473 *
474 * +-----------------------------------------------------------------------+
475 * | Package |
476 * | +-------------------+ +-------------------+ +-------------------+ |
477 * | | Core | | Core | | Core | |
478 * | | +--------+ +---+ | | +--------+ +---+ | | +--------+ +---+ | |
479 * | | | Thread | | L | | | | Thread | | L | | | | Thread | | L | | |
480 * | | +--------+ | 1 | | | +--------+ | 1 | | | +--------+ | 1 | | |
481 * | | +--------+ | | | | +--------+ | | | | +--------+ | | | |
482 * | | | Thread | | | | | | Thread | | | | | | Thread | | | | |
483 * | | +--------+ +---+ | | +--------+ +---+ | | +--------+ +---+ | |
484 * | | +--------------+ | | +--------------+ | | +--------------+ | |
485 * | | | L2 Cache | | | | L2 Cache | | | | L2 Cache | | |
486 * | | +--------------+ | | +--------------+ | | +--------------+ | |
487 * | +-------------------+ +-------------------+ +-------------------+ |
488 * | +-------------------------------------------------------------------+ |
489 * | | Shared L3 Cache | |
490 * | +-------------------------------------------------------------------+ |
491 * | +-------------------------------------------------------------------+ |
492 * | | Memory Controller | |
493 * | +-------------------------------------------------------------------+ |
494 * +-----------------------------------------------------------------------+
495 *
496 * A side effect of this current architecture is that what we care about from a
497 * scheduling and topology perspective is simplified. In general we care about
498 * understanding which logical CPUs are part of the same core and socket.
499 *
500 * To determine the relationship between threads and cores, Intel initially used
501 * the identifier in the advanced programmable interrupt controller (APIC). They
502 * also added cpuid leaf 4 to give additional information about the number of
503 * threads and CPUs in the processor. With the addition of x2apic (which
504 * increased the number of addressable logical CPUs from 8-bits to 32-bits), an
505 * additional cpuid topology leaf 0xB was added.
506 *
507 * AMD Topology
508 * ------------
509 *
510 * When discussing AMD topology, we want to break this into three distinct
511 * generations of topology. There's the basic topology that has been used in
512 * family 0xf+ (Opteron, Athlon64), there's the topology that was introduced
513 * with family 0x15 (Bulldozer), and there's the topology that was introduced
514 * with family 0x17 (Zen). AMD also has some additional terminology that's worth
515 * talking about.
516 *
517 * Until the introduction of family 0x17 (Zen), AMD did not implement something
518 * that they considered SMT. Whether or not the AMD processors have SMT
519 * influences many things including scheduling and reliability, availability,
520 * and serviceability (RAS) features.
521 *
522 * NODE
523 *
524 * AMD uses the term node to refer to a die that contains a number of cores
525 * and I/O resources. Depending on the processor family and model, more
526 * than one node can be present in the package. When there is more than one
527 * node this indicates a multi-chip module. Usually each node has its own
528 * access to memory and I/O devices. This is important and generally
529 * different from the corresponding Intel Nehalem-Skylake+ processors. As a
530 * result, we track this relationship in the operating system.
531 *
532 * In processors with an L3 cache, the L3 cache is generally shared across
533 * the entire node, though the way this is carved up varies from generation
534 * to generation.
535 *
536 * BULLDOZER
537 *
538 * Starting with the Bulldozer family (0x15) and continuing until the
539 * introduction of the Zen microarchitecture, AMD introduced the idea of a
540 * compute unit. In a compute unit, two traditional cores share a number of
541 * hardware resources. Critically, they share the FPU, L1 instruction
542 * cache, and the L2 cache. Several compute units were then combined inside
543 * of a single node. Because the integer execution units, L1 data cache,
544 * and some other resources were not shared between the cores, AMD never
545 * considered this to be SMT.
546 *
547 * ZEN
548 *
549 * The Zen family (0x17) uses a multi-chip module (MCM) design; the module
550 * is called Zeppelin. These modules are similar to the idea of nodes used
551 * previously. Each of these nodes has two DRAM channels which all of the
552 * cores in the node can access uniformly. These nodes are linked together
553 * in the package, creating a NUMA environment.
554 *
555 * The Zeppelin die itself contains two different 'core complexes'. Each
556 * core complex consists of four cores which each have two threads, for a
557 * total of 8 logical CPUs per complex. Unlike other generations,
558 * where all the logical CPUs in a given node share the L3 cache, here each
559 * core complex has its own shared L3 cache.
560 *
561 * A further thing that we need to consider is that in some configurations,
562 * particularly with the Threadripper line of processors, not every die
563 * actually has its memory controllers wired up to actual memory channels.
564 * This means that some cores have memory attached to them and others
565 * don't.
566 *
567 * To put Zen in perspective, consider the following images:
568 *
569 * +--------------------------------------------------------+
570 * | Core Complex |
571 * | +-------------------+ +-------------------+ +---+ |
572 * | | Core +----+ | | Core +----+ | | | |
573 * | | +--------+ | L2 | | | +--------+ | L2 | | | | |
574 * | | | Thread | +----+ | | | Thread | +----+ | | | |
575 * | | +--------+-+ +--+ | | +--------+-+ +--+ | | L | |
576 * | | | Thread | |L1| | | | Thread | |L1| | | 3 | |
577 * | | +--------+ +--+ | | +--------+ +--+ | | | |
578 * | +-------------------+ +-------------------+ | C | |
579 * | +-------------------+ +-------------------+ | a | |
580 * | | Core +----+ | | Core +----+ | | c | |
581 * | | +--------+ | L2 | | | +--------+ | L2 | | | h | |
582 * | | | Thread | +----+ | | | Thread | +----+ | | e | |
583 * | | +--------+-+ +--+ | | +--------+-+ +--+ | | | |
584 * | | | Thread | |L1| | | | Thread | |L1| | | | |
585 * | | +--------+ +--+ | | +--------+ +--+ | | | |
586 * | +-------------------+ +-------------------+ +---+ |
587 * | |
588 * +--------------------------------------------------------+
589 *
590 * This first image represents a single Zen core complex that consists of four
591 * cores.
592 *
593 *
594 * +--------------------------------------------------------+
595 * | Zeppelin Die |
596 * | +--------------------------------------------------+ |
597 * | | I/O Units (PCIe, SATA, USB, etc.) | |
598 * | +--------------------------------------------------+ |
599 * | HH |
600 * | +-----------+ HH +-----------+ |
601 * | | | HH | | |
602 * | | Core |==========| Core | |
603 * | | Complex |==========| Complex | |
604 * | | | HH | | |
605 * | +-----------+ HH +-----------+ |
606 * | HH |
607 * | +--------------------------------------------------+ |
608 * | | Memory Controller | |
609 * | +--------------------------------------------------+ |
610 * | |
611 * +--------------------------------------------------------+
612 *
613 * This image represents a single Zeppelin Die. Note how both core complexes
614 * are connected to the same memory controller and I/O units. While each core
615 * complex has its own L3 cache as seen in the first image, they both have
616 * uniform access to memory.
617 *
618 *
619 * PP PP
620 * PP PP
621 * +----------PP---------------------PP---------+
622 * | PP PP |
623 * | +-----------+ +-----------+ |
624 * | | | | | |
625 * MMMMMMMMM| Zeppelin |==========| Zeppelin |MMMMMMMMM
626 * MMMMMMMMM| Die |==========| Die |MMMMMMMMM
627 * | | | | | |
628 * | +-----------+ooo ...+-----------+ |
629 * | HH ooo ... HH |
630 * | HH oo.. HH |
631 * | HH ..oo HH |
632 * | HH ... ooo HH |
633 * | +-----------+... ooo+-----------+ |
634 * | | | | | |
635 * MMMMMMMMM| Zeppelin |==========| Zeppelin |MMMMMMMMM
636 * MMMMMMMMM| Die |==========| Die |MMMMMMMMM
637 * | | | | | |
638 * | +-----------+ +-----------+ |
639 * | PP PP |
640 * +----------PP---------------------PP---------+
641 * PP PP
642 * PP PP
643 *
644 * This image represents a single Zen package. In this example, it has four
645 * Zeppelin dies, though some configurations only have a single one. In this
646 * example, each die is directly connected to the next. Also, each die is
647 * represented as being connected to memory by the 'M' character and connected
648 * to PCIe devices and other I/O, by the 'P' character. Because each Zeppelin
649 * die is made up of two core complexes, we have multiple different NUMA
650 * domains that we care about for these systems.
651 *
652 * CPUID LEAVES
653 *
654 * There are a few different CPUID leaves that we can use to try and understand
655 * the actual state of the world. As part of the introduction of family 0xf, AMD
656 * added CPUID leaf 0x80000008. This leaf tells us the number of logical
657 * processors that are in the package. Because families before Zen didn't have
658 * SMT, this was always the number of cores that were in the package. However, it
659 * should always be thought of as the number of logical threads to be consistent
660 * between generations. In addition we also get the size of the APIC ID that is
661 * used to represent the number of logical processors. This is important for
662 * deriving topology information.
663 *
664 * In the Bulldozer family, AMD added leaf 0x8000001E. The information varies a
665 * bit between Bulldozer and later families, but it is quite useful in
666 * determining the topology information. Because this information has changed
667 * across family generations, it's worth calling out what these mean
668 * explicitly. The registers have the following meanings:
669 *
670 * %eax The APIC ID. The entire register is defined to have a 32-bit
671 * APIC ID, even though on systems without x2apic support, it will
672 * be limited to 8 bits.
673 *
674 * %ebx On Bulldozer-era systems this contains information about the
675 * number of cores that are in a compute unit (cores that share
676 * resources). It also contains a per-package compute unit ID that
677 * identifies which compute unit the logical CPU is a part of.
678 *
679 * On Zen-era systems this instead contains the number of threads
680 * per core and the ID of the core that the logical CPU is a part
681 * of. Note, this ID is unique only to the package, it is not
682 * globally unique across the entire system.
683 *
684 * %ecx This contains the number of nodes that exist in the package. It
685 * also contains an ID that identifies which node the logical CPU
686 * is a part of.
687 *
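 * As a hedged sketch, the Zen-era decoding of leaf 0x8000001E looks roughly
 * like the following (the left-hand names are illustrative, the Bulldozer-era
 * meaning of %ebx differs as described above, and BITX() is the bit-extraction
 * macro used throughout this file):
 *
 *	apicid           = regs.cp_eax;
 *	threads_per_core = BITX(regs.cp_ebx, 15, 8) + 1;
 *	coreid           = BITX(regs.cp_ebx, 7, 0);
 *	nodes_per_pkg    = BITX(regs.cp_ecx, 10, 8) + 1;
 *	nodeid           = BITX(regs.cp_ecx, 7, 0);
 *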
688 * Finally, we also use cpuid leaf 0x8000001D to determine information about the
689 * cache layout to determine which logical CPUs are sharing which caches.
690 *
691 * illumos Topology
692 * ----------------
693 *
694 * Based on the above we synthesize the information into several different
695 * variables that we store in the 'struct cpuid_info'. We'll go into the details
696 * of what each member is supposed to represent and their uniqueness. In
697 * general, there are two levels of uniqueness that we care about. We care about
698 * an ID that is globally unique. That means that it will be unique across all
699 * entities in the system. For example, the default logical CPU ID is globally
700 * unique. On the other hand, there is some information that we only care about
701 * being unique within the context of a single package / socket. Here are the
702 * variables that we keep track of and their meaning.
703 *
704 * Several of the values that represent identifiers, with the exception of
705 * cpi_apicid, are allowed to be synthetic.
706 *
707 *
708 * cpi_apicid
709 *
710 * This is the value of the CPU's APIC id. This should be the full 32-bit
711 * ID if the CPU is using the x2apic. Otherwise, it should be the 8-bit
712 * APIC ID. This value is globally unique between all logical CPUs across
713 * all packages. This is usually required by the APIC.
714 *
715 * cpi_chipid
716 *
717 * This value indicates the ID of the package that the logical CPU is a
718 * part of. This value is allowed to be synthetic. It is usually derived by
719 * taking the CPU's APIC ID and determining how many bits are used to
720 * represent CPU cores in the package. All logical CPUs that are part of
721 * the same package must have the same value.
722 *
723 * cpi_coreid
724 *
725 * This represents the ID of a CPU core. Two logical CPUs should only have
726 * the same cpi_coreid value if they are part of the same core. These
727 * values may be synthetic. On systems that support SMT, this value is
728 * usually derived from the APIC ID, otherwise it is often synthetic and
729 * just set to the value of the cpu_id in the cpu_t.
730 *
731 * cpi_pkgcoreid
732 *
733 * This is similar to the cpi_coreid in that logical CPUs that are part of
734 * the same core should have the same ID. The main difference is that these
735 * values are only required to be unique to a given socket.
736 *
737 * cpi_clogid
738 *
739 * This represents the logical ID of a logical CPU. This value should be
740 * unique within a given socket for each logical CPU. This is allowed to be
741 * synthetic, though it is usually based off of the CPU's APIC ID. The
742 * broader system expects that logical CPUs that are part of the same
743 * core have contiguous numbers. For example, if there were two threads per
744 * core, then the IDs of two siblings divided by two should be the same, the
745 * first modulo two should be zero, and the second should be one. IDs 4 and 5
746 * indicate two logical CPUs that are part of the same core. But IDs 5 and
747 * 6 represent two logical CPUs that are part of different cores.
748 *
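 * In other words, with two threads per core, the expected relationship
 * between two sibling logical CPUs is roughly:
 *
 *	(clogid_a >> 1) == (clogid_b >> 1)	same core
 *	(clogid_a & 1) != (clogid_b & 1)	different strands of that core
 *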
749 * While it is common for the cpi_coreid and the cpi_clogid to be derived
750 * from the same source, strictly speaking, they don't have to be and the
751 * two values should be considered logically independent. One should not
752 * try to compare a logical CPU's cpi_coreid and cpi_clogid to determine
753 * some kind of relationship. While this is tempting, we've seen cases on
754 * AMD family 0xf where the system's cpu id is not related to its APIC ID.
755 *
756 * cpi_ncpu_per_chip
757 *
758 * This value indicates the total number of logical CPUs that exist in the
759 * physical package. Critically, this is not the number of logical CPUs
760 * that exist for just the single core.
761 *
762 * This value should be the same for all logical CPUs in the same package.
763 *
764 * cpi_ncore_per_chip
765 *
766 * This value indicates the total number of physical CPU cores that exist
767 * in the package. The system compares this value with cpi_ncpu_per_chip to
768 * determine if simultaneous multi-threading (SMT) is enabled. When
769 * cpi_ncpu_per_chip equals cpi_ncore_per_chip, then there is no SMT and
770 * the X86FSET_HTT feature is not set. If this value is greater than one,
771 * then we consider the processor to have the feature X86FSET_CMP, to
772 * indicate that there is support for more than one core.
773 *
774 * This value should be the same for all logical CPUs in the same package.
775 *
776 * cpi_procnodes_per_pkg
777 *
778 * This value indicates the number of 'nodes' that exist in the package.
779 * When processors are actually a multi-chip module, this represents the
780 * number of such modules that exist in the package. Currently, on Intel
781 * based systems this member is always set to 1.
782 *
783 * This value should be the same for all logical CPUs in the same package.
784 *
785 * cpi_procnodeid
786 *
787 * This value indicates the ID of the node that the logical CPU is a part
788 * of. All logical CPUs that are in the same node must have the same value
789 * here. This value must be unique across all of the packages in the
790 * system. On Intel based systems, this is currently set to the value in
791 * cpi_chipid because there is only one node.
792 *
793 * cpi_cores_per_compunit
794 *
795 * This value indicates the number of cores that are part of a compute
796 * unit. See the AMD topology section for this. This member only has real
797 * meaning currently for AMD Bulldozer family processors. For all other
798 * processors, this should currently be set to 1.
799 *
800 * cpi_compunitid
801 *
802 * This indicates the compute unit that the logical CPU belongs to. For
803 * processors without AMD Bulldozer-style compute units this should be set
804 * to the value of cpi_coreid.
805 *
806 * cpi_ncpu_shr_last_cache
807 *
808 * This indicates the number of logical CPUs that are sharing the same last
809 * level cache. This value should be the same for all CPUs that are sharing
810 * that cache. The last cache refers to the cache that is closest to memory
811 * and furthest away from the CPU.
812 *
813 * cpi_last_lvl_cacheid
814 *
815 * This indicates the ID of the last cache that the logical CPU uses. This
816 * cache is often shared between multiple logical CPUs and is the cache
817 * that is closest to memory and furthest away from the CPU. This value
818 * should be the same for a group of logical CPUs only if they actually
819 * share the same last level cache. IDs should not overlap between
820 * packages.
821 *
822 * cpi_ncore_bits
823 *
824 * This indicates the number of bits that are required to represent all of
825 * the cores in the system. As cores are derived based on their APIC IDs,
826 * we aren't guaranteed a run of APIC IDs starting from zero. It's OK for
827 * this value to be larger than the actual number of IDs that are present
828 * in the system. This is used to size tables by the CMI framework. It is
829 * only filled in for Intel and AMD CPUs.
830 *
831 * cpi_nthread_bits
832 *
833 * This indicates the number of bits required to represent all of the IDs
834 * that cover the logical CPUs that exist on a given core. It's OK for this
835 * value to be larger than the actual number of IDs that are present in the
836 * system. This is used to size tables by the CMI framework. It is
837 * only filled in for Intel and AMD CPUs.
838 *
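 * On CPUs where these IDs are derived from the APIC ID, the decomposition
 * looks roughly like the following (a sketch, not the exact code; strand,
 * core, and package are illustrative names):
 *
 *	strand  = cpi_apicid & ((1 << cpi_nthread_bits) - 1);
 *	core    = (cpi_apicid >> cpi_nthread_bits) &
 *	    ((1 << cpi_ncore_bits) - 1);
 *	package = cpi_apicid >> (cpi_nthread_bits + cpi_ncore_bits);
 *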
839 * -----------
840 * Hypervisors
841 * -----------
842 *
843 * If trying to manage the differences between vendors wasn't bad enough, it can
844 * get worse thanks to our friend hardware virtualization. Hypervisors are given
845 * the ability to interpose on all cpuid instructions and change them to suit
846 * their purposes. In general, this is necessary as the hypervisor wants to be
847 * able to present a more uniform set of features or not necessarily give the
848 * guest operating system kernel knowledge of all features so it can be
849 * more easily migrated between systems.
850 *
851 * When it comes to trying to determine topology information, this can be a
852 * double edged sword. When a hypervisor doesn't actually implement a cpuid
853 * leaf, it'll often return all zeros. Because of that, you'll often see various
854 * checks scattered about that verify fields are non-zero before we assume we
855 * can use them.
856 *
857 * When it comes to topology information, the hypervisor is often incentivized
858 * to lie to you about topology. This is because it doesn't always actually
859 * guarantee that topology at all. The topology path we take in the system
860 * depends on how the CPU advertises itself. If it advertises itself as an Intel
861 * or AMD CPU, then we basically do our normal path. However, when they don't
862 * use an actual vendor, then we usually end up enumerating multiple one-core
863 * CPUs that are often on different sockets. The actual behavior
864 * depends greatly on what the hypervisor actually exposes to us.
865 *
866 * --------------------
867 * Exposing Information
868 * --------------------
869 *
870 * We expose CPUID information in three different forms in the system.
871 *
872 * The first is through the x86_featureset variable. This is used in conjunction
873 * with the is_x86_feature() function. This is queried by x86-specific functions
874 * to determine which features are or aren't present in the system and to make
875 * decisions based upon them. For example, users of this include everything from
876 * parts of the system dedicated to reliability, availability, and
877 * serviceability (RAS), to making decisions about how to handle security
878 * mitigations, to various x86-specific drivers. General purpose or
879 * architecture-independent drivers should never be calling this function.
880 *
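 * A typical kernel consumer looks roughly like this (X86FSET_SSE2 is one of
 * the constants behind the names listed in x86_feature_names below):
 *
 *	if (is_x86_feature(x86_featureset, X86FSET_SSE2)) {
 *		<use the SSE2-accelerated implementation>
 *	}
 *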
881 * The second means is through the auxiliary vector. The auxiliary vector is a
882 * series of tagged data that the kernel passes down to a user program when it
883 * begins executing. This information is used to indicate to programs what
884 * instruction set extensions are present. For example, information about the
885 * CPU supporting the machine check architecture (MCA) wouldn't be passed down
886 * since user programs cannot make use of it. However, things like the AVX
887 * instruction sets are. Programs use this information to make run-time
888 * decisions about what features they should use. As an example, the run-time
889 * link-editor (rtld) can relocate different functions depending on the hardware
890 * support available.
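 *
 * For example, a userland program can test these bits with getisax(3C); a
 * hedged sketch, where AV_386_AVX is one of the bits defined in
 * <sys/auxv_386.h> and hwcap is an illustrative local:
 *
 *	uint32_t hwcap[2] = { 0, 0 };
 *
 *	(void) getisax(hwcap, 2);
 *	if (hwcap[0] & AV_386_AVX) {
 *		<use the AVX implementation>
 *	}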
891 *
892 * The final form is through a series of accessor functions that all have the
893 * form cpuid_get*. This is used by a number of different subsystems in the
894 * kernel to determine more detailed information about what we're running on,
895 * topology information, etc. Some of these subsystems include processor groups
896 * (uts/common/os/pg.c), CPU Module Interface (uts/i86pc/os/cmi.c), ACPI,
897 * microcode, and performance monitoring. These functions all ASSERT that the
898 * CPU they're being called on has reached a certain cpuid pass. If the passes
899 * are rearranged, then this needs to be adjusted.
900 */
901
902 #include <sys/types.h>
903 #include <sys/archsystm.h>
904 #include <sys/x86_archext.h>
905 #include <sys/kmem.h>
906 #include <sys/systm.h>
907 #include <sys/cmn_err.h>
908 #include <sys/sunddi.h>
909 #include <sys/sunndi.h>
910 #include <sys/cpuvar.h>
911 #include <sys/processor.h>
912 #include <sys/sysmacros.h>
913 #include <sys/pg.h>
914 #include <sys/fp.h>
915 #include <sys/controlregs.h>
916 #include <sys/bitmap.h>
917 #include <sys/auxv_386.h>
918 #include <sys/memnode.h>
919 #include <sys/pci_cfgspace.h>
920 #include <sys/comm_page.h>
921 #include <sys/mach_mmu.h>
922 #include <sys/ucode.h>
923 #include <sys/tsc.h>
924
925 #ifdef __xpv
926 #include <sys/hypervisor.h>
927 #else
928 #include <sys/ontrap.h>
929 #endif
930
931 uint_t x86_vendor = X86_VENDOR_IntelClone;
932 uint_t x86_type = X86_TYPE_OTHER;
933 uint_t x86_clflush_size = 0;
934
935 #if defined(__xpv)
936 int x86_use_pcid = 0;
937 int x86_use_invpcid = 0;
938 #else
939 int x86_use_pcid = -1;
940 int x86_use_invpcid = -1;
941 #endif
942
943 uint_t pentiumpro_bug4046376;
944
945 uchar_t x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)];
946
947 static char *x86_feature_names[NUM_X86_FEATURES] = {
948 "lgpg",
949 "tsc",
950 "msr",
951 "mtrr",
952 "pge",
953 "de",
954 "cmov",
955 "mmx",
956 "mca",
957 "pae",
958 "cv8",
959 "pat",
960 "sep",
961 "sse",
962 "sse2",
963 "htt",
964 "asysc",
965 "nx",
966 "sse3",
967 "cx16",
968 "cmp",
969 "tscp",
970 "mwait",
971 "sse4a",
972 "cpuid",
973 "ssse3",
974 "sse4_1",
975 "sse4_2",
976 "1gpg",
977 "clfsh",
978 "64",
979 "aes",
980 "pclmulqdq",
981 "xsave",
982 "avx",
983 "vmx",
984 "svm",
985 "topoext",
986 "f16c",
987 "rdrand",
988 "x2apic",
989 "avx2",
990 "bmi1",
991 "bmi2",
992 "fma",
993 "smep",
994 "smap",
995 "adx",
996 "rdseed",
997 "mpx",
998 "avx512f",
999 "avx512dq",
1000 "avx512pf",
1001 "avx512er",
1002 "avx512cd",
1003 "avx512bw",
1004 "avx512vl",
1005 "avx512fma",
1006 "avx512vbmi",
1007 "avx512_vpopcntdq",
1008 "avx512_4vnniw",
1009 "avx512_4fmaps",
1010 "xsaveopt",
1011 "xsavec",
1012 "xsaves",
1013 "sha",
1014 "umip",
1015 "pku",
1016 "ospke",
1017 "pcid",
1018 "invpcid",
1019 "ibrs",
1020 "ibpb",
1021 "stibp",
1022 "ssbd",
1023 "ssbd_virt",
1024 "rdcl_no",
1025 "ibrs_all",
1026 "rsba",
1027 "ssb_no",
1028 "stibp_all",
1029 "flush_cmd",
1030 "l1d_vmentry_no",
1031 "fsgsbase",
1032 "clflushopt",
1033 "clwb",
1034 "monitorx",
1035 "clzero",
1036 "xop",
1037 "fma4",
1038 "tbm",
1039 "avx512_vnni"
1040 };
1041
1042 boolean_t
1043 is_x86_feature(void *featureset, uint_t feature)
1044 {
1045 ASSERT(feature < NUM_X86_FEATURES);
1046 return (BT_TEST((ulong_t *)featureset, feature));
1047 }
1048
1049 void
1050 add_x86_feature(void *featureset, uint_t feature)
1051 {
1052 ASSERT(feature < NUM_X86_FEATURES);
1053 BT_SET((ulong_t *)featureset, feature);
1054 }
1055
1056 void
1057 remove_x86_feature(void *featureset, uint_t feature)
1058 {
1059 ASSERT(feature < NUM_X86_FEATURES);
1060 BT_CLEAR((ulong_t *)featureset, feature);
1061 }
1062
1063 boolean_t
1064 compare_x86_featureset(void *setA, void *setB)
1065 {
1066 /*
1067 * We assume that the unused bits of the bitmap are always zero.
1068 */
1069 if (memcmp(setA, setB, BT_SIZEOFMAP(NUM_X86_FEATURES)) == 0) {
1070 return (B_TRUE);
1071 } else {
1072 return (B_FALSE);
1073 }
1074 }
1075
1076 void
1077 print_x86_featureset(void *featureset)
1078 {
1079 uint_t i;
1080
1081 for (i = 0; i < NUM_X86_FEATURES; i++) {
1082 if (is_x86_feature(featureset, i)) {
1083 cmn_err(CE_CONT, "?x86_feature: %s\n",
1084 x86_feature_names[i]);
1085 }
1086 }
1087 }
1088
1089 /* Note: This is the maximum size for the CPU, not the size of the structure. */
1090 static size_t xsave_state_size = 0;
1091 uint64_t xsave_bv_all = (XFEATURE_LEGACY_FP | XFEATURE_SSE);
1092 boolean_t xsave_force_disable = B_FALSE;
1093 extern int disable_smap;
1094
1095 /*
1096 * This is set to the platform type we are running on.
1097 */
1098 static int platform_type = -1;
1099
1100 #if !defined(__xpv)
1101 /*
1102 * Variable to patch if hypervisor platform detection needs to be
1103 * disabled (e.g. platform_type will always be HW_NATIVE if this is 0).
1104 */
1105 int enable_platform_detection = 1;
1106 #endif
1107
1108 /*
1109 * monitor/mwait info.
1110 *
1111 * buf_actual and size_actual are the real address and size allocated to get
1112 * proper mwait_buf alignment. buf_actual and size_actual should be passed
1113 * to kmem_free(). Currently kmem_alloc() and mwait happen to both use
1114 * processor cache-line alignment, but this is not guaranteed in the future.
1115 */
1116 struct mwait_info {
1117 size_t mon_min; /* min size to avoid missed wakeups */
1118 size_t mon_max; /* size to avoid false wakeups */
1119 size_t size_actual; /* size actually allocated */
1120 void *buf_actual; /* memory actually allocated */
1121 uint32_t support; /* processor support of monitor/mwait */
1122 };
1123
1124 /*
1125 * xsave/xrestor info.
1126 *
1127 * This structure contains HW feature bits and the size of the xsave save area.
1128 * Note: the kernel declares a fixed size (AVX_XSAVE_SIZE) structure
1129 * (xsave_state) to describe the xsave layout. However, at runtime the
1130 * per-lwp xsave area is dynamically allocated based on xsav_max_size. The
1131 * xsave_state structure simply represents the legacy layout of the beginning
1132 * of the xsave area.
1133 */
1134 struct xsave_info {
1135 uint32_t xsav_hw_features_low; /* Supported HW features */
1136 uint32_t xsav_hw_features_high; /* Supported HW features */
1137 size_t xsav_max_size; /* max size save area for HW features */
1138 size_t ymm_size; /* AVX: size of ymm save area */
1139 size_t ymm_offset; /* AVX: offset for ymm save area */
1140 size_t bndregs_size; /* MPX: size of bndregs save area */
1141 size_t bndregs_offset; /* MPX: offset for bndregs save area */
1142 size_t bndcsr_size; /* MPX: size of bndcsr save area */
1143 size_t bndcsr_offset; /* MPX: offset for bndcsr save area */
1144 size_t opmask_size; /* AVX512: size of opmask save */
1145 size_t opmask_offset; /* AVX512: offset for opmask save */
1146 size_t zmmlo_size; /* AVX512: size of zmm 256 save */
1147 size_t zmmlo_offset; /* AVX512: offset for zmm 256 save */
1148 size_t zmmhi_size; /* AVX512: size of zmm hi reg save */
1149 size_t zmmhi_offset; /* AVX512: offset for zmm hi reg save */
1150 };
1151
1152
1153 /*
1154 * These constants determine how many of the elements of the
1155 * cpuid we cache in the cpuid_info data structure; the
1156 * remaining elements are accessible via the cpuid instruction.
1157 */
1158
1159 #define NMAX_CPI_STD 8 /* eax = 0 .. 7 */
1160 #define NMAX_CPI_EXTD 0x1f /* eax = 0x80000000 .. 0x8000001e */
1161
1162 /*
1163 * See the big theory statement for a more detailed explanation of what some of
1164 * these members mean.
1165 */
1166 struct cpuid_info {
1167 uint_t cpi_pass; /* last pass completed */
1168 /*
1169 * standard function information
1170 */
1171 uint_t cpi_maxeax; /* fn 0: %eax */
1172 char cpi_vendorstr[13]; /* fn 0: %ebx:%ecx:%edx */
1173 uint_t cpi_vendor; /* enum of cpi_vendorstr */
1174
1175 uint_t cpi_family; /* fn 1: extended family */
1176 uint_t cpi_model; /* fn 1: extended model */
1177 uint_t cpi_step; /* fn 1: stepping */
1178 chipid_t cpi_chipid; /* fn 1: %ebx: Intel: chip # */
1179 /* AMD: package/socket # */
1180 uint_t cpi_brandid; /* fn 1: %ebx: brand ID */
1181 int cpi_clogid; /* fn 1: %ebx: thread # */
1182 uint_t cpi_ncpu_per_chip; /* fn 1: %ebx: logical cpu count */
1183 uint8_t cpi_cacheinfo[16]; /* fn 2: intel-style cache desc */
1184 uint_t cpi_ncache; /* fn 2: number of elements */
1185 uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */
1186 id_t cpi_last_lvl_cacheid; /* fn 4: %eax: derived cache id */
1187 uint_t cpi_cache_leaf_size; /* Number of cache elements */
1188 /* Intel fn: 4, AMD fn: 8000001d */
1189 struct cpuid_regs **cpi_cache_leaves; /* Actual leaves from above */
1190 struct cpuid_regs cpi_std[NMAX_CPI_STD]; /* 0 .. 7 */
1191 /*
1192 * extended function information
1193 */
1194 uint_t cpi_xmaxeax; /* fn 0x80000000: %eax */
1195 char cpi_brandstr[49]; /* fn 0x8000000[234] */
1196 uint8_t cpi_pabits; /* fn 0x80000008: %eax */
1197 uint8_t cpi_vabits; /* fn 0x80000008: %eax */
1198 uint8_t cpi_fp_amd_save; /* AMD: FP error pointer save rqd. */
1199 struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x800000XX */
1200
1201 id_t cpi_coreid; /* same coreid => strands share core */
1202 int cpi_pkgcoreid; /* core number within single package */
1203 uint_t cpi_ncore_per_chip; /* AMD: fn 0x80000008: %ecx[7-0] */
1204 /* Intel: fn 4: %eax[31-26] */
1205
1206 /*
1207 * These values represent the number of bits that are required to store
1208 * information about the number of cores and threads.
1209 */
1210 uint_t cpi_ncore_bits;
1211 uint_t cpi_nthread_bits;
1212 /*
1213 * supported feature information
1214 */
1215 uint32_t cpi_support[6];
1216 #define STD_EDX_FEATURES 0
1217 #define AMD_EDX_FEATURES 1
1218 #define TM_EDX_FEATURES 2
1219 #define STD_ECX_FEATURES 3
1220 #define AMD_ECX_FEATURES 4
1221 #define STD_EBX_FEATURES 5
1222 /*
1223 * Synthesized information, where known.
1224 */
1225 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */
1226 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */
1227 uint32_t cpi_socket; /* Chip package/socket type */
1228
1229 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */
1230 uint32_t cpi_apicid;
1231 uint_t cpi_procnodeid; /* AMD: nodeID on HT, Intel: chipid */
1232 uint_t cpi_procnodes_per_pkg; /* AMD: # of nodes in the package */
1233 /* Intel: 1 */
1234 uint_t cpi_compunitid; /* AMD: ComputeUnit ID, Intel: coreid */
1235 uint_t cpi_cores_per_compunit; /* AMD: # of cores in the ComputeUnit */
1236
1237 struct xsave_info cpi_xsave; /* fn D: xsave/xrestor info */
1238 };
1239
1240
1241 static struct cpuid_info cpuid_info0;
1242
1243 /*
1244 * These bit fields are defined by the Intel Application Note AP-485
1245 * "Intel Processor Identification and the CPUID Instruction"
1246 */
1247 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20)
1248 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16)
1249 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12)
1250 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8)
1251 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0)
1252 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4)
1253
1254 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx)
1255 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx)
1256 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx)
1257 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx)
1258 #define CPI_FEATURES_7_0_EBX(cpi) ((cpi)->cpi_std[7].cp_ebx)
1259 #define CPI_FEATURES_7_0_ECX(cpi) ((cpi)->cpi_std[7].cp_ecx)
1260 #define CPI_FEATURES_7_0_EDX(cpi) ((cpi)->cpi_std[7].cp_edx)
1261
1262 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0)
1263 #define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 7)
1264 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16)
1265 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24)
1266
1267 #define CPI_MAXEAX_MAX 0x100 /* sanity control */
1268 #define CPI_XMAXEAX_MAX 0x80000100
1269 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */
1270 #define CPI_FNB_ECX_MAX 0x20 /* sanity: max fn B levels */
1271
1272 /*
1273 * Function 4 (Deterministic Cache Parameters) macros
1274 * Defined by Intel Application Note AP-485
1275 */
1276 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26)
1277 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14)
1278 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9)
1279 #define CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8)
1280 #define CPI_CACHE_LVL(regs) BITX((regs)->cp_eax, 7, 5)
1281 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0)
1282 #define CPI_CPU_LEVEL_TYPE(regs) BITX((regs)->cp_ecx, 15, 8)
1283
1284 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22)
1285 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12)
1286 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0)
1287
1288 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0)
1289
1290 #define CPI_PREFCH_STRIDE(regs) BITX((regs)->cp_edx, 9, 0)
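
/*
 * For reference, the total size in bytes of a cache described by one of
 * these function 4 leaves is the product of the (field + 1) values:
 *
 *	(CPI_CACHE_WAYS + 1) * (CPI_CACHE_PARTS + 1) *
 *	    (CPI_CACHE_COH_LN_SZ + 1) * (CPI_CACHE_SETS + 1)
 *
 * e.g. ways = 7, parts = 0, line size = 63 and sets = 63 describe an
 * 8-way, 64-byte line, 64-set cache of 8 * 1 * 64 * 64 = 32 KB.
 */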
1291
1292
1293 /*
1294 * A couple of shorthand macros to identify "later" P6-family chips
1295 * like the Pentium M and Core. First, the "older" P6-based stuff
1296 * (loosely defined as "pre-Pentium-4"):
1297 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon
1298 */
1299 #define IS_LEGACY_P6(cpi) ( \
1300 cpi->cpi_family == 6 && \
1301 (cpi->cpi_model == 1 || \
1302 cpi->cpi_model == 3 || \
1303 cpi->cpi_model == 5 || \
1304 cpi->cpi_model == 6 || \
1305 cpi->cpi_model == 7 || \
1306 cpi->cpi_model == 8 || \
1307 cpi->cpi_model == 0xA || \
1308 cpi->cpi_model == 0xB) \
1309 )
1310
1311 /* A "new F6" is everything with family 6 that's not the above */
1312 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi))
1313
1314 /* Extended family/model support */
1315 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \
1316 cpi->cpi_family >= 0xf)
1317
1318 /*
1319 * Info for monitor/mwait idle loop.
1320 *
1321 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's
1322 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November
1323 * 2006.
1324 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual
1325 * Documentation Updates" #33633, Rev 2.05, December 2006.
1326 */
1327 #define MWAIT_SUPPORT (0x00000001) /* mwait supported */
#define	MWAIT_EXTENSIONS	(0x00000002)	/* extension supported */
1329 #define MWAIT_ECX_INT_ENABLE (0x00000004) /* ecx 1 extension supported */
1330 #define MWAIT_SUPPORTED(cpi) ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON)
1331 #define MWAIT_INT_ENABLE(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x2)
1332 #define MWAIT_EXTENSION(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x1)
1333 #define MWAIT_SIZE_MIN(cpi) BITX((cpi)->cpi_std[5].cp_eax, 15, 0)
1334 #define MWAIT_SIZE_MAX(cpi) BITX((cpi)->cpi_std[5].cp_ebx, 15, 0)
1335 /*
1336 * Number of sub-cstates for a given c-state.
1337 */
1338 #define MWAIT_NUM_SUBC_STATES(cpi, c_state) \
1339 BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state)
1340
1341 /*
1342 * XSAVE leaf 0xD enumeration
1343 */
1344 #define CPUID_LEAFD_2_YMM_OFFSET 576
1345 #define CPUID_LEAFD_2_YMM_SIZE 256
1346
1347 /*
1348 * Common extended leaf names to cut down on typos.
1349 */
1350 #define CPUID_LEAF_EXT_0 0x80000000
1351 #define CPUID_LEAF_EXT_8 0x80000008
1352 #define CPUID_LEAF_EXT_1d 0x8000001d
1353 #define CPUID_LEAF_EXT_1e 0x8000001e
1354
1355 /*
 * Functions we consume from cpuid_subr.c; don't publish these in a header
1357 * file to try and keep people using the expected cpuid_* interfaces.
1358 */
1359 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t);
1360 extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t);
1361 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t);
1362 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t);
1363 extern uint_t _cpuid_vendorstr_to_vendorcode(char *);
1364
1365 /*
 * Apply various platform-dependent restrictions where the
1367 * underlying platform restrictions mean the CPU can be marked
1368 * as less capable than its cpuid instruction would imply.
1369 */
1370 #if defined(__xpv)
1371 static void
1372 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp)
1373 {
1374 switch (eax) {
1375 case 1: {
1376 uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ?
1377 0 : CPUID_INTC_EDX_MCA;
1378 cp->cp_edx &=
1379 ~(mcamask |
1380 CPUID_INTC_EDX_PSE |
1381 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
1382 CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR |
1383 CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT |
1384 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
1385 CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT);
1386 break;
1387 }
1388
1389 case 0x80000001:
1390 cp->cp_edx &=
1391 ~(CPUID_AMD_EDX_PSE |
1392 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
1393 CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE |
1394 CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 |
1395 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
1396 CPUID_AMD_EDX_TSCP);
1397 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY;
1398 break;
1399 default:
1400 break;
1401 }
1402
1403 switch (vendor) {
1404 case X86_VENDOR_Intel:
1405 switch (eax) {
1406 case 4:
1407 /*
1408 * Zero out the (ncores-per-chip - 1) field
1409 */
			cp->cp_eax &= 0x03ffffff;
1411 break;
1412 default:
1413 break;
1414 }
1415 break;
1416 case X86_VENDOR_AMD:
1417 switch (eax) {
1418
1419 case 0x80000001:
1420 cp->cp_ecx &= ~CPUID_AMD_ECX_CR8D;
1421 break;
1422
1423 case CPUID_LEAF_EXT_8:
1424 /*
1425 * Zero out the (ncores-per-chip - 1) field
1426 */
1427 cp->cp_ecx &= 0xffffff00;
1428 break;
1429 default:
1430 break;
1431 }
1432 break;
1433 default:
1434 break;
1435 }
1436 }
1437 #else
1438 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */
1439 #endif
1440
1441 /*
1442 * Some undocumented ways of patching the results of the cpuid
1443 * instruction to permit running Solaris 10 on future cpus that
1444 * we don't currently support. Could be set to non-zero values
1445 * via settings in eeprom.
1446 */
1447
1448 uint32_t cpuid_feature_ecx_include;
1449 uint32_t cpuid_feature_ecx_exclude;
1450 uint32_t cpuid_feature_edx_include;
1451 uint32_t cpuid_feature_edx_exclude;
1452
1453 /*
1454 * Allocate space for mcpu_cpi in the machcpu structure for all non-boot CPUs.
1455 */
1456 void
1457 cpuid_alloc_space(cpu_t *cpu)
1458 {
1459 /*
1460 * By convention, cpu0 is the boot cpu, which is set up
1461 * before memory allocation is available. All other cpus get
1462 * their cpuid_info struct allocated here.
1463 */
1464 ASSERT(cpu->cpu_id != 0);
1465 ASSERT(cpu->cpu_m.mcpu_cpi == NULL);
1466 cpu->cpu_m.mcpu_cpi =
1467 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP);
1468 }
1469
1470 void
1471 cpuid_free_space(cpu_t *cpu)
1472 {
1473 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1474 int i;
1475
1476 ASSERT(cpi != NULL);
1477 ASSERT(cpi != &cpuid_info0);
1478
1479 /*
1480 * Free up any cache leaf related dynamic storage. The first entry was
1481 * cached from the standard cpuid storage, so we should not free it.
1482 */
1483 for (i = 1; i < cpi->cpi_cache_leaf_size; i++)
1484 kmem_free(cpi->cpi_cache_leaves[i], sizeof (struct cpuid_regs));
1485 if (cpi->cpi_cache_leaf_size > 0)
1486 kmem_free(cpi->cpi_cache_leaves,
1487 cpi->cpi_cache_leaf_size * sizeof (struct cpuid_regs *));
1488
1489 kmem_free(cpi, sizeof (*cpi));
1490 cpu->cpu_m.mcpu_cpi = NULL;
1491 }
1492
1493 #if !defined(__xpv)
1494 /*
1495 * Determine the type of the underlying platform. This is used to customize
1496 * initialization of various subsystems (e.g. TSC). determine_platform() must
1497 * only ever be called once to prevent two processors from seeing different
1498 * values of platform_type. Must be called before cpuid_pass1(), the earliest
1499 * consumer to execute (uses _cpuid_chiprev --> synth_amd_info --> get_hwenv).
1500 */
1501 void
1502 determine_platform(void)
1503 {
1504 struct cpuid_regs cp;
1505 uint32_t base;
1506 uint32_t regs[4];
1507 char *hvstr = (char *)regs;
1508
1509 ASSERT(platform_type == -1);
1510
1511 platform_type = HW_NATIVE;
1512
1513 if (!enable_platform_detection)
1514 return;
1515
1516 /*
1517 * If Hypervisor CPUID bit is set, try to determine hypervisor
1518 * vendor signature, and set platform type accordingly.
1519 *
1520 * References:
1521 * http://lkml.org/lkml/2008/10/1/246
1522 * http://kb.vmware.com/kb/1009458
1523 */
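	/*
	 * A hypervisor returns its vendor signature from leaf 0x40000000 as
	 * twelve ASCII bytes spread across %ebx, %ecx and %edx, which is why
	 * regs[] is assembled in that order and NUL-terminated before the
	 * string comparisons below (e.g. KVM reports "KVMKVMKVM" and VMware
	 * reports "VMwareVMware").
	 */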
1524 cp.cp_eax = 0x1;
1525 (void) __cpuid_insn(&cp);
1526 if ((cp.cp_ecx & CPUID_INTC_ECX_HV) != 0) {
1527 cp.cp_eax = 0x40000000;
1528 (void) __cpuid_insn(&cp);
1529 regs[0] = cp.cp_ebx;
1530 regs[1] = cp.cp_ecx;
1531 regs[2] = cp.cp_edx;
1532 regs[3] = 0;
1533 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0) {
1534 platform_type = HW_XEN_HVM;
1535 return;
1536 }
1537 if (strcmp(hvstr, HVSIG_VMWARE) == 0) {
1538 platform_type = HW_VMWARE;
1539 return;
1540 }
1541 if (strcmp(hvstr, HVSIG_KVM) == 0) {
1542 platform_type = HW_KVM;
1543 return;
1544 }
1545 if (strcmp(hvstr, HVSIG_BHYVE) == 0) {
1546 platform_type = HW_BHYVE;
1547 return;
1548 }
1549 if (strcmp(hvstr, HVSIG_MICROSOFT) == 0)
1550 platform_type = HW_MICROSOFT;
1551 } else {
1552 /*
		 * Check older VMware hardware versions. The VMware hypervisor
		 * is detected by performing an IN operation to the VMware
		 * hypervisor port and checking that the value returned in
		 * %ebx is the VMware hypervisor magic value.
1557 *
1558 * References: http://kb.vmware.com/kb/1009458
1559 */
1560 vmware_port(VMWARE_HVCMD_GETVERSION, regs);
1561 if (regs[1] == VMWARE_HVMAGIC) {
1562 platform_type = HW_VMWARE;
1563 return;
1564 }
1565 }
1566
1567 /*
1568 * Check Xen hypervisor. In a fully virtualized domain,
1569 * Xen's pseudo-cpuid function returns a string representing the
1570 * Xen signature in %ebx, %ecx, and %edx. %eax contains the maximum
1571 * supported cpuid function. We need at least a (base + 2) leaf value
1572 * to do what we want to do. Try different base values, since the
1573 * hypervisor might use a different one depending on whether Hyper-V
1574 * emulation is switched on by default or not.
1575 */
1576 for (base = 0x40000000; base < 0x40010000; base += 0x100) {
1577 cp.cp_eax = base;
1578 (void) __cpuid_insn(&cp);
1579 regs[0] = cp.cp_ebx;
1580 regs[1] = cp.cp_ecx;
1581 regs[2] = cp.cp_edx;
1582 regs[3] = 0;
1583 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0 &&
1584 cp.cp_eax >= (base + 2)) {
1585 platform_type &= ~HW_NATIVE;
1586 platform_type |= HW_XEN_HVM;
1587 return;
1588 }
1589 }
1590 }
1591
1592 int
1593 get_hwenv(void)
1594 {
1595 ASSERT(platform_type != -1);
1596 return (platform_type);
1597 }
1598
1599 int
1600 is_controldom(void)
1601 {
1602 return (0);
1603 }
1604
1605 #else
1606
1607 int
1608 get_hwenv(void)
1609 {
1610 return (HW_XEN_PV);
1611 }
1612
1613 int
1614 is_controldom(void)
1615 {
1616 return (DOMAIN_IS_INITDOMAIN(xen_info));
1617 }
1618
1619 #endif /* __xpv */
1620
1621 /*
1622 * Make sure that we have gathered all of the CPUID leaves that we might need to
1623 * determine topology. We assume that the standard leaf 1 has already been done
1624 * and that xmaxeax has already been calculated.
1625 */
1626 static void
1627 cpuid_gather_amd_topology_leaves(cpu_t *cpu)
1628 {
1629 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1630
1631 if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
1632 struct cpuid_regs *cp;
1633
1634 cp = &cpi->cpi_extd[8];
1635 cp->cp_eax = CPUID_LEAF_EXT_8;
1636 (void) __cpuid_insn(cp);
1637 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8, cp);
1638 }
1639
1640 if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1641 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1642 struct cpuid_regs *cp;
1643
1644 cp = &cpi->cpi_extd[0x1e];
1645 cp->cp_eax = CPUID_LEAF_EXT_1e;
1646 (void) __cpuid_insn(cp);
1647 }
1648 }
1649
1650 /*
1651 * Get the APIC ID for this processor. If Leaf B is present and valid, we prefer
1652 * it to everything else. If not, and we're on an AMD system where 8000001e is
 * valid, then we use that. Otherwise, we fall back to the default value for the
1654 * APIC ID in leaf 1.
1655 */
1656 static uint32_t
1657 cpuid_gather_apicid(struct cpuid_info *cpi)
1658 {
1659 /*
	 * Leaf B changes based on the arguments to it. Because we don't cache
1661 * it, we need to gather it again.
1662 */
1663 if (cpi->cpi_maxeax >= 0xB) {
1664 struct cpuid_regs regs;
1665 struct cpuid_regs *cp;
1666
		cp = &regs;
1668 cp->cp_eax = 0xB;
1669 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1670 (void) __cpuid_insn(cp);
1671
1672 if (cp->cp_ebx != 0) {
1673 return (cp->cp_edx);
1674 }
1675 }
1676
1677 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
1678 is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1679 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1680 return (cpi->cpi_extd[0x1e].cp_eax);
1681 }
1682
1683 return (CPI_APIC_ID(cpi));
1684 }
1685
1686 /*
1687 * For AMD processors, attempt to calculate the number of chips and cores that
1688 * exist. The way that we do this varies based on the generation, because the
1689 * generations themselves have changed dramatically.
1690 *
1691 * If cpuid leaf 0x80000008 exists, that generally tells us the number of cores.
1692 * However, with the advent of family 17h (Zen) it actually tells us the number
1693 * of threads, so we need to look at leaf 0x8000001e if available to determine
1694 * its value. Otherwise, for all prior families, the number of enabled cores is
1695 * the same as threads.
1696 *
1697 * If we do not have leaf 0x80000008, then we assume that this processor does
1698 * not have anything. AMD's older CPUID specification says there's no reason to
1699 * fall back to leaf 1.
1700 *
1701 * In some virtualization cases we will not have leaf 8000001e or it will be
1702 * zero. When that happens we assume the number of threads is one.
1703 */
1704 static void
1705 cpuid_amd_ncores(struct cpuid_info *cpi, uint_t *ncpus, uint_t *ncores)
1706 {
1707 uint_t nthreads, nthread_per_core;
1708
1709 nthreads = nthread_per_core = 1;
1710
1711 if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
1712 nthreads = BITX(cpi->cpi_extd[8].cp_ecx, 7, 0) + 1;
1713 } else if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) {
1714 nthreads = CPI_CPU_COUNT(cpi);
1715 }
1716
1717 /*
1718 * For us to have threads, and know about it, we have to be at least at
1719 * family 17h and have the cpuid bit that says we have extended
1720 * topology.
1721 */
1722 if (cpi->cpi_family >= 0x17 &&
1723 is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1724 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1725 nthread_per_core = BITX(cpi->cpi_extd[0x1e].cp_ebx, 15, 8) + 1;
1726 }
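
	/*
	 * For example, on an 8-core/16-thread family 17h part, leaf
	 * 0x80000008 %ecx[7:0] reports 15, so nthreads is 16, and leaf
	 * 0x8000001e %ebx[15:8] reports 1, so nthread_per_core is 2,
	 * which yields 8 cores below.
	 */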
1727
1728 *ncpus = nthreads;
1729 *ncores = nthreads / nthread_per_core;
1730 }
1731
1732 /*
1733 * Seed the initial values for the cores and threads for an Intel based
1734 * processor. These values will be overwritten if we detect that the processor
1735 * supports CPUID leaf 0xb.
1736 */
1737 static void
1738 cpuid_intel_ncores(struct cpuid_info *cpi, uint_t *ncpus, uint_t *ncores)
1739 {
1740 /*
1741 * Only seed the number of physical cores from the first level leaf 4
	 * information. The number of threads there indicates how many share the
1743 * L1 cache, which may or may not have anything to do with the number of
1744 * logical CPUs per core.
1745 */
1746 if (cpi->cpi_maxeax >= 4) {
1747 *ncores = BITX(cpi->cpi_std[4].cp_eax, 31, 26) + 1;
1748 } else {
1749 *ncores = 1;
1750 }
1751
1752 if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) {
1753 *ncpus = CPI_CPU_COUNT(cpi);
1754 } else {
1755 *ncpus = *ncores;
1756 }
1757 }
1758
1759 static boolean_t
1760 cpuid_leafB_getids(cpu_t *cpu)
1761 {
1762 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1763 struct cpuid_regs regs;
1764 struct cpuid_regs *cp;
1765
1766 if (cpi->cpi_maxeax < 0xB)
1767 return (B_FALSE);
1768
	cp = &regs;
1770 cp->cp_eax = 0xB;
1771 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1772
1773 (void) __cpuid_insn(cp);
1774
1775 /*
1776 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which
1777 * indicates that the extended topology enumeration leaf is
1778 * available.
1779 */
1780 if (cp->cp_ebx != 0) {
1781 uint32_t x2apic_id = 0;
1782 uint_t coreid_shift = 0;
1783 uint_t ncpu_per_core = 1;
1784 uint_t chipid_shift = 0;
1785 uint_t ncpu_per_chip = 1;
1786 uint_t i;
1787 uint_t level;
1788
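		/*
		 * Each sub-leaf of leaf B describes one topology level:
		 * %ecx[15:8] is the level type (1 = SMT, 2 = core) and
		 * %eax[4:0] is the number of low x2APIC ID bits to shift
		 * away to obtain the ID of the next level up. Those shift
		 * counts become coreid_shift and chipid_shift below.
		 */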
1789 for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
1790 cp->cp_eax = 0xB;
1791 cp->cp_ecx = i;
1792
1793 (void) __cpuid_insn(cp);
1794 level = CPI_CPU_LEVEL_TYPE(cp);
1795
1796 if (level == 1) {
1797 x2apic_id = cp->cp_edx;
1798 coreid_shift = BITX(cp->cp_eax, 4, 0);
1799 ncpu_per_core = BITX(cp->cp_ebx, 15, 0);
1800 } else if (level == 2) {
1801 x2apic_id = cp->cp_edx;
1802 chipid_shift = BITX(cp->cp_eax, 4, 0);
1803 ncpu_per_chip = BITX(cp->cp_ebx, 15, 0);
1804 }
1805 }
1806
1807 /*
1808 * cpi_apicid is taken care of in cpuid_gather_apicid.
1809 */
1810 cpi->cpi_ncpu_per_chip = ncpu_per_chip;
1811 cpi->cpi_ncore_per_chip = ncpu_per_chip /
1812 ncpu_per_core;
1813 cpi->cpi_chipid = x2apic_id >> chipid_shift;
1814 cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1);
1815 cpi->cpi_coreid = x2apic_id >> coreid_shift;
1816 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
1817 cpi->cpi_procnodeid = cpi->cpi_chipid;
1818 cpi->cpi_compunitid = cpi->cpi_coreid;
1819
1820 if (coreid_shift > 0 && chipid_shift > coreid_shift) {
1821 cpi->cpi_nthread_bits = coreid_shift;
1822 cpi->cpi_ncore_bits = chipid_shift - coreid_shift;
1823 }
1824
1825 return (B_TRUE);
1826 } else {
1827 return (B_FALSE);
1828 }
1829 }
1830
1831 static void
1832 cpuid_intel_getids(cpu_t *cpu, void *feature)
1833 {
1834 uint_t i;
1835 uint_t chipid_shift = 0;
1836 uint_t coreid_shift = 0;
1837 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1838
1839 /*
1840 * There are no compute units or processor nodes currently on Intel.
1841 * Always set these to one.
1842 */
1843 cpi->cpi_procnodes_per_pkg = 1;
1844 cpi->cpi_cores_per_compunit = 1;
1845
1846 /*
1847 * If cpuid Leaf B is present, use that to try and get this information.
1848 * It will be the most accurate for Intel CPUs.
1849 */
1850 if (cpuid_leafB_getids(cpu))
1851 return;
1852
1853 /*
1854 * In this case, we have the leaf 1 and leaf 4 values for ncpu_per_chip
1855 * and ncore_per_chip. These represent the largest power of two values
1856 * that we need to cover all of the IDs in the system. Therefore, we use
1857 * those values to seed the number of bits needed to cover information
1858 * in the case when leaf B is not available. These values will probably
1859 * be larger than required, but that's OK.
1860 */
1861 cpi->cpi_nthread_bits = ddi_fls(cpi->cpi_ncpu_per_chip);
1862 cpi->cpi_ncore_bits = ddi_fls(cpi->cpi_ncore_per_chip);
1863
1864 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1)
1865 chipid_shift++;
1866
1867 cpi->cpi_chipid = cpi->cpi_apicid >> chipid_shift;
1868 cpi->cpi_clogid = cpi->cpi_apicid & ((1 << chipid_shift) - 1);
1869
1870 if (is_x86_feature(feature, X86FSET_CMP)) {
1871 /*
1872 * Multi-core (and possibly multi-threaded)
1873 * processors.
1874 */
1875 uint_t ncpu_per_core;
1876 if (cpi->cpi_ncore_per_chip == 1)
1877 ncpu_per_core = cpi->cpi_ncpu_per_chip;
1878 else if (cpi->cpi_ncore_per_chip > 1)
1879 ncpu_per_core = cpi->cpi_ncpu_per_chip /
1880 cpi->cpi_ncore_per_chip;
1881 /*
1882 * 8bit APIC IDs on dual core Pentiums
1883 * look like this:
1884 *
1885 * +-----------------------+------+------+
1886 * | Physical Package ID | MC | HT |
1887 * +-----------------------+------+------+
1888 * <------- chipid -------->
1889 * <------- coreid --------------->
1890 * <--- clogid -->
1891 * <------>
1892 * pkgcoreid
1893 *
1894 * Where the number of bits necessary to
1895 * represent MC and HT fields together equals
1896 * to the minimum number of bits necessary to
1897 * store the value of cpi->cpi_ncpu_per_chip.
1898 * Of those bits, the MC part uses the number
1899 * of bits necessary to store the value of
1900 * cpi->cpi_ncore_per_chip.
1901 */
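			/*
			 * As a concrete example, with 4 logical CPUs and 2
			 * cores per chip, ncpu_per_core is 2, so coreid_shift
			 * becomes 1 while chipid_shift is 2; an APIC ID of 5
			 * (0b0101) then yields chipid 1, clogid 1, coreid 2
			 * and pkgcoreid 0.
			 */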
1902 for (i = 1; i < ncpu_per_core; i <<= 1)
1903 coreid_shift++;
1904 cpi->cpi_coreid = cpi->cpi_apicid >> coreid_shift;
1905 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
1906 } else if (is_x86_feature(feature, X86FSET_HTT)) {
1907 /*
1908 * Single-core multi-threaded processors.
1909 */
1910 cpi->cpi_coreid = cpi->cpi_chipid;
1911 cpi->cpi_pkgcoreid = 0;
1912 } else {
1913 /*
1914 * Single-core single-thread processors.
1915 */
1916 cpi->cpi_coreid = cpu->cpu_id;
1917 cpi->cpi_pkgcoreid = 0;
1918 }
1919 cpi->cpi_procnodeid = cpi->cpi_chipid;
1920 cpi->cpi_compunitid = cpi->cpi_coreid;
1921 }
1922
1923 /*
1924 * Historically, AMD has had CMP chips with only a single thread per core.
1925 * However, starting in family 17h (Zen), this has changed and they now have
1926 * multiple threads. Our internal core id needs to be a unique value.
1927 *
1928 * To determine the core id of an AMD system, if we're from a family before 17h,
1929 * then we just use the cpu id, as that gives us a good value that will be
1930 * unique for each core. If instead, we're on family 17h or later, then we need
1931 * to do something more complicated. CPUID leaf 0x8000001e can tell us
 * how many threads share a core. Based on that, we'll shift the APIC ID.
1933 * We can't use the normal core id in that leaf as it's only unique within the
1934 * socket, which is perfect for cpi_pkgcoreid, but not us.
1935 */
1936 static id_t
1937 cpuid_amd_get_coreid(cpu_t *cpu)
1938 {
1939 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1940
1941 if (cpi->cpi_family >= 0x17 &&
1942 is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1943 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1944 uint_t nthreads = BITX(cpi->cpi_extd[0x1e].cp_ebx, 15, 8) + 1;
1945 if (nthreads > 1) {
1946 VERIFY3U(nthreads, ==, 2);
1947 return (cpi->cpi_apicid >> 1);
1948 }
1949 }
1950
1951 return (cpu->cpu_id);
1952 }
1953
1954 /*
 * Calculating IDs on AMD is a more challenging task. This is notable because of
1956 * following two facts:
1957 *
1958 * 1. Before family 0x17 (Zen), there was no support for SMT and there was
1959 * also no way to get an actual unique core id from the system. As such, we
1960 * synthesize this case by using cpu->cpu_id. This scheme does not,
1961 * however, guarantee that sibling cores of a chip will have sequential
1962 * coreids starting at a multiple of the number of cores per chip - that is
1963 * usually the case, but if the ACPI MADT table is presented in a different
1964 * order then we need to perform a few more gymnastics for the pkgcoreid.
1965 *
 * 2. In families 0x15 and 0x16 (Bulldozer and co.) the cores came in groups
1967 * called compute units. These compute units share the L1I cache, L2 cache,
1968 * and the FPU. To deal with this, a new topology leaf was added in
1969 * 0x8000001e. However, parts of this leaf have different meanings
1970 * once we get to family 0x17.
1971 */
1972
1973 static void
1974 cpuid_amd_getids(cpu_t *cpu, uchar_t *features)
1975 {
1976 int i, first_half, coreidsz;
1977 uint32_t nb_caps_reg;
1978 uint_t node2_1;
1979 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1980 struct cpuid_regs *cp;
1981
1982 /*
1983 * Calculate the core id (this comes from hardware in family 0x17 if it
1984 * hasn't been stripped by virtualization). We always set the compute
1985 * unit id to the same value. Also, initialize the default number of
1986 * cores per compute unit and nodes per package. This will be
1987 * overwritten when we know information about a particular family.
1988 */
1989 cpi->cpi_coreid = cpuid_amd_get_coreid(cpu);
1990 cpi->cpi_compunitid = cpi->cpi_coreid;
1991 cpi->cpi_cores_per_compunit = 1;
1992 cpi->cpi_procnodes_per_pkg = 1;
1993
1994 /*
1995 * To construct the logical ID, we need to determine how many APIC IDs
1996 * are dedicated to the cores and threads. This is provided for us in
1997 * 0x80000008. However, if it's not present (say due to virtualization),
1998 * then we assume it's one. This should be present on all 64-bit AMD
1999 * processors. It was added in family 0xf (Hammer).
2000 */
2001 if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
2002 coreidsz = BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12);
2003
2004 /*
		 * In AMD parlance, a chip is really a node, while illumos
2006 * uses chip as equivalent to socket/package.
2007 */
2008 if (coreidsz == 0) {
2009 /* Use legacy method */
2010 for (i = 1; i < cpi->cpi_ncore_per_chip; i <<= 1)
2011 coreidsz++;
2012 if (coreidsz == 0)
2013 coreidsz = 1;
2014 }
2015 } else {
2016 /* Assume single-core part */
2017 coreidsz = 1;
2018 }
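	/*
	 * For example, a coreidsz of 4 means the low four APIC ID bits
	 * identify the logical CPU, so the mask below is simply 0xf.
	 */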
2019 cpi->cpi_clogid = cpi->cpi_apicid & ((1 << coreidsz) - 1);
2020
2021 /*
2022 * The package core ID varies depending on the family. For family 17h,
2023 * we can get this directly from leaf CPUID_LEAF_EXT_1e. Otherwise, we
	 * can use the clogid as is. When family 17h is virtualized and we don't
	 * have valid data in the leaf, then we won't think we have SMT, in
	 * which case the cpi_clogid is again sufficient.
2028 */
2029 if (cpi->cpi_family >= 0x17 &&
2030 is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
2031 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e &&
2032 cpi->cpi_extd[0x1e].cp_ebx != 0) {
2033 cpi->cpi_pkgcoreid = BITX(cpi->cpi_extd[0x1e].cp_ebx, 7, 0);
2034 } else {
2035 cpi->cpi_pkgcoreid = cpi->cpi_clogid;
2036 }
2037
2038 /*
2039 * Obtain the node ID and compute unit IDs. If we're on family 0x15
2040 * (bulldozer) or newer, then we can derive all of this from leaf
2041 * CPUID_LEAF_EXT_1e. Otherwise, the method varies by family.
2042 */
2043 if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
2044 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
2045 cp = &cpi->cpi_extd[0x1e];
2046
2047 cpi->cpi_procnodes_per_pkg = BITX(cp->cp_ecx, 10, 8) + 1;
2048 cpi->cpi_procnodeid = BITX(cp->cp_ecx, 7, 0);
2049
2050 /*
2051 * For Bulldozer-era CPUs, recalculate the compute unit
2052 * information.
2053 */
2054 if (cpi->cpi_family >= 0x15 && cpi->cpi_family < 0x17) {
2055 cpi->cpi_cores_per_compunit =
2056 BITX(cp->cp_ebx, 15, 8) + 1;
2057 cpi->cpi_compunitid = BITX(cp->cp_ebx, 7, 0) +
2058 (cpi->cpi_ncore_per_chip /
2059 cpi->cpi_cores_per_compunit) *
2060 (cpi->cpi_procnodeid /
2061 cpi->cpi_procnodes_per_pkg);
2062 }
2063 } else if (cpi->cpi_family == 0xf || cpi->cpi_family >= 0x11) {
2064 cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7;
2065 } else if (cpi->cpi_family == 0x10) {
2066 /*
2067 * See if we are a multi-node processor.
2068 * All processors in the system have the same number of nodes
2069 */
2070 nb_caps_reg = pci_getl_func(0, 24, 3, 0xe8);
2071 if ((cpi->cpi_model < 8) || BITX(nb_caps_reg, 29, 29) == 0) {
2072 /* Single-node */
2073 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5,
2074 coreidsz);
2075 } else {
2076
2077 /*
2078 * Multi-node revision D (2 nodes per package
2079 * are supported)
2080 */
2081 cpi->cpi_procnodes_per_pkg = 2;
2082
2083 first_half = (cpi->cpi_pkgcoreid <=
2084 (cpi->cpi_ncore_per_chip/2 - 1));
2085
2086 if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) {
2087 /* We are BSP */
2088 cpi->cpi_procnodeid = (first_half ? 0 : 1);
2089 } else {
2090
2091 /* We are AP */
2092 /* NodeId[2:1] bits to use for reading F3xe8 */
2093 node2_1 = BITX(cpi->cpi_apicid, 5, 4) << 1;
2094
2095 nb_caps_reg =
2096 pci_getl_func(0, 24 + node2_1, 3, 0xe8);
2097
2098 /*
2099 * Check IntNodeNum bit (31:30, but bit 31 is
2100 * always 0 on dual-node processors)
2101 */
2102 if (BITX(nb_caps_reg, 30, 30) == 0)
2103 cpi->cpi_procnodeid = node2_1 +
2104 !first_half;
2105 else
2106 cpi->cpi_procnodeid = node2_1 +
2107 first_half;
2108 }
2109 }
2110 } else {
2111 cpi->cpi_procnodeid = 0;
2112 }
2113
2114 cpi->cpi_chipid =
2115 cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg;
2116
2117 cpi->cpi_ncore_bits = coreidsz;
2118 cpi->cpi_nthread_bits = ddi_fls(cpi->cpi_ncpu_per_chip /
2119 cpi->cpi_ncore_per_chip);
2120 }
2121
2122 static void
2123 cpuid_scan_security(cpu_t *cpu, uchar_t *featureset)
2124 {
2125 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2126
2127 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
2128 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
2129 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBPB)
2130 add_x86_feature(featureset, X86FSET_IBPB);
2131 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBRS)
2132 add_x86_feature(featureset, X86FSET_IBRS);
2133 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_STIBP)
2134 add_x86_feature(featureset, X86FSET_STIBP);
2135 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBRS_ALL)
2136 add_x86_feature(featureset, X86FSET_IBRS_ALL);
2137 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_STIBP_ALL)
2138 add_x86_feature(featureset, X86FSET_STIBP_ALL);
2139 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_PREFER_IBRS)
2140 add_x86_feature(featureset, X86FSET_RSBA);
2141 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_SSBD)
2142 add_x86_feature(featureset, X86FSET_SSBD);
2143 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_VIRT_SSBD)
2144 add_x86_feature(featureset, X86FSET_SSBD_VIRT);
2145 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_SSB_NO)
2146 add_x86_feature(featureset, X86FSET_SSB_NO);
2147 } else if (cpi->cpi_vendor == X86_VENDOR_Intel &&
2148 cpi->cpi_maxeax >= 7) {
2149 struct cpuid_regs *ecp;
2150 ecp = &cpi->cpi_std[7];
2151
2152 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_SPEC_CTRL) {
2153 add_x86_feature(featureset, X86FSET_IBRS);
2154 add_x86_feature(featureset, X86FSET_IBPB);
2155 }
2156
2157 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_STIBP) {
2158 add_x86_feature(featureset, X86FSET_STIBP);
2159 }
2160
2161 /*
2162 * Don't read the arch caps MSR on xpv where we lack the
2163 * on_trap().
2164 */
2165 #ifndef __xpv
2166 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_ARCH_CAPS) {
2167 on_trap_data_t otd;
2168
2169 /*
2170 * Be paranoid and assume we'll get a #GP.
2171 */
2172 if (!on_trap(&otd, OT_DATA_ACCESS)) {
2173 uint64_t reg;
2174
2175 reg = rdmsr(MSR_IA32_ARCH_CAPABILITIES);
2176 if (reg & IA32_ARCH_CAP_RDCL_NO) {
2177 add_x86_feature(featureset,
2178 X86FSET_RDCL_NO);
2179 }
2180 if (reg & IA32_ARCH_CAP_IBRS_ALL) {
2181 add_x86_feature(featureset,
2182 X86FSET_IBRS_ALL);
2183 }
2184 if (reg & IA32_ARCH_CAP_RSBA) {
2185 add_x86_feature(featureset,
2186 X86FSET_RSBA);
2187 }
2188 if (reg & IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY) {
2189 add_x86_feature(featureset,
2190 X86FSET_L1D_VM_NO);
2191 }
2192 if (reg & IA32_ARCH_CAP_SSB_NO) {
2193 add_x86_feature(featureset,
2194 X86FSET_SSB_NO);
2195 }
2196 }
2197 no_trap();
2198 }
2199 #endif /* !__xpv */
2200
2201 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_SSBD)
2202 add_x86_feature(featureset, X86FSET_SSBD);
2203
2204 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_FLUSH_CMD)
2205 add_x86_feature(featureset, X86FSET_FLUSH_CMD);
2206 }
2207 }
2208
2209 /*
2210 * Setup XFeature_Enabled_Mask register. Required by xsave feature.
2211 */
2212 void
2213 setup_xfem(void)
2214 {
2215 uint64_t flags = XFEATURE_LEGACY_FP;
2216
2217 ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
2218
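	/*
	 * In XCR0 terms, legacy x87 state is bit 0 (and must always be set),
	 * SSE/XMM state is bit 1, AVX/upper-YMM state is bit 2, and the
	 * AVX-512 opmask and ZMM components occupy bits 5 through 7; the
	 * XFEATURE_* flags accumulated here correspond to those bits.
	 */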
2219 if (is_x86_feature(x86_featureset, X86FSET_SSE))
2220 flags |= XFEATURE_SSE;
2221
2222 if (is_x86_feature(x86_featureset, X86FSET_AVX))
2223 flags |= XFEATURE_AVX;
2224
2225 if (is_x86_feature(x86_featureset, X86FSET_AVX512F))
2226 flags |= XFEATURE_AVX512;
2227
2228 set_xcr(XFEATURE_ENABLED_MASK, flags);
2229
2230 xsave_bv_all = flags;
2231 }
2232
2233 static void
2234 cpuid_pass1_topology(cpu_t *cpu, uchar_t *featureset)
2235 {
2236 struct cpuid_info *cpi;
2237
2238 cpi = cpu->cpu_m.mcpu_cpi;
2239
2240 if (cpi->cpi_vendor == X86_VENDOR_AMD) {
2241 cpuid_gather_amd_topology_leaves(cpu);
2242 }
2243
2244 cpi->cpi_apicid = cpuid_gather_apicid(cpi);
2245
2246 /*
2247 * Before we can calculate the IDs that we should assign to this
2248 * processor, we need to understand how many cores and threads it has.
2249 */
2250 switch (cpi->cpi_vendor) {
2251 case X86_VENDOR_Intel:
2252 cpuid_intel_ncores(cpi, &cpi->cpi_ncpu_per_chip,
2253 &cpi->cpi_ncore_per_chip);
2254 break;
2255 case X86_VENDOR_AMD:
2256 cpuid_amd_ncores(cpi, &cpi->cpi_ncpu_per_chip,
2257 &cpi->cpi_ncore_per_chip);
2258 break;
2259 default:
2260 /*
2261 * If we have some other x86 compatible chip, it's not clear how
2262 * they would behave. The most common case is virtualization
2263 * today, though there are also 64-bit VIA chips. Assume that
2264 * all we can get is the basic Leaf 1 HTT information.
2265 */
2266 if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) {
2267 cpi->cpi_ncore_per_chip = 1;
2268 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi);
2269 }
2270 break;
2271 }
2272
2273 /*
2274 * Based on the calculated number of threads and cores, potentially
2275 * assign the HTT and CMT features.
2276 */
2277 if (cpi->cpi_ncore_per_chip > 1) {
2278 add_x86_feature(featureset, X86FSET_CMP);
2279 }
2280
2281 if (cpi->cpi_ncpu_per_chip > 1 &&
2282 cpi->cpi_ncpu_per_chip != cpi->cpi_ncore_per_chip) {
2283 add_x86_feature(featureset, X86FSET_HTT);
2284 }
2285
2286 /*
	 * Now that this has been set up, we need to go through and calculate all
2288 * the rest of the parameters that exist. If we think the CPU doesn't
2289 * have either SMT (HTT) or CMP, then we basically go through and fake
2290 * up information in some way. The most likely case for this is
2291 * virtualization where we have a lot of partial topology information.
2292 */
2293 if (!is_x86_feature(featureset, X86FSET_HTT) &&
2294 !is_x86_feature(featureset, X86FSET_CMP)) {
2295 /*
2296 * This is a single core, single-threaded processor.
2297 */
2298 cpi->cpi_procnodes_per_pkg = 1;
2299 cpi->cpi_cores_per_compunit = 1;
2300 cpi->cpi_compunitid = 0;
2301 cpi->cpi_chipid = -1;
2302 cpi->cpi_clogid = 0;
2303 cpi->cpi_coreid = cpu->cpu_id;
2304 cpi->cpi_pkgcoreid = 0;
2305 if (cpi->cpi_vendor == X86_VENDOR_AMD) {
2306 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 3, 0);
2307 } else {
2308 cpi->cpi_procnodeid = cpi->cpi_chipid;
2309 }
2310 } else {
2311 switch (cpi->cpi_vendor) {
2312 case X86_VENDOR_Intel:
2313 cpuid_intel_getids(cpu, featureset);
2314 break;
2315 case X86_VENDOR_AMD:
2316 cpuid_amd_getids(cpu, featureset);
2317 break;
2318 default:
2319 /*
2320 * In this case, it's hard to say what we should do.
		 * We're going to model them to the OS as single-core,
		 * single-threaded CPUs. We don't have a good identifier for them, so
2323 * we're just going to use the cpu id all on a single
2324 * chip.
2325 *
2326 * This case has historically been different from the
2327 * case above where we don't have HTT or CMP. While they
2328 * could be combined, we've opted to keep it separate to
2329 * minimize the risk of topology changes in weird cases.
2330 */
2331 cpi->cpi_procnodes_per_pkg = 1;
2332 cpi->cpi_cores_per_compunit = 1;
2333 cpi->cpi_chipid = 0;
2334 cpi->cpi_coreid = cpu->cpu_id;
2335 cpi->cpi_clogid = cpu->cpu_id;
2336 cpi->cpi_pkgcoreid = cpu->cpu_id;
2337 cpi->cpi_procnodeid = cpi->cpi_chipid;
2338 cpi->cpi_compunitid = cpi->cpi_coreid;
2339 break;
2340 }
2341 }
2342 }
2343
2344 void
2345 cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
2346 {
2347 uint32_t mask_ecx, mask_edx;
2348 struct cpuid_info *cpi;
2349 struct cpuid_regs *cp;
2350 int xcpuid;
2351 #if !defined(__xpv)
2352 extern int idle_cpu_prefer_mwait;
2353 #endif
2354
2355 /*
2356 * Space statically allocated for BSP, ensure pointer is set
2357 */
2358 if (cpu->cpu_id == 0) {
2359 if (cpu->cpu_m.mcpu_cpi == NULL)
2360 cpu->cpu_m.mcpu_cpi = &cpuid_info0;
2361 }
2362
2363 add_x86_feature(featureset, X86FSET_CPUID);
2364
2365 cpi = cpu->cpu_m.mcpu_cpi;
2366 ASSERT(cpi != NULL);
2367 cp = &cpi->cpi_std[0];
2368 cp->cp_eax = 0;
2369 cpi->cpi_maxeax = __cpuid_insn(cp);
2370 {
2371 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr;
2372 *iptr++ = cp->cp_ebx;
2373 *iptr++ = cp->cp_edx;
2374 *iptr++ = cp->cp_ecx;
2375 *(char *)&cpi->cpi_vendorstr[12] = '\0';
2376 }
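
	/*
	 * The twelve-byte vendor ID string is the concatenation of %ebx, %edx
	 * and %ecx from leaf 0, in that order, e.g. "GenuineIntel" or
	 * "AuthenticAMD"; that string is what is mapped to cpi_vendor below.
	 */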
2377
2378 cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr);
2379 x86_vendor = cpi->cpi_vendor; /* for compatibility */
2380
2381 /*
2382 * Limit the range in case of weird hardware
2383 */
2384 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX)
2385 cpi->cpi_maxeax = CPI_MAXEAX_MAX;
2386 if (cpi->cpi_maxeax < 1)
2387 goto pass1_done;
2388
2389 cp = &cpi->cpi_std[1];
2390 cp->cp_eax = 1;
2391 (void) __cpuid_insn(cp);
2392
2393 /*
2394 * Extract identifying constants for easy access.
2395 */
2396 cpi->cpi_model = CPI_MODEL(cpi);
2397 cpi->cpi_family = CPI_FAMILY(cpi);
2398
2399 if (cpi->cpi_family == 0xf)
2400 cpi->cpi_family += CPI_FAMILY_XTD(cpi);
2401
2402 /*
2403 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf.
2404 * Intel, and presumably everyone else, uses model == 0xf, as
2405 * one would expect (max value means possible overflow). Sigh.
2406 */
2407
2408 switch (cpi->cpi_vendor) {
2409 case X86_VENDOR_Intel:
2410 if (IS_EXTENDED_MODEL_INTEL(cpi))
2411 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
2412 break;
2413 case X86_VENDOR_AMD:
2414 if (CPI_FAMILY(cpi) == 0xf)
2415 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
2416 break;
2417 default:
2418 if (cpi->cpi_model == 0xf)
2419 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
2420 break;
2421 }
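
	/*
	 * For example, an AMD Zen part reports a base family of 0xf with an
	 * extended family of 0x8, giving a cpi_family of 0x17, while an Intel
	 * Skylake client part reports family 6 with an extended model of 0x5
	 * and a base model of 0xe, giving a cpi_model of 0x5e.
	 */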
2422
2423 cpi->cpi_step = CPI_STEP(cpi);
2424 cpi->cpi_brandid = CPI_BRANDID(cpi);
2425
2426 /*
2427 * *default* assumptions:
2428 * - believe %edx feature word
2429 * - ignore %ecx feature word
2430 * - 32-bit virtual and physical addressing
2431 */
2432 mask_edx = 0xffffffff;
2433 mask_ecx = 0;
2434
2435 cpi->cpi_pabits = cpi->cpi_vabits = 32;
2436
2437 switch (cpi->cpi_vendor) {
2438 case X86_VENDOR_Intel:
2439 if (cpi->cpi_family == 5)
2440 x86_type = X86_TYPE_P5;
2441 else if (IS_LEGACY_P6(cpi)) {
2442 x86_type = X86_TYPE_P6;
2443 pentiumpro_bug4046376 = 1;
2444 /*
2445 * Clear the SEP bit when it was set erroneously
2446 */
2447 if (cpi->cpi_model < 3 && cpi->cpi_step < 3)
2448 cp->cp_edx &= ~CPUID_INTC_EDX_SEP;
2449 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) {
2450 x86_type = X86_TYPE_P4;
2451 /*
2452 * We don't currently depend on any of the %ecx
2453 * features until Prescott, so we'll only check
2454 * this from P4 onwards. We might want to revisit
2455 * that idea later.
2456 */
2457 mask_ecx = 0xffffffff;
2458 } else if (cpi->cpi_family > 0xf)
2459 mask_ecx = 0xffffffff;
2460 /*
2461 * We don't support MONITOR/MWAIT if leaf 5 is not available
2462 * to obtain the monitor linesize.
2463 */
2464 if (cpi->cpi_maxeax < 5)
2465 mask_ecx &= ~CPUID_INTC_ECX_MON;
2466 break;
2467 case X86_VENDOR_IntelClone:
2468 default:
2469 break;
2470 case X86_VENDOR_AMD:
2471 #if defined(OPTERON_ERRATUM_108)
2472 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) {
2473 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0;
2474 cpi->cpi_model = 0xc;
2475 } else
2476 #endif
2477 if (cpi->cpi_family == 5) {
2478 /*
2479 * AMD K5 and K6
2480 *
2481 * These CPUs have an incomplete implementation
2482 * of MCA/MCE which we mask away.
2483 */
2484 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA);
2485
2486 /*
2487 * Model 0 uses the wrong (APIC) bit
2488 * to indicate PGE. Fix it here.
2489 */
2490 if (cpi->cpi_model == 0) {
2491 if (cp->cp_edx & 0x200) {
2492 cp->cp_edx &= ~0x200;
2493 cp->cp_edx |= CPUID_INTC_EDX_PGE;
2494 }
2495 }
2496
2497 /*
2498 * Early models had problems w/ MMX; disable.
2499 */
2500 if (cpi->cpi_model < 6)
2501 mask_edx &= ~CPUID_INTC_EDX_MMX;
2502 }
2503
2504 /*
2505 * For newer families, SSE3 and CX16, at least, are valid;
2506 * enable all
2507 */
2508 if (cpi->cpi_family >= 0xf)
2509 mask_ecx = 0xffffffff;
2510 /*
2511 * We don't support MONITOR/MWAIT if leaf 5 is not available
2512 * to obtain the monitor linesize.
2513 */
2514 if (cpi->cpi_maxeax < 5)
2515 mask_ecx &= ~CPUID_INTC_ECX_MON;
2516
2517 #if !defined(__xpv)
2518 /*
2519 * AMD has not historically used MWAIT in the CPU's idle loop.
2520 * Pre-family-10h Opterons do not have the MWAIT instruction. We
2521 * know for certain that in at least family 17h, per AMD, mwait
2522 * is preferred. Families in-between are less certain.
2523 */
2524 if (cpi->cpi_family < 0x17) {
2525 idle_cpu_prefer_mwait = 0;
2526 }
2527 #endif
2528
2529 break;
2530 case X86_VENDOR_TM:
2531 /*
2532 * workaround the NT workaround in CMS 4.1
2533 */
2534 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 &&
2535 (cpi->cpi_step == 2 || cpi->cpi_step == 3))
2536 cp->cp_edx |= CPUID_INTC_EDX_CX8;
2537 break;
2538 case X86_VENDOR_Centaur:
2539 /*
2540 * workaround the NT workarounds again
2541 */
2542 if (cpi->cpi_family == 6)
2543 cp->cp_edx |= CPUID_INTC_EDX_CX8;
2544 break;
2545 case X86_VENDOR_Cyrix:
2546 /*
2547 * We rely heavily on the probing in locore
2548 * to actually figure out what parts, if any,
2549 * of the Cyrix cpuid instruction to believe.
2550 */
2551 switch (x86_type) {
2552 case X86_TYPE_CYRIX_486:
2553 mask_edx = 0;
2554 break;
2555 case X86_TYPE_CYRIX_6x86:
2556 mask_edx = 0;
2557 break;
2558 case X86_TYPE_CYRIX_6x86L:
2559 mask_edx =
2560 CPUID_INTC_EDX_DE |
2561 CPUID_INTC_EDX_CX8;
2562 break;
2563 case X86_TYPE_CYRIX_6x86MX:
2564 mask_edx =
2565 CPUID_INTC_EDX_DE |
2566 CPUID_INTC_EDX_MSR |
2567 CPUID_INTC_EDX_CX8 |
2568 CPUID_INTC_EDX_PGE |
2569 CPUID_INTC_EDX_CMOV |
2570 CPUID_INTC_EDX_MMX;
2571 break;
2572 case X86_TYPE_CYRIX_GXm:
2573 mask_edx =
2574 CPUID_INTC_EDX_MSR |
2575 CPUID_INTC_EDX_CX8 |
2576 CPUID_INTC_EDX_CMOV |
2577 CPUID_INTC_EDX_MMX;
2578 break;
2579 case X86_TYPE_CYRIX_MediaGX:
2580 break;
2581 case X86_TYPE_CYRIX_MII:
2582 case X86_TYPE_VIA_CYRIX_III:
2583 mask_edx =
2584 CPUID_INTC_EDX_DE |
2585 CPUID_INTC_EDX_TSC |
2586 CPUID_INTC_EDX_MSR |
2587 CPUID_INTC_EDX_CX8 |
2588 CPUID_INTC_EDX_PGE |
2589 CPUID_INTC_EDX_CMOV |
2590 CPUID_INTC_EDX_MMX;
2591 break;
2592 default:
2593 break;
2594 }
2595 break;
2596 }
2597
2598 #if defined(__xpv)
2599 /*
2600 * Do not support MONITOR/MWAIT under a hypervisor
2601 */
2602 mask_ecx &= ~CPUID_INTC_ECX_MON;
2603 /*
2604 * Do not support XSAVE under a hypervisor for now
2605 */
2606 xsave_force_disable = B_TRUE;
2607
2608 #endif /* __xpv */
2609
2610 if (xsave_force_disable) {
2611 mask_ecx &= ~CPUID_INTC_ECX_XSAVE;
2612 mask_ecx &= ~CPUID_INTC_ECX_AVX;
2613 mask_ecx &= ~CPUID_INTC_ECX_F16C;
2614 mask_ecx &= ~CPUID_INTC_ECX_FMA;
2615 }
2616
2617 /*
2618 * Now we've figured out the masks that determine
2619 * which bits we choose to believe, apply the masks
2620 * to the feature words, then map the kernel's view
2621 * of these feature words into its feature word.
2622 */
2623 cp->cp_edx &= mask_edx;
2624 cp->cp_ecx &= mask_ecx;
2625
2626 /*
2627 * apply any platform restrictions (we don't call this
2628 * immediately after __cpuid_insn here, because we need the
2629 * workarounds applied above first)
2630 */
2631 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp);
2632
2633 /*
2634 * In addition to ecx and edx, Intel and AMD are storing a bunch of
2635 * instruction set extensions in leaf 7's ebx, ecx, and edx.
2636 */
2637 if (cpi->cpi_maxeax >= 7) {
2638 struct cpuid_regs *ecp;
2639 ecp = &cpi->cpi_std[7];
2640 ecp->cp_eax = 7;
2641 ecp->cp_ecx = 0;
2642 (void) __cpuid_insn(ecp);
2643
2644 /*
2645 * If XSAVE has been disabled, just ignore all of the
2646 * extended-save-area dependent flags here.
2647 */
2648 if (xsave_force_disable) {
2649 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_BMI1;
2650 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_BMI2;
2651 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_AVX2;
2652 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_MPX;
2653 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_ALL_AVX512;
2654 ecp->cp_ecx &= ~CPUID_INTC_ECX_7_0_ALL_AVX512;
2655 ecp->cp_edx &= ~CPUID_INTC_EDX_7_0_ALL_AVX512;
2656 }
2657
2658 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_SMEP)
2659 add_x86_feature(featureset, X86FSET_SMEP);
2660
2661 /*
2662 * We check disable_smap here in addition to in startup_smap()
2663 * to ensure CPUs that aren't the boot CPU don't accidentally
2664 * include it in the feature set and thus generate a mismatched
2665 * x86 feature set across CPUs.
2666 */
2667 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_SMAP &&
2668 disable_smap == 0)
2669 add_x86_feature(featureset, X86FSET_SMAP);
2670
2671 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_RDSEED)
2672 add_x86_feature(featureset, X86FSET_RDSEED);
2673
2674 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_ADX)
2675 add_x86_feature(featureset, X86FSET_ADX);
2676
2677 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_FSGSBASE)
2678 add_x86_feature(featureset, X86FSET_FSGSBASE);
2679
2680 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_CLFLUSHOPT)
2681 add_x86_feature(featureset, X86FSET_CLFLUSHOPT);
2682
2683 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
2684 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_INVPCID)
2685 add_x86_feature(featureset, X86FSET_INVPCID);
2686
2687 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_MPX)
2688 add_x86_feature(featureset, X86FSET_MPX);
2689
2690 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_CLWB)
2691 add_x86_feature(featureset, X86FSET_CLWB);
2692 }
2693 }
2694
2695 /*
2696 * fold in overrides from the "eeprom" mechanism
2697 */
2698 cp->cp_edx |= cpuid_feature_edx_include;
2699 cp->cp_edx &= ~cpuid_feature_edx_exclude;
2700
2701 cp->cp_ecx |= cpuid_feature_ecx_include;
2702 cp->cp_ecx &= ~cpuid_feature_ecx_exclude;
2703
2704 if (cp->cp_edx & CPUID_INTC_EDX_PSE) {
2705 add_x86_feature(featureset, X86FSET_LARGEPAGE);
2706 }
2707 if (cp->cp_edx & CPUID_INTC_EDX_TSC) {
2708 add_x86_feature(featureset, X86FSET_TSC);
2709 }
2710 if (cp->cp_edx & CPUID_INTC_EDX_MSR) {
2711 add_x86_feature(featureset, X86FSET_MSR);
2712 }
2713 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) {
2714 add_x86_feature(featureset, X86FSET_MTRR);
2715 }
2716 if (cp->cp_edx & CPUID_INTC_EDX_PGE) {
2717 add_x86_feature(featureset, X86FSET_PGE);
2718 }
2719 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) {
2720 add_x86_feature(featureset, X86FSET_CMOV);
2721 }
2722 if (cp->cp_edx & CPUID_INTC_EDX_MMX) {
2723 add_x86_feature(featureset, X86FSET_MMX);
2724 }
2725 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 &&
2726 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) {
2727 add_x86_feature(featureset, X86FSET_MCA);
2728 }
2729 if (cp->cp_edx & CPUID_INTC_EDX_PAE) {
2730 add_x86_feature(featureset, X86FSET_PAE);
2731 }
2732 if (cp->cp_edx & CPUID_INTC_EDX_CX8) {
2733 add_x86_feature(featureset, X86FSET_CX8);
2734 }
2735 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) {
2736 add_x86_feature(featureset, X86FSET_CX16);
2737 }
2738 if (cp->cp_edx & CPUID_INTC_EDX_PAT) {
2739 add_x86_feature(featureset, X86FSET_PAT);
2740 }
2741 if (cp->cp_edx & CPUID_INTC_EDX_SEP) {
2742 add_x86_feature(featureset, X86FSET_SEP);
2743 }
2744 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) {
2745 /*
2746 * In our implementation, fxsave/fxrstor
2747 * are prerequisites before we'll even
2748 * try and do SSE things.
2749 */
2750 if (cp->cp_edx & CPUID_INTC_EDX_SSE) {
2751 add_x86_feature(featureset, X86FSET_SSE);
2752 }
2753 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) {
2754 add_x86_feature(featureset, X86FSET_SSE2);
2755 }
2756 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) {
2757 add_x86_feature(featureset, X86FSET_SSE3);
2758 }
2759 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) {
2760 add_x86_feature(featureset, X86FSET_SSSE3);
2761 }
2762 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) {
2763 add_x86_feature(featureset, X86FSET_SSE4_1);
2764 }
2765 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) {
2766 add_x86_feature(featureset, X86FSET_SSE4_2);
2767 }
2768 if (cp->cp_ecx & CPUID_INTC_ECX_AES) {
2769 add_x86_feature(featureset, X86FSET_AES);
2770 }
2771 if (cp->cp_ecx & CPUID_INTC_ECX_PCLMULQDQ) {
2772 add_x86_feature(featureset, X86FSET_PCLMULQDQ);
2773 }
2774
2775 if (cpi->cpi_std[7].cp_ebx & CPUID_INTC_EBX_7_0_SHA)
2776 add_x86_feature(featureset, X86FSET_SHA);
2777
2778 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_UMIP)
2779 add_x86_feature(featureset, X86FSET_UMIP);
2780 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_PKU)
2781 add_x86_feature(featureset, X86FSET_PKU);
2782 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_OSPKE)
2783 add_x86_feature(featureset, X86FSET_OSPKE);
2784
2785 if (cp->cp_ecx & CPUID_INTC_ECX_XSAVE) {
2786 add_x86_feature(featureset, X86FSET_XSAVE);
2787
2788 /* We only test AVX & AVX512 when there is XSAVE */
2789
2790 if (cp->cp_ecx & CPUID_INTC_ECX_AVX) {
2791 add_x86_feature(featureset,
2792 X86FSET_AVX);
2793
2794 /*
2795 * Intel says we can't check these without also
2796 * checking AVX.
2797 */
2798 if (cp->cp_ecx & CPUID_INTC_ECX_F16C)
2799 add_x86_feature(featureset,
2800 X86FSET_F16C);
2801
2802 if (cp->cp_ecx & CPUID_INTC_ECX_FMA)
2803 add_x86_feature(featureset,
2804 X86FSET_FMA);
2805
2806 if (cpi->cpi_std[7].cp_ebx &
2807 CPUID_INTC_EBX_7_0_BMI1)
2808 add_x86_feature(featureset,
2809 X86FSET_BMI1);
2810
2811 if (cpi->cpi_std[7].cp_ebx &
2812 CPUID_INTC_EBX_7_0_BMI2)
2813 add_x86_feature(featureset,
2814 X86FSET_BMI2);
2815
2816 if (cpi->cpi_std[7].cp_ebx &
2817 CPUID_INTC_EBX_7_0_AVX2)
2818 add_x86_feature(featureset,
2819 X86FSET_AVX2);
2820 }
2821
2822 if (cpi->cpi_vendor == X86_VENDOR_Intel &&
2823 (cpi->cpi_std[7].cp_ebx &
2824 CPUID_INTC_EBX_7_0_AVX512F) != 0) {
2825 add_x86_feature(featureset, X86FSET_AVX512F);
2826
2827 if (cpi->cpi_std[7].cp_ebx &
2828 CPUID_INTC_EBX_7_0_AVX512DQ)
2829 add_x86_feature(featureset,
2830 X86FSET_AVX512DQ);
2831 if (cpi->cpi_std[7].cp_ebx &
2832 CPUID_INTC_EBX_7_0_AVX512IFMA)
2833 add_x86_feature(featureset,
2834 X86FSET_AVX512FMA);
2835 if (cpi->cpi_std[7].cp_ebx &
2836 CPUID_INTC_EBX_7_0_AVX512PF)
2837 add_x86_feature(featureset,
2838 X86FSET_AVX512PF);
2839 if (cpi->cpi_std[7].cp_ebx &
2840 CPUID_INTC_EBX_7_0_AVX512ER)
2841 add_x86_feature(featureset,
2842 X86FSET_AVX512ER);
2843 if (cpi->cpi_std[7].cp_ebx &
2844 CPUID_INTC_EBX_7_0_AVX512CD)
2845 add_x86_feature(featureset,
2846 X86FSET_AVX512CD);
2847 if (cpi->cpi_std[7].cp_ebx &
2848 CPUID_INTC_EBX_7_0_AVX512BW)
2849 add_x86_feature(featureset,
2850 X86FSET_AVX512BW);
2851 if (cpi->cpi_std[7].cp_ebx &
2852 CPUID_INTC_EBX_7_0_AVX512VL)
2853 add_x86_feature(featureset,
2854 X86FSET_AVX512VL);
2855
2856 if (cpi->cpi_std[7].cp_ecx &
2857 CPUID_INTC_ECX_7_0_AVX512VBMI)
2858 add_x86_feature(featureset,
2859 X86FSET_AVX512VBMI);
2860 if (cpi->cpi_std[7].cp_ecx &
2861 CPUID_INTC_ECX_7_0_AVX512VNNI)
2862 add_x86_feature(featureset,
2863 X86FSET_AVX512VNNI);
2864 if (cpi->cpi_std[7].cp_ecx &
2865 CPUID_INTC_ECX_7_0_AVX512VPOPCDQ)
2866 add_x86_feature(featureset,
2867 X86FSET_AVX512VPOPCDQ);
2868
2869 if (cpi->cpi_std[7].cp_edx &
2870 CPUID_INTC_EDX_7_0_AVX5124NNIW)
2871 add_x86_feature(featureset,
2872 X86FSET_AVX512NNIW);
2873 if (cpi->cpi_std[7].cp_edx &
2874 CPUID_INTC_EDX_7_0_AVX5124FMAPS)
2875 add_x86_feature(featureset,
2876 X86FSET_AVX512FMAPS);
2877 }
2878 }
2879 }
2880
2881 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
2882 if (cp->cp_ecx & CPUID_INTC_ECX_PCID) {
2883 add_x86_feature(featureset, X86FSET_PCID);
2884 }
2885 }
2886
2887 if (cp->cp_ecx & CPUID_INTC_ECX_X2APIC) {
2888 add_x86_feature(featureset, X86FSET_X2APIC);
2889 }
2890 if (cp->cp_edx & CPUID_INTC_EDX_DE) {
2891 add_x86_feature(featureset, X86FSET_DE);
2892 }
2893 #if !defined(__xpv)
2894 if (cp->cp_ecx & CPUID_INTC_ECX_MON) {
2895
2896 /*
		 * We require the CLFLUSH instruction for the erratum workaround
2898 * to use MONITOR/MWAIT.
2899 */
2900 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
2901 cpi->cpi_mwait.support |= MWAIT_SUPPORT;
2902 add_x86_feature(featureset, X86FSET_MWAIT);
2903 } else {
2904 extern int idle_cpu_assert_cflush_monitor;
2905
2906 /*
2907 * All processors we are aware of which have
2908 * MONITOR/MWAIT also have CLFLUSH.
2909 */
2910 if (idle_cpu_assert_cflush_monitor) {
2911 ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) &&
2912 (cp->cp_edx & CPUID_INTC_EDX_CLFSH));
2913 }
2914 }
2915 }
2916 #endif /* __xpv */
2917
2918 if (cp->cp_ecx & CPUID_INTC_ECX_VMX) {
2919 add_x86_feature(featureset, X86FSET_VMX);
2920 }
2921
2922 if (cp->cp_ecx & CPUID_INTC_ECX_RDRAND)
2923 add_x86_feature(featureset, X86FSET_RDRAND);
2924
2925 /*
	 * We only need to capture this once, on the boot CPU; the rest of the
	 * CPUs follow suit. Leaf 1 %ebx[15:8] reports the CLFLUSH line size
	 * in units of 8 bytes, hence the multiplication by 8 below.
2928 */
2929 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
2930 add_x86_feature(featureset, X86FSET_CLFSH);
2931 x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8);
2932 }
2933 if (is_x86_feature(featureset, X86FSET_PAE))
2934 cpi->cpi_pabits = 36;
2935
2936 if (cpi->cpi_maxeax >= 0xD && !xsave_force_disable) {
2937 struct cpuid_regs r, *ecp;
2938
2939 ecp = &r;
2940 ecp->cp_eax = 0xD;
2941 ecp->cp_ecx = 1;
2942 ecp->cp_edx = ecp->cp_ebx = 0;
2943 (void) __cpuid_insn(ecp);
2944
2945 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVEOPT)
2946 add_x86_feature(featureset, X86FSET_XSAVEOPT);
2947 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVEC)
2948 add_x86_feature(featureset, X86FSET_XSAVEC);
2949 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVES)
2950 add_x86_feature(featureset, X86FSET_XSAVES);
2951 }
2952
2953 /*
2954 * Work on the "extended" feature information, doing
2955 * some basic initialization for cpuid_pass2()
2956 */
2957 xcpuid = 0;
2958 switch (cpi->cpi_vendor) {
2959 case X86_VENDOR_Intel:
2960 /*
2961 * On KVM we know we will have proper support for extended
2962 * cpuid.
2963 */
2964 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf ||
2965 (get_hwenv() == HW_KVM && cpi->cpi_family == 6 &&
2966 (cpi->cpi_model == 6 || cpi->cpi_model == 2)))
2967 xcpuid++;
2968 break;
2969 case X86_VENDOR_AMD:
2970 if (cpi->cpi_family > 5 ||
2971 (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
2972 xcpuid++;
2973 break;
2974 case X86_VENDOR_Cyrix:
2975 /*
2976 * Only these Cyrix CPUs are -known- to support
2977 * extended cpuid operations.
2978 */
2979 if (x86_type == X86_TYPE_VIA_CYRIX_III ||
2980 x86_type == X86_TYPE_CYRIX_GXm)
2981 xcpuid++;
2982 break;
2983 case X86_VENDOR_Centaur:
2984 case X86_VENDOR_TM:
2985 default:
2986 xcpuid++;
2987 break;
2988 }
2989
2990 if (xcpuid) {
2991 cp = &cpi->cpi_extd[0];
2992 cp->cp_eax = CPUID_LEAF_EXT_0;
2993 cpi->cpi_xmaxeax = __cpuid_insn(cp);
2994 }
2995
2996 if (cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) {
2997
2998 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX)
2999 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX;
3000
3001 switch (cpi->cpi_vendor) {
3002 case X86_VENDOR_Intel:
3003 case X86_VENDOR_AMD:
3004 if (cpi->cpi_xmaxeax < 0x80000001)
3005 break;
3006 cp = &cpi->cpi_extd[1];
3007 cp->cp_eax = 0x80000001;
3008 (void) __cpuid_insn(cp);
3009
3010 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
3011 cpi->cpi_family == 5 &&
3012 cpi->cpi_model == 6 &&
3013 cpi->cpi_step == 6) {
3014 /*
3015 * K6 model 6 uses bit 10 to indicate SYSC
3016 * Later models use bit 11. Fix it here.
3017 */
3018 if (cp->cp_edx & 0x400) {
3019 cp->cp_edx &= ~0x400;
3020 cp->cp_edx |= CPUID_AMD_EDX_SYSC;
3021 }
3022 }
3023
3024 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp);
3025
3026 /*
3027 * Compute the additions to the kernel's feature word.
3028 */
3029 if (cp->cp_edx & CPUID_AMD_EDX_NX) {
3030 add_x86_feature(featureset, X86FSET_NX);
3031 }
3032
3033 /*
			 * Regardless of whether or not we boot 64-bit,
3035 * we should have a way to identify whether
3036 * the CPU is capable of running 64-bit.
3037 */
3038 if (cp->cp_edx & CPUID_AMD_EDX_LM) {
3039 add_x86_feature(featureset, X86FSET_64);
3040 }
3041
3042 /* 1 GB large page - enable only for 64 bit kernel */
3043 if (cp->cp_edx & CPUID_AMD_EDX_1GPG) {
3044 add_x86_feature(featureset, X86FSET_1GPG);
3045 }
3046
3047 if ((cpi->cpi_vendor == X86_VENDOR_AMD) &&
3048 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) &&
3049 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) {
3050 add_x86_feature(featureset, X86FSET_SSE4A);
3051 }
3052
3053 /*
3054 * It's really tricky to support syscall/sysret in
3055 * the i386 kernel; we rely on sysenter/sysexit
3056 * instead. In the amd64 kernel, things are -way-
3057 * better.
3058 */
3059 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) {
3060 add_x86_feature(featureset, X86FSET_ASYSC);
3061 }
3062
3063 /*
3064 * While we're thinking about system calls, note
3065 * that AMD processors don't support sysenter
3066 * in long mode at all, so don't try to program them.
3067 */
3068 if (x86_vendor == X86_VENDOR_AMD) {
3069 remove_x86_feature(featureset, X86FSET_SEP);
3070 }
3071
3072 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) {
3073 add_x86_feature(featureset, X86FSET_TSCP);
3074 }
3075
3076 if (cp->cp_ecx & CPUID_AMD_ECX_SVM) {
3077 add_x86_feature(featureset, X86FSET_SVM);
3078 }
3079
3080 if (cp->cp_ecx & CPUID_AMD_ECX_TOPOEXT) {
3081 add_x86_feature(featureset, X86FSET_TOPOEXT);
3082 }
3083
3084 if (cp->cp_ecx & CPUID_AMD_ECX_XOP) {
3085 add_x86_feature(featureset, X86FSET_XOP);
3086 }
3087
3088 if (cp->cp_ecx & CPUID_AMD_ECX_FMA4) {
3089 add_x86_feature(featureset, X86FSET_FMA4);
3090 }
3091
3092 if (cp->cp_ecx & CPUID_AMD_ECX_TBM) {
3093 add_x86_feature(featureset, X86FSET_TBM);
3094 }
3095
3096 if (cp->cp_ecx & CPUID_AMD_ECX_MONITORX) {
3097 add_x86_feature(featureset, X86FSET_MONITORX);
3098 }
3099 break;
3100 default:
3101 break;
3102 }
3103
3104 /*
3105 * Get CPUID data about processor cores and hyperthreads.
3106 */
3107 switch (cpi->cpi_vendor) {
3108 case X86_VENDOR_Intel:
3109 if (cpi->cpi_maxeax >= 4) {
3110 cp = &cpi->cpi_std[4];
3111 cp->cp_eax = 4;
3112 cp->cp_ecx = 0;
3113 (void) __cpuid_insn(cp);
3114 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp);
3115 }
3116 /*FALLTHROUGH*/
3117 case X86_VENDOR_AMD:
3118 if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8)
3119 break;
3120 cp = &cpi->cpi_extd[8];
3121 cp->cp_eax = CPUID_LEAF_EXT_8;
3122 (void) __cpuid_insn(cp);
3123 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8,
3124 cp);
3125
3126 /*
3127 * AMD uses ebx for some extended functions.
3128 */
3129 if (cpi->cpi_vendor == X86_VENDOR_AMD) {
3130 /*
3131 * While we're here, check for the AMD "Error
3132 * Pointer Zero/Restore" feature. This can be
3133				 * used to set up the FP save handlers
3134 * appropriately.
3135 */
3136 if (cp->cp_ebx & CPUID_AMD_EBX_ERR_PTR_ZERO) {
3137 cpi->cpi_fp_amd_save = 0;
3138 } else {
3139 cpi->cpi_fp_amd_save = 1;
3140 }
3141
3142 if (cp->cp_ebx & CPUID_AMD_EBX_CLZERO) {
3143 add_x86_feature(featureset,
3144 X86FSET_CLZERO);
3145 }
3146 }
3147
3148 /*
3149 * Virtual and physical address limits from
3150 * cpuid override previously guessed values.
3151 */
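			/*
			 * Illustrative example: an %eax of 0x3030 from leaf
			 * 0x80000008 would decode to 48 physical and 48
			 * virtual address bits.
			 */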
3152 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0);
3153 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8);
3154 break;
3155 default:
3156 break;
3157 }
3158
3159 /*
3160 * Get CPUID data about TSC Invariance in Deep C-State.
3161 */
3162 switch (cpi->cpi_vendor) {
3163 case X86_VENDOR_Intel:
3164 case X86_VENDOR_AMD:
3165			if (cpi->cpi_xmaxeax >= 0x80000007) {
3166 cp = &cpi->cpi_extd[7];
3167 cp->cp_eax = 0x80000007;
3168 cp->cp_ecx = 0;
3169 (void) __cpuid_insn(cp);
3170 }
3171 break;
3172 default:
3173 break;
3174 }
3175 }
3176
3177 cpuid_pass1_topology(cpu, featureset);
3178
3179 /*
3180 * Synthesize chip "revision" and socket type
3181 */
3182 cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family,
3183 cpi->cpi_model, cpi->cpi_step);
3184 cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor,
3185 cpi->cpi_family, cpi->cpi_model, cpi->cpi_step);
3186 cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family,
3187 cpi->cpi_model, cpi->cpi_step);
3188
3189 if (cpi->cpi_vendor == X86_VENDOR_AMD) {
3190 if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8 &&
3191 cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_ERR_PTR_ZERO) {
3192 /* Special handling for AMD FP not necessary. */
3193 cpi->cpi_fp_amd_save = 0;
3194 } else {
3195 cpi->cpi_fp_amd_save = 1;
3196 }
3197 }
3198
3199 /*
3200 * Check the processor leaves that are used for security features.
3201 */
3202 cpuid_scan_security(cpu, featureset);
3203
3204 pass1_done:
3205 cpi->cpi_pass = 1;
3206 }
3207
3208 /*
3209 * Make copies of the cpuid table entries we depend on, in
3210 * part for ease of parsing now, in part so that we have only
3211 * one place to correct any of it, in part for ease of
3212 * later export to userland, and in part so we can look at
3213 * this stuff in a crash dump.
3214 */
3215
3216 /*ARGSUSED*/
3217 void
3218 cpuid_pass2(cpu_t *cpu)
3219 {
3220 uint_t n, nmax;
3221 int i;
3222 struct cpuid_regs *cp;
3223 uint8_t *dp;
3224 uint32_t *iptr;
3225 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
3226
3227 ASSERT(cpi->cpi_pass == 1);
3228
3229 if (cpi->cpi_maxeax < 1)
3230 goto pass2_done;
3231
3232 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD)
3233 nmax = NMAX_CPI_STD;
3234 /*
3235 * (We already handled n == 0 and n == 1 in pass 1)
3236 */
3237 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) {
3238 cp->cp_eax = n;
3239
3240 /*
3241 * n == 7 was handled in pass 1
3242 */
3243 if (n == 7)
3244 continue;
3245
3246 /*
3247 * CPUID function 4 expects %ecx to be initialized
3248 * with an index which indicates which cache to return
3249 * information about. The OS is expected to call function 4
3250 * with %ecx set to 0, 1, 2, ... until it returns with
3251 * EAX[4:0] set to 0, which indicates there are no more
3252 * caches.
3253 *
3254 * Here, populate cpi_std[4] with the information returned by
3255 * function 4 when %ecx == 0, and do the rest in cpuid_pass3()
3256 * when dynamic memory allocation becomes available.
3257 *
3258 * Note: we need to explicitly initialize %ecx here, since
3259 * function 4 may have been previously invoked.
3260 */
3261 if (n == 4)
3262 cp->cp_ecx = 0;
3263
3264 (void) __cpuid_insn(cp);
3265 platform_cpuid_mangle(cpi->cpi_vendor, n, cp);
3266 switch (n) {
3267 case 2:
3268 /*
3269 * "the lower 8 bits of the %eax register
3270 * contain a value that identifies the number
3271 * of times the cpuid [instruction] has to be
3272 * executed to obtain a complete image of the
3273 * processor's caching systems."
3274 *
3275 * How *do* they make this stuff up?
3276 */
3277 cpi->cpi_ncache = sizeof (*cp) *
3278 BITX(cp->cp_eax, 7, 0);
3279 if (cpi->cpi_ncache == 0)
3280 break;
3281 cpi->cpi_ncache--; /* skip count byte */
3282
3283 /*
3284 * Well, for now, rather than attempt to implement
3285 * this slightly dubious algorithm, we just look
3286 * at the first 15 ..
3287 */
3288 if (cpi->cpi_ncache > (sizeof (*cp) - 1))
3289 cpi->cpi_ncache = sizeof (*cp) - 1;
3290
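			/*
			 * Each register whose bit 31 is clear holds up to
			 * four one-byte cache/TLB descriptors, with zero
			 * bytes meaning "no descriptor".  Byte 0 of %eax is
			 * the iteration count handled above, which is why
			 * that harvest loop starts at index 1.
			 */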
3291 dp = cpi->cpi_cacheinfo;
3292 if (BITX(cp->cp_eax, 31, 31) == 0) {
3293 uint8_t *p = (void *)&cp->cp_eax;
3294 for (i = 1; i < 4; i++)
3295 if (p[i] != 0)
3296 *dp++ = p[i];
3297 }
3298 if (BITX(cp->cp_ebx, 31, 31) == 0) {
3299 uint8_t *p = (void *)&cp->cp_ebx;
3300 for (i = 0; i < 4; i++)
3301 if (p[i] != 0)
3302 *dp++ = p[i];
3303 }
3304 if (BITX(cp->cp_ecx, 31, 31) == 0) {
3305 uint8_t *p = (void *)&cp->cp_ecx;
3306 for (i = 0; i < 4; i++)
3307 if (p[i] != 0)
3308 *dp++ = p[i];
3309 }
3310 if (BITX(cp->cp_edx, 31, 31) == 0) {
3311 uint8_t *p = (void *)&cp->cp_edx;
3312 for (i = 0; i < 4; i++)
3313 if (p[i] != 0)
3314 *dp++ = p[i];
3315 }
3316 break;
3317
3318 case 3: /* Processor serial number, if PSN supported */
3319 break;
3320
3321 case 4: /* Deterministic cache parameters */
3322 break;
3323
3324 case 5: /* Monitor/Mwait parameters */
3325 {
3326 size_t mwait_size;
3327
3328 /*
3329 * check cpi_mwait.support which was set in cpuid_pass1
3330 */
3331 if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT))
3332 break;
3333
3334 /*
3335			 * Protect ourselves from an insane mwait line size.
3336 * Workaround for incomplete hardware emulator(s).
3337 */
3338 mwait_size = (size_t)MWAIT_SIZE_MAX(cpi);
3339 if (mwait_size < sizeof (uint32_t) ||
3340 !ISP2(mwait_size)) {
3341 #if DEBUG
3342 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait "
3343 "size %ld", cpu->cpu_id, (long)mwait_size);
3344 #endif
3345 break;
3346 }
3347
3348 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi);
3349 cpi->cpi_mwait.mon_max = mwait_size;
3350 if (MWAIT_EXTENSION(cpi)) {
3351 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS;
3352 if (MWAIT_INT_ENABLE(cpi))
3353 cpi->cpi_mwait.support |=
3354 MWAIT_ECX_INT_ENABLE;
3355 }
3356 break;
3357 }
3358 default:
3359 break;
3360 }
3361 }
3362
3363 /*
3364 * XSAVE enumeration
3365 */
3366 if (cpi->cpi_maxeax >= 0xD) {
3367 struct cpuid_regs regs;
3368 boolean_t cpuid_d_valid = B_TRUE;
3369
3370		cp = &regs;
3371 cp->cp_eax = 0xD;
3372 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
3373
3374 (void) __cpuid_insn(cp);
3375
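		/*
		 * Leaf 0xD, subleaf 0: %eax and %edx report the low and high
		 * 32 bits of the supported XSAVE state-component bitmap, and
		 * %ecx the maximum save area size required by all supported
		 * components; those values are cached just below.
		 */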
3376 /*
3377 * Sanity checks for debug
3378 */
3379 if ((cp->cp_eax & XFEATURE_LEGACY_FP) == 0 ||
3380 (cp->cp_eax & XFEATURE_SSE) == 0) {
3381 cpuid_d_valid = B_FALSE;
3382 }
3383
3384 cpi->cpi_xsave.xsav_hw_features_low = cp->cp_eax;
3385 cpi->cpi_xsave.xsav_hw_features_high = cp->cp_edx;
3386 cpi->cpi_xsave.xsav_max_size = cp->cp_ecx;
3387
3388 /*
3389 * If the hw supports AVX, get the size and offset in the save
3390 * area for the ymm state.
3391 */
3392 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX) {
3393 cp->cp_eax = 0xD;
3394 cp->cp_ecx = 2;
3395 cp->cp_edx = cp->cp_ebx = 0;
3396
3397 (void) __cpuid_insn(cp);
3398
3399 if (cp->cp_ebx != CPUID_LEAFD_2_YMM_OFFSET ||
3400 cp->cp_eax != CPUID_LEAFD_2_YMM_SIZE) {
3401 cpuid_d_valid = B_FALSE;
3402 }
3403
3404 cpi->cpi_xsave.ymm_size = cp->cp_eax;
3405 cpi->cpi_xsave.ymm_offset = cp->cp_ebx;
3406 }
3407
3408 /*
3409 * If the hw supports MPX, get the size and offset in the
3410 * save area for BNDREGS and BNDCSR.
3411 */
3412 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_MPX) {
3413 cp->cp_eax = 0xD;
3414 cp->cp_ecx = 3;
3415 cp->cp_edx = cp->cp_ebx = 0;
3416
3417 (void) __cpuid_insn(cp);
3418
3419 cpi->cpi_xsave.bndregs_size = cp->cp_eax;
3420 cpi->cpi_xsave.bndregs_offset = cp->cp_ebx;
3421
3422 cp->cp_eax = 0xD;
3423 cp->cp_ecx = 4;
3424 cp->cp_edx = cp->cp_ebx = 0;
3425
3426 (void) __cpuid_insn(cp);
3427
3428 cpi->cpi_xsave.bndcsr_size = cp->cp_eax;
3429 cpi->cpi_xsave.bndcsr_offset = cp->cp_ebx;
3430 }
3431
3432 /*
3433 * If the hw supports AVX512, get the size and offset in the
3434 * save area for the opmask registers and zmm state.
3435 */
3436 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX512) {
3437 cp->cp_eax = 0xD;
3438 cp->cp_ecx = 5;
3439 cp->cp_edx = cp->cp_ebx = 0;
3440
3441 (void) __cpuid_insn(cp);
3442
3443 cpi->cpi_xsave.opmask_size = cp->cp_eax;
3444 cpi->cpi_xsave.opmask_offset = cp->cp_ebx;
3445
3446 cp->cp_eax = 0xD;
3447 cp->cp_ecx = 6;
3448 cp->cp_edx = cp->cp_ebx = 0;
3449
3450 (void) __cpuid_insn(cp);
3451
3452 cpi->cpi_xsave.zmmlo_size = cp->cp_eax;
3453 cpi->cpi_xsave.zmmlo_offset = cp->cp_ebx;
3454
3455 cp->cp_eax = 0xD;
3456 cp->cp_ecx = 7;
3457 cp->cp_edx = cp->cp_ebx = 0;
3458
3459 (void) __cpuid_insn(cp);
3460
3461 cpi->cpi_xsave.zmmhi_size = cp->cp_eax;
3462 cpi->cpi_xsave.zmmhi_offset = cp->cp_ebx;
3463 }
3464
3465		if (!is_x86_feature(x86_featureset, X86FSET_XSAVE)) {
3466 xsave_state_size = 0;
3467 } else if (cpuid_d_valid) {
3468 xsave_state_size = cpi->cpi_xsave.xsav_max_size;
3469 } else {
3470 /* Broken CPUID 0xD, probably in HVM */
3471 cmn_err(CE_WARN, "cpu%d: CPUID.0xD returns invalid "
3472 "value: hw_low = %d, hw_high = %d, xsave_size = %d"
3473 ", ymm_size = %d, ymm_offset = %d\n",
3474 cpu->cpu_id, cpi->cpi_xsave.xsav_hw_features_low,
3475 cpi->cpi_xsave.xsav_hw_features_high,
3476 (int)cpi->cpi_xsave.xsav_max_size,
3477 (int)cpi->cpi_xsave.ymm_size,
3478 (int)cpi->cpi_xsave.ymm_offset);
3479
3480 if (xsave_state_size != 0) {
3481 /*
3482 * This must be a non-boot CPU. We cannot
3483				 * continue, because the boot CPU has already
3484 * enabled XSAVE.
3485 */
3486 ASSERT(cpu->cpu_id != 0);
3487 cmn_err(CE_PANIC, "cpu%d: we have already "
3488 "enabled XSAVE on boot cpu, cannot "
3489 "continue.", cpu->cpu_id);
3490 } else {
3491 /*
3492 * If we reached here on the boot CPU, it's also
3493 * almost certain that we'll reach here on the
3494 * non-boot CPUs. When we're here on a boot CPU
3495 * we should disable the feature, on a non-boot
3496 * CPU we need to confirm that we have.
3497 */
3498 if (cpu->cpu_id == 0) {
3499 remove_x86_feature(x86_featureset,
3500 X86FSET_XSAVE);
3501 remove_x86_feature(x86_featureset,
3502 X86FSET_AVX);
3503 remove_x86_feature(x86_featureset,
3504 X86FSET_F16C);
3505 remove_x86_feature(x86_featureset,
3506 X86FSET_BMI1);
3507 remove_x86_feature(x86_featureset,
3508 X86FSET_BMI2);
3509 remove_x86_feature(x86_featureset,
3510 X86FSET_FMA);
3511 remove_x86_feature(x86_featureset,
3512 X86FSET_AVX2);
3513 remove_x86_feature(x86_featureset,
3514 X86FSET_MPX);
3515 remove_x86_feature(x86_featureset,
3516 X86FSET_AVX512F);
3517 remove_x86_feature(x86_featureset,
3518 X86FSET_AVX512DQ);
3519 remove_x86_feature(x86_featureset,
3520 X86FSET_AVX512PF);
3521 remove_x86_feature(x86_featureset,
3522 X86FSET_AVX512ER);
3523 remove_x86_feature(x86_featureset,
3524 X86FSET_AVX512CD);
3525 remove_x86_feature(x86_featureset,
3526 X86FSET_AVX512BW);
3527 remove_x86_feature(x86_featureset,
3528 X86FSET_AVX512VL);
3529 remove_x86_feature(x86_featureset,
3530 X86FSET_AVX512FMA);
3531 remove_x86_feature(x86_featureset,
3532 X86FSET_AVX512VBMI);
3533 remove_x86_feature(x86_featureset,
3534 X86FSET_AVX512VNNI);
3535 remove_x86_feature(x86_featureset,
3536 X86FSET_AVX512VPOPCDQ);
3537 remove_x86_feature(x86_featureset,
3538 X86FSET_AVX512NNIW);
3539 remove_x86_feature(x86_featureset,
3540 X86FSET_AVX512FMAPS);
3541
3542 CPI_FEATURES_ECX(cpi) &=
3543 ~CPUID_INTC_ECX_XSAVE;
3544 CPI_FEATURES_ECX(cpi) &=
3545 ~CPUID_INTC_ECX_AVX;
3546 CPI_FEATURES_ECX(cpi) &=
3547 ~CPUID_INTC_ECX_F16C;
3548 CPI_FEATURES_ECX(cpi) &=
3549 ~CPUID_INTC_ECX_FMA;
3550 CPI_FEATURES_7_0_EBX(cpi) &=
3551 ~CPUID_INTC_EBX_7_0_BMI1;
3552 CPI_FEATURES_7_0_EBX(cpi) &=
3553 ~CPUID_INTC_EBX_7_0_BMI2;
3554 CPI_FEATURES_7_0_EBX(cpi) &=
3555 ~CPUID_INTC_EBX_7_0_AVX2;
3556 CPI_FEATURES_7_0_EBX(cpi) &=
3557 ~CPUID_INTC_EBX_7_0_MPX;
3558 CPI_FEATURES_7_0_EBX(cpi) &=
3559 ~CPUID_INTC_EBX_7_0_ALL_AVX512;
3560
3561 CPI_FEATURES_7_0_ECX(cpi) &=
3562 ~CPUID_INTC_ECX_7_0_ALL_AVX512;
3563
3564 CPI_FEATURES_7_0_EDX(cpi) &=
3565 ~CPUID_INTC_EDX_7_0_ALL_AVX512;
3566
3567 xsave_force_disable = B_TRUE;
3568 } else {
3569 VERIFY(is_x86_feature(x86_featureset,
3570 X86FSET_XSAVE) == B_FALSE);
3571 }
3572 }
3573 }
3574 }
3575
3576
3577 if ((cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) == 0)
3578 goto pass2_done;
3579
3580 if ((nmax = cpi->cpi_xmaxeax - CPUID_LEAF_EXT_0 + 1) > NMAX_CPI_EXTD)
3581 nmax = NMAX_CPI_EXTD;
3582 /*
3583 * Copy the extended properties, fixing them as we go.
3584 * (We already handled n == 0 and n == 1 in pass 1)
3585 */
3586 iptr = (void *)cpi->cpi_brandstr;
3587 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) {
3588 cp->cp_eax = CPUID_LEAF_EXT_0 + n;
3589 (void) __cpuid_insn(cp);
3590 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_0 + n,
3591 cp);
3592 switch (n) {
3593 case 2:
3594 case 3:
3595 case 4:
3596 /*
3597 * Extract the brand string
3598 */
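			/*
			 * Leaves 0x80000002-0x80000004 each return 16 ASCII
			 * bytes of the brand string in %eax..%edx, so together
			 * they fill the 48 bytes copied out through iptr here.
			 */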
3599 *iptr++ = cp->cp_eax;
3600 *iptr++ = cp->cp_ebx;
3601 *iptr++ = cp->cp_ecx;
3602 *iptr++ = cp->cp_edx;
3603 break;
3604 case 5:
3605 switch (cpi->cpi_vendor) {
3606 case X86_VENDOR_AMD:
3607 /*
3608 * The Athlon and Duron were the first
3609 * parts to report the sizes of the
3610 * TLB for large pages. Before then,
3611 * we don't trust the data.
3612 */
3613 if (cpi->cpi_family < 6 ||
3614 (cpi->cpi_family == 6 &&
3615 cpi->cpi_model < 1))
3616 cp->cp_eax = 0;
3617 break;
3618 default:
3619 break;
3620 }
3621 break;
3622 case 6:
3623 switch (cpi->cpi_vendor) {
3624 case X86_VENDOR_AMD:
3625 /*
3626 * The Athlon and Duron were the first
3627 * AMD parts with L2 TLB's.
3628 * Before then, don't trust the data.
3629 */
3630 if (cpi->cpi_family < 6 ||
3631				    (cpi->cpi_family == 6 &&
3632				    cpi->cpi_model < 1))
3633 cp->cp_eax = cp->cp_ebx = 0;
3634 /*
3635 * AMD Duron rev A0 reports L2
3636 * cache size incorrectly as 1K
3637 * when it is really 64K
3638 */
3639 if (cpi->cpi_family == 6 &&
3640 cpi->cpi_model == 3 &&
3641 cpi->cpi_step == 0) {
3642 cp->cp_ecx &= 0xffff;
3643 cp->cp_ecx |= 0x400000;
3644 }
3645 break;
3646 case X86_VENDOR_Cyrix: /* VIA C3 */
3647 /*
3648 * VIA C3 processors are a bit messed
3649 * up w.r.t. encoding cache sizes in %ecx
3650 */
3651 if (cpi->cpi_family != 6)
3652 break;
3653 /*
3654				 * models 7 and 8 were incorrectly encoded
3655 *
3656 * xxx is model 8 really broken?
3657 */
3658 if (cpi->cpi_model == 7 ||
3659 cpi->cpi_model == 8)
3660 cp->cp_ecx =
3661 BITX(cp->cp_ecx, 31, 24) << 16 |
3662 BITX(cp->cp_ecx, 23, 16) << 12 |
3663 BITX(cp->cp_ecx, 15, 8) << 8 |
3664 BITX(cp->cp_ecx, 7, 0);
3665 /*
3666 * model 9 stepping 1 has wrong associativity
3667 */
3668 if (cpi->cpi_model == 9 && cpi->cpi_step == 1)
3669 cp->cp_ecx |= 8 << 12;
3670 break;
3671 case X86_VENDOR_Intel:
3672 /*
3673 * Extended L2 Cache features function.
3674 * First appeared on Prescott.
3675 */
3676 default:
3677 break;
3678 }
3679 break;
3680 default:
3681 break;
3682 }
3683 }
3684
3685 pass2_done:
3686 cpi->cpi_pass = 2;
3687 }
3688
3689 static const char *
3690 intel_cpubrand(const struct cpuid_info *cpi)
3691 {
3692 int i;
3693
3694 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
3695 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
3696 return ("i486");
3697
3698 switch (cpi->cpi_family) {
3699 case 5:
3700 return ("Intel Pentium(r)");
3701 case 6:
3702 switch (cpi->cpi_model) {
3703 uint_t celeron, xeon;
3704 const struct cpuid_regs *cp;
3705 case 0:
3706 case 1:
3707 case 2:
3708 return ("Intel Pentium(r) Pro");
3709 case 3:
3710 case 4:
3711 return ("Intel Pentium(r) II");
3712 case 6:
3713 return ("Intel Celeron(r)");
3714 case 5:
3715 case 7:
3716 celeron = xeon = 0;
3717 cp = &cpi->cpi_std[2]; /* cache info */
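			/*
			 * The leaf 2 descriptor bytes are used as a heuristic
			 * here: 0x40 means no L2 (or no L3) cache, while 0x44
			 * and 0x45 denote 1MB and 2MB L2 caches, which this
			 * code treats as marking Xeon-class parts.
			 */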
3718
3719 for (i = 1; i < 4; i++) {
3720 uint_t tmp;
3721
3722 tmp = (cp->cp_eax >> (8 * i)) & 0xff;
3723 if (tmp == 0x40)
3724 celeron++;
3725 if (tmp >= 0x44 && tmp <= 0x45)
3726 xeon++;
3727 }
3728
3729 for (i = 0; i < 2; i++) {
3730 uint_t tmp;
3731
3732 tmp = (cp->cp_ebx >> (8 * i)) & 0xff;
3733 if (tmp == 0x40)
3734 celeron++;
3735 else if (tmp >= 0x44 && tmp <= 0x45)
3736 xeon++;
3737 }
3738
3739 for (i = 0; i < 4; i++) {
3740 uint_t tmp;
3741
3742 tmp = (cp->cp_ecx >> (8 * i)) & 0xff;
3743 if (tmp == 0x40)
3744 celeron++;
3745 else if (tmp >= 0x44 && tmp <= 0x45)
3746 xeon++;
3747 }
3748
3749 for (i = 0; i < 4; i++) {
3750 uint_t tmp;
3751
3752 tmp = (cp->cp_edx >> (8 * i)) & 0xff;
3753 if (tmp == 0x40)
3754 celeron++;
3755 else if (tmp >= 0x44 && tmp <= 0x45)
3756 xeon++;
3757 }
3758
3759 if (celeron)
3760 return ("Intel Celeron(r)");
3761 if (xeon)
3762 return (cpi->cpi_model == 5 ?
3763 "Intel Pentium(r) II Xeon(tm)" :
3764 "Intel Pentium(r) III Xeon(tm)");
3765 return (cpi->cpi_model == 5 ?
3766 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" :
3767 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)");
3768 default:
3769 break;
3770 }
3771 default:
3772 break;
3773 }
3774
3775 /* BrandID is present if the field is nonzero */
3776 if (cpi->cpi_brandid != 0) {
3777 static const struct {
3778 uint_t bt_bid;
3779 const char *bt_str;
3780 } brand_tbl[] = {
3781 { 0x1, "Intel(r) Celeron(r)" },
3782 { 0x2, "Intel(r) Pentium(r) III" },
3783 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" },
3784 { 0x4, "Intel(r) Pentium(r) III" },
3785 { 0x6, "Mobile Intel(r) Pentium(r) III" },
3786 { 0x7, "Mobile Intel(r) Celeron(r)" },
3787 { 0x8, "Intel(r) Pentium(r) 4" },
3788 { 0x9, "Intel(r) Pentium(r) 4" },
3789 { 0xa, "Intel(r) Celeron(r)" },
3790 { 0xb, "Intel(r) Xeon(tm)" },
3791 { 0xc, "Intel(r) Xeon(tm) MP" },
3792 { 0xe, "Mobile Intel(r) Pentium(r) 4" },
3793 { 0xf, "Mobile Intel(r) Celeron(r)" },
3794 { 0x11, "Mobile Genuine Intel(r)" },
3795 { 0x12, "Intel(r) Celeron(r) M" },
3796 { 0x13, "Mobile Intel(r) Celeron(r)" },
3797 { 0x14, "Intel(r) Celeron(r)" },
3798 { 0x15, "Mobile Genuine Intel(r)" },
3799 { 0x16, "Intel(r) Pentium(r) M" },
3800 { 0x17, "Mobile Intel(r) Celeron(r)" }
3801 };
3802 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]);
3803 uint_t sgn;
3804
3805 sgn = (cpi->cpi_family << 8) |
3806 (cpi->cpi_model << 4) | cpi->cpi_step;
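		/*
		 * For instance, a family 6, model 0xb, stepping 1 part yields
		 * sgn == 0x6b1, which is the special case checked just below.
		 */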
3807
3808 for (i = 0; i < btblmax; i++)
3809 if (brand_tbl[i].bt_bid == cpi->cpi_brandid)
3810 break;
3811 if (i < btblmax) {
3812 if (sgn == 0x6b1 && cpi->cpi_brandid == 3)
3813 return ("Intel(r) Celeron(r)");
3814 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb)
3815 return ("Intel(r) Xeon(tm) MP");
3816 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe)
3817 return ("Intel(r) Xeon(tm)");
3818 return (brand_tbl[i].bt_str);
3819 }
3820 }
3821
3822 return (NULL);
3823 }
3824
3825 static const char *
3826 amd_cpubrand(const struct cpuid_info *cpi)
3827 {
3828 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
3829 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
3830 return ("i486 compatible");
3831
3832 switch (cpi->cpi_family) {
3833 case 5:
3834 switch (cpi->cpi_model) {
3835 case 0:
3836 case 1:
3837 case 2:
3838 case 3:
3839 case 4:
3840 case 5:
3841 return ("AMD-K5(r)");
3842 case 6:
3843 case 7:
3844 return ("AMD-K6(r)");
3845 case 8:
3846 return ("AMD-K6(r)-2");
3847 case 9:
3848 return ("AMD-K6(r)-III");
3849 default:
3850 return ("AMD (family 5)");
3851 }
3852 case 6:
3853 switch (cpi->cpi_model) {
3854 case 1:
3855 return ("AMD-K7(tm)");
3856 case 0:
3857 case 2:
3858 case 4:
3859 return ("AMD Athlon(tm)");
3860 case 3:
3861 case 7:
3862 return ("AMD Duron(tm)");
3863 case 6:
3864 case 8:
3865 case 10:
3866 /*
3867 * Use the L2 cache size to distinguish
3868 */
3869 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ?
3870 "AMD Athlon(tm)" : "AMD Duron(tm)");
3871 default:
3872 return ("AMD (family 6)");
3873 }
3874 default:
3875 break;
3876 }
3877
3878 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 &&
3879 cpi->cpi_brandid != 0) {
3880 switch (BITX(cpi->cpi_brandid, 7, 5)) {
3881 case 3:
3882 return ("AMD Opteron(tm) UP 1xx");
3883 case 4:
3884 return ("AMD Opteron(tm) DP 2xx");
3885 case 5:
3886 return ("AMD Opteron(tm) MP 8xx");
3887 default:
3888 return ("AMD Opteron(tm)");
3889 }
3890 }
3891
3892 return (NULL);
3893 }
3894
3895 static const char *
3896 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type)
3897 {
3898 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
3899 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 ||
3900 type == X86_TYPE_CYRIX_486)
3901 return ("i486 compatible");
3902
3903 switch (type) {
3904 case X86_TYPE_CYRIX_6x86:
3905 return ("Cyrix 6x86");
3906 case X86_TYPE_CYRIX_6x86L:
3907 return ("Cyrix 6x86L");
3908 case X86_TYPE_CYRIX_6x86MX:
3909 return ("Cyrix 6x86MX");
3910 case X86_TYPE_CYRIX_GXm:
3911 return ("Cyrix GXm");
3912 case X86_TYPE_CYRIX_MediaGX:
3913 return ("Cyrix MediaGX");
3914 case X86_TYPE_CYRIX_MII:
3915 return ("Cyrix M2");
3916 case X86_TYPE_VIA_CYRIX_III:
3917 return ("VIA Cyrix M3");
3918 default:
3919 /*
3920 * Have another wild guess ..
3921 */
3922 if (cpi->cpi_family == 4 && cpi->cpi_model == 9)
3923 return ("Cyrix 5x86");
3924 else if (cpi->cpi_family == 5) {
3925 switch (cpi->cpi_model) {
3926 case 2:
3927 return ("Cyrix 6x86"); /* Cyrix M1 */
3928 case 4:
3929 return ("Cyrix MediaGX");
3930 default:
3931 break;
3932 }
3933 } else if (cpi->cpi_family == 6) {
3934 switch (cpi->cpi_model) {
3935 case 0:
3936 return ("Cyrix 6x86MX"); /* Cyrix M2? */
3937 case 5:
3938 case 6:
3939 case 7:
3940 case 8:
3941 case 9:
3942 return ("VIA C3");
3943 default:
3944 break;
3945 }
3946 }
3947 break;
3948 }
3949 return (NULL);
3950 }
3951
3952 /*
3953  * This only gets called when the CPU extended brand string leaves
3954  * (0x80000002, 0x80000003, 0x80000004) aren't available, or contain
3955  * null bytes for some reason.
3956 */
3957 static void
3958 fabricate_brandstr(struct cpuid_info *cpi)
3959 {
3960 const char *brand = NULL;
3961
3962 switch (cpi->cpi_vendor) {
3963 case X86_VENDOR_Intel:
3964 brand = intel_cpubrand(cpi);
3965 break;
3966 case X86_VENDOR_AMD:
3967 brand = amd_cpubrand(cpi);
3968 break;
3969 case X86_VENDOR_Cyrix:
3970 brand = cyrix_cpubrand(cpi, x86_type);
3971 break;
3972 case X86_VENDOR_NexGen:
3973 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
3974 brand = "NexGen Nx586";
3975 break;
3976 case X86_VENDOR_Centaur:
3977 if (cpi->cpi_family == 5)
3978 switch (cpi->cpi_model) {
3979 case 4:
3980 brand = "Centaur C6";
3981 break;
3982 case 8:
3983 brand = "Centaur C2";
3984 break;
3985 case 9:
3986 brand = "Centaur C3";
3987 break;
3988 default:
3989 break;
3990 }
3991 break;
3992 case X86_VENDOR_Rise:
3993 if (cpi->cpi_family == 5 &&
3994 (cpi->cpi_model == 0 || cpi->cpi_model == 2))
3995 brand = "Rise mP6";
3996 break;
3997 case X86_VENDOR_SiS:
3998 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
3999 brand = "SiS 55x";
4000 break;
4001 case X86_VENDOR_TM:
4002 if (cpi->cpi_family == 5 && cpi->cpi_model == 4)
4003 brand = "Transmeta Crusoe TM3x00 or TM5x00";
4004 break;
4005 case X86_VENDOR_NSC:
4006 case X86_VENDOR_UMC:
4007 default:
4008 break;
4009 }
4010 if (brand) {
4011 (void) strcpy((char *)cpi->cpi_brandstr, brand);
4012 return;
4013 }
4014
4015 /*
4016 * If all else fails ...
4017 */
4018 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr),
4019 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family,
4020 cpi->cpi_model, cpi->cpi_step);
4021 }
4022
4023 /*
4024 * This routine is called just after kernel memory allocation
4025 * becomes available on cpu0, and as part of mp_startup() on
4026 * the other cpus.
4027 *
4028 * Fixup the brand string, and collect any information from cpuid
4029 * that requires dynamically allocated storage to represent.
4030 */
4031 /*ARGSUSED*/
4032 void
4033 cpuid_pass3(cpu_t *cpu)
4034 {
4035 int i, max, shft, level, size;
4036 struct cpuid_regs regs;
4037 struct cpuid_regs *cp;
4038 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4039
4040 ASSERT(cpi->cpi_pass == 2);
4041
4042 /*
4043 * Deterministic cache parameters
4044 *
4045 * Intel uses leaf 0x4 for this, while AMD uses leaf 0x8000001d. The
4046 * values that are present are currently defined to be the same. This
4047 * means we can use the same logic to parse it as long as we use the
4048 * appropriate leaf to get the data. If you're updating this, make sure
4049 * you're careful about which vendor supports which aspect.
4050 *
4051 * Take this opportunity to detect the number of threads sharing the
4052 * last level cache, and construct a corresponding cache id. The
4053 * respective cpuid_info members are initialized to the default case of
4054 * "no last level cache sharing".
4055 */
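	/*
	 * In these leaves %eax encodes the cache type in its low bits (zero
	 * ends the walk), the cache level above that, and the number of
	 * threads sharing the cache minus one further up; the CPI_CACHE_TYPE,
	 * CPI_CACHE_LVL and CPI_NTHR_SHR_CACHE macros used below extract
	 * those fields.
	 */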
4056 cpi->cpi_ncpu_shr_last_cache = 1;
4057 cpi->cpi_last_lvl_cacheid = cpu->cpu_id;
4058
4059 if ((cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) ||
4060 (cpi->cpi_vendor == X86_VENDOR_AMD &&
4061 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1d &&
4062 is_x86_feature(x86_featureset, X86FSET_TOPOEXT))) {
4063 uint32_t leaf;
4064
4065 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
4066 leaf = 4;
4067 } else {
4068 leaf = CPUID_LEAF_EXT_1d;
4069 }
4070
4071 /*
4072 * Find the # of elements (size) returned by the leaf and along
4073 * the way detect last level cache sharing details.
4074 */
4075		bzero(&regs, sizeof (regs));
4076		cp = &regs;
4077 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) {
4078 cp->cp_eax = leaf;
4079 cp->cp_ecx = i;
4080
4081 (void) __cpuid_insn(cp);
4082
4083 if (CPI_CACHE_TYPE(cp) == 0)
4084 break;
4085 level = CPI_CACHE_LVL(cp);
4086 if (level > max) {
4087 max = level;
4088 cpi->cpi_ncpu_shr_last_cache =
4089 CPI_NTHR_SHR_CACHE(cp) + 1;
4090 }
4091 }
4092 cpi->cpi_cache_leaf_size = size = i;
4093
4094 /*
4095 * Allocate the cpi_cache_leaves array. The first element
4096 * references the regs for the corresponding leaf with %ecx set
4097 * to 0. This was gathered in cpuid_pass2().
4098 */
4099 if (size > 0) {
4100 cpi->cpi_cache_leaves =
4101 kmem_alloc(size * sizeof (cp), KM_SLEEP);
4102 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
4103 cpi->cpi_cache_leaves[0] = &cpi->cpi_std[4];
4104 } else {
4105 cpi->cpi_cache_leaves[0] = &cpi->cpi_extd[0x1d];
4106 }
4107
4108 /*
4109 * Allocate storage to hold the additional regs
4110 * for the leaf, %ecx == 1 .. cpi_cache_leaf_size.
4111 *
4112 * The regs for the leaf, %ecx == 0 has already
4113 * been allocated as indicated above.
4114 */
4115 for (i = 1; i < size; i++) {
4116 cp = cpi->cpi_cache_leaves[i] =
4117 kmem_zalloc(sizeof (regs), KM_SLEEP);
4118 cp->cp_eax = leaf;
4119 cp->cp_ecx = i;
4120
4121 (void) __cpuid_insn(cp);
4122 }
4123 }
4124 /*
4125 * Determine the number of bits needed to represent
4126 * the number of CPUs sharing the last level cache.
4127 *
4128 * Shift off that number of bits from the APIC id to
4129 * derive the cache id.
4130 */
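		/*
		 * For example, if 8 logical CPUs share the last level cache,
		 * the loop below computes shft == 3, so APIC ids 0-7 all map
		 * to cache id 0, ids 8-15 to cache id 1, and so on.
		 */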
4131 shft = 0;
4132 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1)
4133 shft++;
4134 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft;
4135 }
4136
4137 /*
4138 * Now fixup the brand string
4139 */
4140 if ((cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) == 0) {
4141 fabricate_brandstr(cpi);
4142 } else {
4143
4144 /*
4145 * If we successfully extracted a brand string from the cpuid
4146 * instruction, clean it up by removing leading spaces and
4147 * similar junk.
4148 */
4149 if (cpi->cpi_brandstr[0]) {
4150 size_t maxlen = sizeof (cpi->cpi_brandstr);
4151 char *src, *dst;
4152
4153 dst = src = (char *)cpi->cpi_brandstr;
4154 src[maxlen - 1] = '\0';
4155 /*
4156 * strip leading spaces
4157 */
4158 while (*src == ' ')
4159 src++;
4160 /*
4161 * Remove any 'Genuine' or "Authentic" prefixes
4162 */
4163 if (strncmp(src, "Genuine ", 8) == 0)
4164 src += 8;
4165 if (strncmp(src, "Authentic ", 10) == 0)
4166 src += 10;
4167
4168 /*
4169 * Now do an in-place copy.
4170 * Map (R) to (r) and (TM) to (tm).
4171 * The era of teletypes is long gone, and there's
4172 * -really- no need to shout.
4173 */
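			/*
			 * For example, a raw "Intel(R) Xeon(TM) CPU" string
			 * comes out of this loop as "Intel(r) Xeon(tm) CPU".
			 */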
4174 while (*src != '\0') {
4175 if (src[0] == '(') {
4176 if (strncmp(src + 1, "R)", 2) == 0) {
4177 (void) strncpy(dst, "(r)", 3);
4178 src += 3;
4179 dst += 3;
4180 continue;
4181 }
4182 if (strncmp(src + 1, "TM)", 3) == 0) {
4183 (void) strncpy(dst, "(tm)", 4);
4184 src += 4;
4185 dst += 4;
4186 continue;
4187 }
4188 }
4189 *dst++ = *src++;
4190 }
4191 *dst = '\0';
4192
4193 /*
4194 * Finally, remove any trailing spaces
4195 */
4196 while (--dst > cpi->cpi_brandstr)
4197 if (*dst == ' ')
4198 *dst = '\0';
4199 else
4200 break;
4201 } else
4202 fabricate_brandstr(cpi);
4203 }
4204 cpi->cpi_pass = 3;
4205 }
4206
4207 /*
4208 * This routine is called out of bind_hwcap() much later in the life
4209 * of the kernel (post_startup()). The job of this routine is to resolve
4210 * the hardware feature support and kernel support for those features into
4211 * what we're actually going to tell applications via the aux vector.
4212 */
4213 void
4214 cpuid_pass4(cpu_t *cpu, uint_t *hwcap_out)
4215 {
4216 struct cpuid_info *cpi;
4217 uint_t hwcap_flags = 0, hwcap_flags_2 = 0;
4218
4219 if (cpu == NULL)
4220 cpu = CPU;
4221 cpi = cpu->cpu_m.mcpu_cpi;
4222
4223 ASSERT(cpi->cpi_pass == 3);
4224
4225 if (cpi->cpi_maxeax >= 1) {
4226 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES];
4227 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES];
4228 uint32_t *ebx = &cpi->cpi_support[STD_EBX_FEATURES];
4229
4230 *edx = CPI_FEATURES_EDX(cpi);
4231 *ecx = CPI_FEATURES_ECX(cpi);
4232 *ebx = CPI_FEATURES_7_0_EBX(cpi);
4233
4234 /*
4235 * [these require explicit kernel support]
4236 */
4237 if (!is_x86_feature(x86_featureset, X86FSET_SEP))
4238 *edx &= ~CPUID_INTC_EDX_SEP;
4239
4240 if (!is_x86_feature(x86_featureset, X86FSET_SSE))
4241 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE);
4242 if (!is_x86_feature(x86_featureset, X86FSET_SSE2))
4243 *edx &= ~CPUID_INTC_EDX_SSE2;
4244
4245 if (!is_x86_feature(x86_featureset, X86FSET_HTT))
4246 *edx &= ~CPUID_INTC_EDX_HTT;
4247
4248 if (!is_x86_feature(x86_featureset, X86FSET_SSE3))
4249 *ecx &= ~CPUID_INTC_ECX_SSE3;
4250
4251 if (!is_x86_feature(x86_featureset, X86FSET_SSSE3))
4252 *ecx &= ~CPUID_INTC_ECX_SSSE3;
4253 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_1))
4254 *ecx &= ~CPUID_INTC_ECX_SSE4_1;
4255 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_2))
4256 *ecx &= ~CPUID_INTC_ECX_SSE4_2;
4257 if (!is_x86_feature(x86_featureset, X86FSET_AES))
4258 *ecx &= ~CPUID_INTC_ECX_AES;
4259 if (!is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ))
4260 *ecx &= ~CPUID_INTC_ECX_PCLMULQDQ;
4261 if (!is_x86_feature(x86_featureset, X86FSET_XSAVE))
4262 *ecx &= ~(CPUID_INTC_ECX_XSAVE |
4263 CPUID_INTC_ECX_OSXSAVE);
4264 if (!is_x86_feature(x86_featureset, X86FSET_AVX))
4265 *ecx &= ~CPUID_INTC_ECX_AVX;
4266 if (!is_x86_feature(x86_featureset, X86FSET_F16C))
4267 *ecx &= ~CPUID_INTC_ECX_F16C;
4268 if (!is_x86_feature(x86_featureset, X86FSET_FMA))
4269 *ecx &= ~CPUID_INTC_ECX_FMA;
4270 if (!is_x86_feature(x86_featureset, X86FSET_BMI1))
4271 *ebx &= ~CPUID_INTC_EBX_7_0_BMI1;
4272 if (!is_x86_feature(x86_featureset, X86FSET_BMI2))
4273 *ebx &= ~CPUID_INTC_EBX_7_0_BMI2;
4274 if (!is_x86_feature(x86_featureset, X86FSET_AVX2))
4275 *ebx &= ~CPUID_INTC_EBX_7_0_AVX2;
4276 if (!is_x86_feature(x86_featureset, X86FSET_RDSEED))
4277 *ebx &= ~CPUID_INTC_EBX_7_0_RDSEED;
4278 if (!is_x86_feature(x86_featureset, X86FSET_ADX))
4279 *ebx &= ~CPUID_INTC_EBX_7_0_ADX;
4280
4281 /*
4282 * [no explicit support required beyond x87 fp context]
4283 */
4284 if (!fpu_exists)
4285 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX);
4286
4287 /*
4288 * Now map the supported feature vector to things that we
4289 * think userland will care about.
4290 */
4291 if (*edx & CPUID_INTC_EDX_SEP)
4292 hwcap_flags |= AV_386_SEP;
4293 if (*edx & CPUID_INTC_EDX_SSE)
4294 hwcap_flags |= AV_386_FXSR | AV_386_SSE;
4295 if (*edx & CPUID_INTC_EDX_SSE2)
4296 hwcap_flags |= AV_386_SSE2;
4297 if (*ecx & CPUID_INTC_ECX_SSE3)
4298 hwcap_flags |= AV_386_SSE3;
4299 if (*ecx & CPUID_INTC_ECX_SSSE3)
4300 hwcap_flags |= AV_386_SSSE3;
4301 if (*ecx & CPUID_INTC_ECX_SSE4_1)
4302 hwcap_flags |= AV_386_SSE4_1;
4303 if (*ecx & CPUID_INTC_ECX_SSE4_2)
4304 hwcap_flags |= AV_386_SSE4_2;
4305 if (*ecx & CPUID_INTC_ECX_MOVBE)
4306 hwcap_flags |= AV_386_MOVBE;
4307 if (*ecx & CPUID_INTC_ECX_AES)
4308 hwcap_flags |= AV_386_AES;
4309 if (*ecx & CPUID_INTC_ECX_PCLMULQDQ)
4310 hwcap_flags |= AV_386_PCLMULQDQ;
4311 if ((*ecx & CPUID_INTC_ECX_XSAVE) &&
4312 (*ecx & CPUID_INTC_ECX_OSXSAVE)) {
4313 hwcap_flags |= AV_386_XSAVE;
4314
4315 if (*ecx & CPUID_INTC_ECX_AVX) {
4316 uint32_t *ecx_7 = &CPI_FEATURES_7_0_ECX(cpi);
4317 uint32_t *edx_7 = &CPI_FEATURES_7_0_EDX(cpi);
4318
4319 hwcap_flags |= AV_386_AVX;
4320 if (*ecx & CPUID_INTC_ECX_F16C)
4321 hwcap_flags_2 |= AV_386_2_F16C;
4322 if (*ecx & CPUID_INTC_ECX_FMA)
4323 hwcap_flags_2 |= AV_386_2_FMA;
4324
4325 if (*ebx & CPUID_INTC_EBX_7_0_BMI1)
4326 hwcap_flags_2 |= AV_386_2_BMI1;
4327 if (*ebx & CPUID_INTC_EBX_7_0_BMI2)
4328 hwcap_flags_2 |= AV_386_2_BMI2;
4329 if (*ebx & CPUID_INTC_EBX_7_0_AVX2)
4330 hwcap_flags_2 |= AV_386_2_AVX2;
4331 if (*ebx & CPUID_INTC_EBX_7_0_AVX512F)
4332 hwcap_flags_2 |= AV_386_2_AVX512F;
4333 if (*ebx & CPUID_INTC_EBX_7_0_AVX512DQ)
4334 hwcap_flags_2 |= AV_386_2_AVX512DQ;
4335 if (*ebx & CPUID_INTC_EBX_7_0_AVX512IFMA)
4336 hwcap_flags_2 |= AV_386_2_AVX512IFMA;
4337 if (*ebx & CPUID_INTC_EBX_7_0_AVX512PF)
4338 hwcap_flags_2 |= AV_386_2_AVX512PF;
4339 if (*ebx & CPUID_INTC_EBX_7_0_AVX512ER)
4340 hwcap_flags_2 |= AV_386_2_AVX512ER;
4341 if (*ebx & CPUID_INTC_EBX_7_0_AVX512CD)
4342 hwcap_flags_2 |= AV_386_2_AVX512CD;
4343 if (*ebx & CPUID_INTC_EBX_7_0_AVX512BW)
4344 hwcap_flags_2 |= AV_386_2_AVX512BW;
4345 if (*ebx & CPUID_INTC_EBX_7_0_AVX512VL)
4346 hwcap_flags_2 |= AV_386_2_AVX512VL;
4347
4348 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VBMI)
4349 hwcap_flags_2 |= AV_386_2_AVX512VBMI;
4350 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VNNI)
4351 hwcap_flags_2 |= AV_386_2_AVX512_VNNI;
4352 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VPOPCDQ)
4353 hwcap_flags_2 |= AV_386_2_AVX512VPOPCDQ;
4354
4355 if (*edx_7 & CPUID_INTC_EDX_7_0_AVX5124NNIW)
4356 hwcap_flags_2 |= AV_386_2_AVX512_4NNIW;
4357 if (*edx_7 & CPUID_INTC_EDX_7_0_AVX5124FMAPS)
4358 hwcap_flags_2 |= AV_386_2_AVX512_4FMAPS;
4359 }
4360 }
4361 if (*ecx & CPUID_INTC_ECX_VMX)
4362 hwcap_flags |= AV_386_VMX;
4363 if (*ecx & CPUID_INTC_ECX_POPCNT)
4364 hwcap_flags |= AV_386_POPCNT;
4365 if (*edx & CPUID_INTC_EDX_FPU)
4366 hwcap_flags |= AV_386_FPU;
4367 if (*edx & CPUID_INTC_EDX_MMX)
4368 hwcap_flags |= AV_386_MMX;
4369
4370 if (*edx & CPUID_INTC_EDX_TSC)
4371 hwcap_flags |= AV_386_TSC;
4372 if (*edx & CPUID_INTC_EDX_CX8)
4373 hwcap_flags |= AV_386_CX8;
4374 if (*edx & CPUID_INTC_EDX_CMOV)
4375 hwcap_flags |= AV_386_CMOV;
4376 if (*ecx & CPUID_INTC_ECX_CX16)
4377 hwcap_flags |= AV_386_CX16;
4378
4379 if (*ecx & CPUID_INTC_ECX_RDRAND)
4380 hwcap_flags_2 |= AV_386_2_RDRAND;
4381 if (*ebx & CPUID_INTC_EBX_7_0_ADX)
4382 hwcap_flags_2 |= AV_386_2_ADX;
4383 if (*ebx & CPUID_INTC_EBX_7_0_RDSEED)
4384 hwcap_flags_2 |= AV_386_2_RDSEED;
4385 if (*ebx & CPUID_INTC_EBX_7_0_SHA)
4386 hwcap_flags_2 |= AV_386_2_SHA;
4387 if (*ebx & CPUID_INTC_EBX_7_0_FSGSBASE)
4388 hwcap_flags_2 |= AV_386_2_FSGSBASE;
4389 if (*ebx & CPUID_INTC_EBX_7_0_CLWB)
4390 hwcap_flags_2 |= AV_386_2_CLWB;
4391 if (*ebx & CPUID_INTC_EBX_7_0_CLFLUSHOPT)
4392 hwcap_flags_2 |= AV_386_2_CLFLUSHOPT;
4393
4394 }
4395 /*
4396	 * Check a few miscellaneous features.
4397 */
4398 if (is_x86_feature(x86_featureset, X86FSET_CLZERO))
4399 hwcap_flags_2 |= AV_386_2_CLZERO;
4400
4401 if (cpi->cpi_xmaxeax < 0x80000001)
4402 goto pass4_done;
4403
4404 switch (cpi->cpi_vendor) {
4405 struct cpuid_regs cp;
4406 uint32_t *edx, *ecx;
4407
4408 case X86_VENDOR_Intel:
4409 /*
4410		 * Seems like Intel duplicated only what was necessary
4411		 * here to make the initial crop of 64-bit OSes work.
4412 * Hopefully, those are the only "extended" bits
4413 * they'll add.
4414 */
4415 /*FALLTHROUGH*/
4416
4417 case X86_VENDOR_AMD:
4418 edx = &cpi->cpi_support[AMD_EDX_FEATURES];
4419 ecx = &cpi->cpi_support[AMD_ECX_FEATURES];
4420
4421 *edx = CPI_FEATURES_XTD_EDX(cpi);
4422 *ecx = CPI_FEATURES_XTD_ECX(cpi);
4423
4424 /*
4425 * [these features require explicit kernel support]
4426 */
4427 switch (cpi->cpi_vendor) {
4428 case X86_VENDOR_Intel:
4429 if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
4430 *edx &= ~CPUID_AMD_EDX_TSCP;
4431 break;
4432
4433 case X86_VENDOR_AMD:
4434 if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
4435 *edx &= ~CPUID_AMD_EDX_TSCP;
4436 if (!is_x86_feature(x86_featureset, X86FSET_SSE4A))
4437 *ecx &= ~CPUID_AMD_ECX_SSE4A;
4438 break;
4439
4440 default:
4441 break;
4442 }
4443
4444 /*
4445 * [no explicit support required beyond
4446 * x87 fp context and exception handlers]
4447 */
4448 if (!fpu_exists)
4449 *edx &= ~(CPUID_AMD_EDX_MMXamd |
4450 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx);
4451
4452 if (!is_x86_feature(x86_featureset, X86FSET_NX))
4453 *edx &= ~CPUID_AMD_EDX_NX;
4454 #if !defined(__amd64)
4455 *edx &= ~CPUID_AMD_EDX_LM;
4456 #endif
4457 /*
4458 * Now map the supported feature vector to
4459 * things that we think userland will care about.
4460 */
4461 #if defined(__amd64)
4462 if (*edx & CPUID_AMD_EDX_SYSC)
4463 hwcap_flags |= AV_386_AMD_SYSC;
4464 #endif
4465 if (*edx & CPUID_AMD_EDX_MMXamd)
4466 hwcap_flags |= AV_386_AMD_MMX;
4467 if (*edx & CPUID_AMD_EDX_3DNow)
4468 hwcap_flags |= AV_386_AMD_3DNow;
4469 if (*edx & CPUID_AMD_EDX_3DNowx)
4470 hwcap_flags |= AV_386_AMD_3DNowx;
4471 if (*ecx & CPUID_AMD_ECX_SVM)
4472 hwcap_flags |= AV_386_AMD_SVM;
4473
4474 switch (cpi->cpi_vendor) {
4475 case X86_VENDOR_AMD:
4476 if (*edx & CPUID_AMD_EDX_TSCP)
4477 hwcap_flags |= AV_386_TSCP;
4478 if (*ecx & CPUID_AMD_ECX_AHF64)
4479 hwcap_flags |= AV_386_AHF;
4480 if (*ecx & CPUID_AMD_ECX_SSE4A)
4481 hwcap_flags |= AV_386_AMD_SSE4A;
4482 if (*ecx & CPUID_AMD_ECX_LZCNT)
4483 hwcap_flags |= AV_386_AMD_LZCNT;
4484 if (*ecx & CPUID_AMD_ECX_MONITORX)
4485 hwcap_flags_2 |= AV_386_2_MONITORX;
4486 break;
4487
4488 case X86_VENDOR_Intel:
4489 if (*edx & CPUID_AMD_EDX_TSCP)
4490 hwcap_flags |= AV_386_TSCP;
4491 /*
4492 * Aarrgh.
4493 * Intel uses a different bit in the same word.
4494 */
4495 if (*ecx & CPUID_INTC_ECX_AHF64)
4496 hwcap_flags |= AV_386_AHF;
4497 break;
4498
4499 default:
4500 break;
4501 }
4502 break;
4503
4504 case X86_VENDOR_TM:
4505 cp.cp_eax = 0x80860001;
4506 (void) __cpuid_insn(&cp);
4507 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx;
4508 break;
4509
4510 default:
4511 break;
4512 }
4513
4514 pass4_done:
4515 cpi->cpi_pass = 4;
4516 if (hwcap_out != NULL) {
4517 hwcap_out[0] = hwcap_flags;
4518 hwcap_out[1] = hwcap_flags_2;
4519 }
4520 }
4521
4522
4523 /*
4524 * Simulate the cpuid instruction using the data we previously
4525 * captured about this CPU. We try our best to return the truth
4526 * about the hardware, independently of kernel support.
4527 */
4528 uint32_t
4529 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp)
4530 {
4531 struct cpuid_info *cpi;
4532 struct cpuid_regs *xcp;
4533
4534 if (cpu == NULL)
4535 cpu = CPU;
4536 cpi = cpu->cpu_m.mcpu_cpi;
4537
4538 ASSERT(cpuid_checkpass(cpu, 3));
4539
4540 /*
4541 * CPUID data is cached in two separate places: cpi_std for standard
4542	 * CPUID leaves, and cpi_extd for extended CPUID leaves.
4543 */
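	/*
	 * For example, a request for leaf 2 is served from cpi_std[2], while
	 * a request for leaf 0x80000008 comes from cpi_extd[8].
	 */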
4544 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD) {
4545 xcp = &cpi->cpi_std[cp->cp_eax];
4546 } else if (cp->cp_eax >= CPUID_LEAF_EXT_0 &&
4547 cp->cp_eax <= cpi->cpi_xmaxeax &&
4548 cp->cp_eax < CPUID_LEAF_EXT_0 + NMAX_CPI_EXTD) {
4549 xcp = &cpi->cpi_extd[cp->cp_eax - CPUID_LEAF_EXT_0];
4550 } else {
4551 /*
4552 * The caller is asking for data from an input parameter which
4553 * the kernel has not cached. In this case we go fetch from
4554 * the hardware and return the data directly to the user.
4555 */
4556 return (__cpuid_insn(cp));
4557 }
4558
4559 cp->cp_eax = xcp->cp_eax;
4560 cp->cp_ebx = xcp->cp_ebx;
4561 cp->cp_ecx = xcp->cp_ecx;
4562 cp->cp_edx = xcp->cp_edx;
4563 return (cp->cp_eax);
4564 }
4565
4566 int
4567 cpuid_checkpass(cpu_t *cpu, int pass)
4568 {
4569 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL &&
4570 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass);
4571 }
4572
4573 int
4574 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n)
4575 {
4576 ASSERT(cpuid_checkpass(cpu, 3));
4577
4578 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr));
4579 }
4580
4581 int
4582 cpuid_is_cmt(cpu_t *cpu)
4583 {
4584 if (cpu == NULL)
4585 cpu = CPU;
4586
4587 ASSERT(cpuid_checkpass(cpu, 1));
4588
4589 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0);
4590 }
4591
4592 /*
4593 * AMD and Intel both implement the 64-bit variant of the syscall
4594 * instruction (syscallq), so if there's -any- support for syscall,
4595 * cpuid currently says "yes, we support this".
4596 *
4597 * However, Intel decided to -not- implement the 32-bit variant of the
4598 * syscall instruction, so we provide a predicate to allow our caller
4599 * to test that subtlety here.
4600 *
4601 * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor,
4602 * even in the case where the hardware would in fact support it.
4603 */
4604 /*ARGSUSED*/
4605 int
4606 cpuid_syscall32_insn(cpu_t *cpu)
4607 {
4608 ASSERT(cpuid_checkpass((cpu == NULL ? CPU : cpu), 1));
4609
4610 #if !defined(__xpv)
4611 if (cpu == NULL)
4612 cpu = CPU;
4613
4614 /*CSTYLED*/
4615 {
4616 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4617
4618 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
4619 cpi->cpi_xmaxeax >= 0x80000001 &&
4620 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC))
4621 return (1);
4622 }
4623 #endif
4624 return (0);
4625 }
4626
4627 int
4628 cpuid_getidstr(cpu_t *cpu, char *s, size_t n)
4629 {
4630 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4631
4632 static const char fmt[] =
4633 "x86 (%s %X family %d model %d step %d clock %d MHz)";
4634 static const char fmt_ht[] =
4635 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)";
4636
4637 ASSERT(cpuid_checkpass(cpu, 1));
4638
4639 if (cpuid_is_cmt(cpu))
4640 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid,
4641 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
4642 cpi->cpi_family, cpi->cpi_model,
4643 cpi->cpi_step, cpu->cpu_type_info.pi_clock));
4644 return (snprintf(s, n, fmt,
4645 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
4646 cpi->cpi_family, cpi->cpi_model,
4647 cpi->cpi_step, cpu->cpu_type_info.pi_clock));
4648 }
4649
4650 const char *
4651 cpuid_getvendorstr(cpu_t *cpu)
4652 {
4653 ASSERT(cpuid_checkpass(cpu, 1));
4654 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr);
4655 }
4656
4657 uint_t
4658 cpuid_getvendor(cpu_t *cpu)
4659 {
4660 ASSERT(cpuid_checkpass(cpu, 1));
4661 return (cpu->cpu_m.mcpu_cpi->cpi_vendor);
4662 }
4663
4664 uint_t
4665 cpuid_getfamily(cpu_t *cpu)
4666 {
4667 ASSERT(cpuid_checkpass(cpu, 1));
4668 return (cpu->cpu_m.mcpu_cpi->cpi_family);
4669 }
4670
4671 uint_t
4672 cpuid_getmodel(cpu_t *cpu)
4673 {
4674 ASSERT(cpuid_checkpass(cpu, 1));
4675 return (cpu->cpu_m.mcpu_cpi->cpi_model);
4676 }
4677
4678 uint_t
4679 cpuid_get_ncpu_per_chip(cpu_t *cpu)
4680 {
4681 ASSERT(cpuid_checkpass(cpu, 1));
4682 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip);
4683 }
4684
4685 uint_t
4686 cpuid_get_ncore_per_chip(cpu_t *cpu)
4687 {
4688 ASSERT(cpuid_checkpass(cpu, 1));
4689 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip);
4690 }
4691
4692 uint_t
4693 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu)
4694 {
4695 ASSERT(cpuid_checkpass(cpu, 2));
4696 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache);
4697 }
4698
4699 id_t
4700 cpuid_get_last_lvl_cacheid(cpu_t *cpu)
4701 {
4702 ASSERT(cpuid_checkpass(cpu, 2));
4703 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
4704 }
4705
4706 uint_t
4707 cpuid_getstep(cpu_t *cpu)
4708 {
4709 ASSERT(cpuid_checkpass(cpu, 1));
4710 return (cpu->cpu_m.mcpu_cpi->cpi_step);
4711 }
4712
4713 uint_t
4714 cpuid_getsig(struct cpu *cpu)
4715 {
4716 ASSERT(cpuid_checkpass(cpu, 1));
4717 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax);
4718 }
4719
4720 uint32_t
4721 cpuid_getchiprev(struct cpu *cpu)
4722 {
4723 ASSERT(cpuid_checkpass(cpu, 1));
4724 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev);
4725 }
4726
4727 const char *
4728 cpuid_getchiprevstr(struct cpu *cpu)
4729 {
4730 ASSERT(cpuid_checkpass(cpu, 1));
4731 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr);
4732 }
4733
4734 uint32_t
4735 cpuid_getsockettype(struct cpu *cpu)
4736 {
4737 ASSERT(cpuid_checkpass(cpu, 1));
4738 return (cpu->cpu_m.mcpu_cpi->cpi_socket);
4739 }
4740
4741 const char *
4742 cpuid_getsocketstr(cpu_t *cpu)
4743 {
4744 static const char *socketstr = NULL;
4745 struct cpuid_info *cpi;
4746
4747 ASSERT(cpuid_checkpass(cpu, 1));
4748 cpi = cpu->cpu_m.mcpu_cpi;
4749
4750 /* Assume that socket types are the same across the system */
4751 if (socketstr == NULL)
4752 socketstr = _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family,
4753 cpi->cpi_model, cpi->cpi_step);
4754
4755
4756 return (socketstr);
4757 }
4758
4759 int
4760 cpuid_get_chipid(cpu_t *cpu)
4761 {
4762 ASSERT(cpuid_checkpass(cpu, 1));
4763
4764 if (cpuid_is_cmt(cpu))
4765 return (cpu->cpu_m.mcpu_cpi->cpi_chipid);
4766 return (cpu->cpu_id);
4767 }
4768
4769 id_t
4770 cpuid_get_coreid(cpu_t *cpu)
4771 {
4772 ASSERT(cpuid_checkpass(cpu, 1));
4773 return (cpu->cpu_m.mcpu_cpi->cpi_coreid);
4774 }
4775
4776 int
4777 cpuid_get_pkgcoreid(cpu_t *cpu)
4778 {
4779 ASSERT(cpuid_checkpass(cpu, 1));
4780 return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid);
4781 }
4782
4783 int
4784 cpuid_get_clogid(cpu_t *cpu)
4785 {
4786 ASSERT(cpuid_checkpass(cpu, 1));
4787 return (cpu->cpu_m.mcpu_cpi->cpi_clogid);
4788 }
4789
4790 int
4791 cpuid_get_cacheid(cpu_t *cpu)
4792 {
4793 ASSERT(cpuid_checkpass(cpu, 1));
4794 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
4795 }
4796
4797 uint_t
4798 cpuid_get_procnodeid(cpu_t *cpu)
4799 {
4800 ASSERT(cpuid_checkpass(cpu, 1));
4801 return (cpu->cpu_m.mcpu_cpi->cpi_procnodeid);
4802 }
4803
4804 uint_t
4805 cpuid_get_procnodes_per_pkg(cpu_t *cpu)
4806 {
4807 ASSERT(cpuid_checkpass(cpu, 1));
4808 return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg);
4809 }
4810
4811 uint_t
4812 cpuid_get_compunitid(cpu_t *cpu)
4813 {
4814 ASSERT(cpuid_checkpass(cpu, 1));
4815 return (cpu->cpu_m.mcpu_cpi->cpi_compunitid);
4816 }
4817
4818 uint_t
4819 cpuid_get_cores_per_compunit(cpu_t *cpu)
4820 {
4821 ASSERT(cpuid_checkpass(cpu, 1));
4822 return (cpu->cpu_m.mcpu_cpi->cpi_cores_per_compunit);
4823 }
4824
4825 /*ARGSUSED*/
4826 int
4827 cpuid_have_cr8access(cpu_t *cpu)
4828 {
4829 #if defined(__amd64)
4830 return (1);
4831 #else
4832 struct cpuid_info *cpi;
4833
4834 ASSERT(cpu != NULL);
4835 cpi = cpu->cpu_m.mcpu_cpi;
4836 if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 &&
4837 (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0)
4838 return (1);
4839 return (0);
4840 #endif
4841 }
4842
4843 uint32_t
4844 cpuid_get_apicid(cpu_t *cpu)
4845 {
4846 ASSERT(cpuid_checkpass(cpu, 1));
4847 if (cpu->cpu_m.mcpu_cpi->cpi_maxeax < 1) {
4848 return (UINT32_MAX);
4849 } else {
4850 return (cpu->cpu_m.mcpu_cpi->cpi_apicid);
4851 }
4852 }
4853
4854 void
4855 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits)
4856 {
4857 struct cpuid_info *cpi;
4858
4859 if (cpu == NULL)
4860 cpu = CPU;
4861 cpi = cpu->cpu_m.mcpu_cpi;
4862
4863 ASSERT(cpuid_checkpass(cpu, 1));
4864
4865 if (pabits)
4866 *pabits = cpi->cpi_pabits;
4867 if (vabits)
4868 *vabits = cpi->cpi_vabits;
4869 }
4870
4871 size_t
4872 cpuid_get_xsave_size()
4873 {
4874 return (MAX(cpuid_info0.cpi_xsave.xsav_max_size,
4875 sizeof (struct xsave_state)));
4876 }
4877
4878 /*
4879 * Return true if the CPUs on this system require 'pointer clearing' for the
4880 * floating point error pointer exception handling. In the past, this has been
4881 * true for all AMD K7 & K8 CPUs, although newer AMD CPUs have been changed to
4882 * behave the same as Intel. This is checked via the CPUID_AMD_EBX_ERR_PTR_ZERO
4883 * feature bit and is reflected in the cpi_fp_amd_save member.
4884 */
4885 boolean_t
4886 cpuid_need_fp_excp_handling()
4887 {
4888 return (cpuid_info0.cpi_vendor == X86_VENDOR_AMD &&
4889 cpuid_info0.cpi_fp_amd_save != 0);
4890 }
4891
4892 /*
4893 * Returns the number of data TLB entries for a corresponding
4894 * pagesize. If it can't be computed, or isn't known, the
4895 * routine returns zero. If you ask about an architecturally
4896 * impossible pagesize, the routine will panic (so that the
4897 * hat implementor knows that things are inconsistent.)
4898 */
4899 uint_t
4900 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize)
4901 {
4902 struct cpuid_info *cpi;
4903 uint_t dtlb_nent = 0;
4904
4905 if (cpu == NULL)
4906 cpu = CPU;
4907 cpi = cpu->cpu_m.mcpu_cpi;
4908
4909 ASSERT(cpuid_checkpass(cpu, 1));
4910
4911 /*
4912 * Check the L2 TLB info
4913 */
4914 if (cpi->cpi_xmaxeax >= 0x80000006) {
4915 struct cpuid_regs *cp = &cpi->cpi_extd[6];
4916
4917 switch (pagesize) {
4918
4919 case 4 * 1024:
4920 /*
4921 * All zero in the top 16 bits of the register
4922 * indicates a unified TLB. Size is in low 16 bits.
4923 */
4924 if ((cp->cp_ebx & 0xffff0000) == 0)
4925 dtlb_nent = cp->cp_ebx & 0x0000ffff;
4926 else
4927 dtlb_nent = BITX(cp->cp_ebx, 27, 16);
4928 break;
4929
4930 case 2 * 1024 * 1024:
4931 if ((cp->cp_eax & 0xffff0000) == 0)
4932 dtlb_nent = cp->cp_eax & 0x0000ffff;
4933 else
4934 dtlb_nent = BITX(cp->cp_eax, 27, 16);
4935 break;
4936
4937 default:
4938 panic("unknown L2 pagesize");
4939 /*NOTREACHED*/
4940 }
4941 }
4942
4943 if (dtlb_nent != 0)
4944 return (dtlb_nent);
4945
4946 /*
4947 * No L2 TLB support for this size, try L1.
4948 */
4949 if (cpi->cpi_xmaxeax >= 0x80000005) {
4950 struct cpuid_regs *cp = &cpi->cpi_extd[5];
4951
4952 switch (pagesize) {
4953 case 4 * 1024:
4954 dtlb_nent = BITX(cp->cp_ebx, 23, 16);
4955 break;
4956 case 2 * 1024 * 1024:
4957 dtlb_nent = BITX(cp->cp_eax, 23, 16);
4958 break;
4959 default:
4960 panic("unknown L1 d-TLB pagesize");
4961 /*NOTREACHED*/
4962 }
4963 }
4964
4965 return (dtlb_nent);
4966 }
4967
4968 /*
4969 * Return 0 if the erratum is not present or not applicable, positive
4970 * if it is, and negative if the status of the erratum is unknown.
4971 *
4972 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm)
4973 * Processors" #25759, Rev 3.57, August 2005
4974 */
4975 int
4976 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum)
4977 {
4978 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4979 uint_t eax;
4980
4981 /*
4982 * Bail out if this CPU isn't an AMD CPU, or if it's
4983 * a legacy (32-bit) AMD CPU.
4984 */
4985 if (cpi->cpi_vendor != X86_VENDOR_AMD ||
4986 cpi->cpi_family == 4 || cpi->cpi_family == 5 ||
4987 cpi->cpi_family == 6) {
4988 return (0);
4989 }
4990
4991 eax = cpi->cpi_std[1].cp_eax;
4992
4993 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50)
4994 #define SH_B3(eax) (eax == 0xf51)
4995 #define B(eax) (SH_B0(eax) || SH_B3(eax))
4996
4997 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58)
4998
4999 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a)
5000 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0)
5001 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2)
5002 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax))
5003
5004 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70)
5005 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0)
5006 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0)
5007 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax))
5008
5009 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70)
5010 #define JH_E1(eax) (eax == 0x20f10) /* JH8_E0 had 0x20f30 */
5011 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0)
5012 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71)
5013 #define BH_E4(eax) (eax == 0x20fb1)
5014 #define SH_E5(eax) (eax == 0x20f42)
5015 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2)
5016 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32)
5017 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \
5018 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \
5019 DH_E6(eax) || JH_E6(eax))
5020
5021 #define DR_AX(eax) (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02)
5022 #define DR_B0(eax) (eax == 0x100f20)
5023 #define DR_B1(eax) (eax == 0x100f21)
5024 #define DR_BA(eax) (eax == 0x100f2a)
5025 #define DR_B2(eax) (eax == 0x100f22)
5026 #define DR_B3(eax) (eax == 0x100f23)
5027 #define RB_C0(eax) (eax == 0x100f40)
5028
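	/*
	 * Each macro above matches raw family/model/stepping signatures taken
	 * from cpi_std[1].cp_eax; e.g. SH_E4 matches 0x20f51 and 0x20f71,
	 * i.e. family 0xf, extended model 2, model 5 or 7, stepping 1.
	 */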
5029 switch (erratum) {
5030 case 1:
5031 return (cpi->cpi_family < 0x10);
5032 case 51: /* what does the asterisk mean? */
5033 return (B(eax) || SH_C0(eax) || CG(eax));
5034 case 52:
5035 return (B(eax));
5036 case 57:
5037 return (cpi->cpi_family <= 0x11);
5038 case 58:
5039 return (B(eax));
5040 case 60:
5041 return (cpi->cpi_family <= 0x11);
5042 case 61:
5043 case 62:
5044 case 63:
5045 case 64:
5046 case 65:
5047 case 66:
5048 case 68:
5049 case 69:
5050 case 70:
5051 case 71:
5052 return (B(eax));
5053 case 72:
5054 return (SH_B0(eax));
5055 case 74:
5056 return (B(eax));
5057 case 75:
5058 return (cpi->cpi_family < 0x10);
5059 case 76:
5060 return (B(eax));
5061 case 77:
5062 return (cpi->cpi_family <= 0x11);
5063 case 78:
5064 return (B(eax) || SH_C0(eax));
5065 case 79:
5066 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
5067 case 80:
5068 case 81:
5069 case 82:
5070 return (B(eax));
5071 case 83:
5072 return (B(eax) || SH_C0(eax) || CG(eax));
5073 case 85:
5074 return (cpi->cpi_family < 0x10);
5075 case 86:
5076 return (SH_C0(eax) || CG(eax));
5077 case 88:
5078 #if !defined(__amd64)
5079 return (0);
5080 #else
5081 return (B(eax) || SH_C0(eax));
5082 #endif
5083 case 89:
5084 return (cpi->cpi_family < 0x10);
5085 case 90:
5086 return (B(eax) || SH_C0(eax) || CG(eax));
5087 case 91:
5088 case 92:
5089 return (B(eax) || SH_C0(eax));
5090 case 93:
5091 return (SH_C0(eax));
5092 case 94:
5093 return (B(eax) || SH_C0(eax) || CG(eax));
5094 case 95:
5095 #if !defined(__amd64)
5096 return (0);
5097 #else
5098 return (B(eax) || SH_C0(eax));
5099 #endif
5100 case 96:
5101 return (B(eax) || SH_C0(eax) || CG(eax));
5102 case 97:
5103 case 98:
5104 return (SH_C0(eax) || CG(eax));
5105 case 99:
5106 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5107 case 100:
5108 return (B(eax) || SH_C0(eax));
5109 case 101:
5110 case 103:
5111 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5112 case 104:
5113 return (SH_C0(eax) || CG(eax) || D0(eax));
5114 case 105:
5115 case 106:
5116 case 107:
5117 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5118 case 108:
5119 return (DH_CG(eax));
5120 case 109:
5121 return (SH_C0(eax) || CG(eax) || D0(eax));
5122 case 110:
5123 return (D0(eax) || EX(eax));
5124 case 111:
5125 return (CG(eax));
5126 case 112:
5127 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
5128 case 113:
5129 return (eax == 0x20fc0);
5130 case 114:
5131 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
5132 case 115:
5133 return (SH_E0(eax) || JH_E1(eax));
5134 case 116:
5135 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
5136 case 117:
5137 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5138 case 118:
5139 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) ||
5140 JH_E6(eax));
5141 case 121:
5142 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
5143 case 122:
5144 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11);
5145 case 123:
5146 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax));
5147 case 131:
5148 return (cpi->cpi_family < 0x10);
5149 case 6336786:
5150
5151 /*
5152 * Test for AdvPowerMgmtInfo.TscPStateInvariant
5153 * if this is a K8 family or newer processor. We're testing for
5154 * this 'erratum' to determine whether or not we have a constant
5155 * TSC.
5156 *
5157 * Our current fix for this is to disable the C1-Clock ramping.
5158 * However, this doesn't work on newer processor families nor
5159 * does it work when virtualized as those devices don't exist.
5160 */
5161 if (cpi->cpi_family >= 0x12 || get_hwenv() != HW_NATIVE) {
5162 return (0);
5163 }
5164
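		/*
		 * For family 0xf parts, bit 8 (TscInvariant) of leaf
		 * 0x80000007 %edx indicates a constant TSC; when it is set,
		 * the erratum workaround is not needed.
		 */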
5165 if (CPI_FAMILY(cpi) == 0xf) {
5166 struct cpuid_regs regs;
5167 regs.cp_eax = 0x80000007;
5168			(void) __cpuid_insn(&regs);
5169 return (!(regs.cp_edx & 0x100));
5170 }
5171 return (0);
5172 case 6323525:
5173 /*
5174 * This erratum (K8 #147) is not present on family 10 and newer.
5175 */
5176 if (cpi->cpi_family >= 0x10) {
5177 return (0);
5178 }
5179 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) |
5180 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40);
5181
5182 case 6671130:
5183 /*
5184 * Check for processors (pre-Shanghai) that do not provide
5185 * optimal management of 1GB PTEs in their TLB.
5186 */
5187 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4);
5188
5189 case 298:
5190 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) ||
5191 DR_B2(eax) || RB_C0(eax));
5192
5193 case 721:
5194 #if defined(__amd64)
5195 return (cpi->cpi_family == 0x10 || cpi->cpi_family == 0x12);
5196 #else
5197 return (0);
5198 #endif
5199
5200 default:
5201 return (-1);
5202
5203 }
5204 }
5205
5206 /*
5207 * Determine if specified erratum is present via OSVW (OS Visible Workaround).
5208 * Return 1 if erratum is present, 0 if not present and -1 if indeterminate.
5209 */
5210 int
5211 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum)
5212 {
5213 struct cpuid_info *cpi;
5214 uint_t osvwid;
5215 static int osvwfeature = -1;
5216 uint64_t osvwlength;
5217
5218
5219 cpi = cpu->cpu_m.mcpu_cpi;
5220
5221 /* confirm OSVW supported */
5222 if (osvwfeature == -1) {
5223 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW;
5224 } else {
5225 /* assert that osvw feature setting is consistent on all cpus */
5226 ASSERT(osvwfeature ==
5227 (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW));
5228 }
5229 if (!osvwfeature)
5230 return (-1);
5231
5232 osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK;
5233
5234 switch (erratum) {
5235 case 298: /* osvwid is 0 */
5236 osvwid = 0;
5237 if (osvwlength <= (uint64_t)osvwid) {
5238 /* osvwid 0 is unknown */
5239 return (-1);
5240 }
5241
5242 /*
5243 * Check the OSVW STATUS MSR to determine the state
5244 * of the erratum where:
5245 * 0 - fixed by HW
5246 * 1 - BIOS has applied the workaround when BIOS
5247 * workaround is available. (Or for other errata,
5248 * OS workaround is required.)
5249 * For a value of 1, caller will confirm that the
5250 * erratum 298 workaround has indeed been applied by BIOS.
5251 *
5252 * A 1 may be set in cpus that have a HW fix
5253 * in a mixed cpu system. Regarding erratum 298:
5254 * In a multiprocessor platform, the workaround above
5255 * should be applied to all processors regardless of
5256 * silicon revision when an affected processor is
5257 * present.
5258 */
5259
5260 return (rdmsr(MSR_AMD_OSVW_STATUS +
5261 (osvwid / OSVW_ID_CNT_PER_MSR)) &
5262 (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR)));
5263
5264 default:
5265 return (-1);
5266 }
5267 }
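/*
 * Illustrative sketch (not part of the original source): the general shape of
 * an OSVW status query for an arbitrary osvwid, assuming the OSVW feature bit
 * and the MSR_AMD_OSVW_ID_LEN length check performed above have already
 * passed.  The helper name osvw_status_bit() is hypothetical and the code is
 * deliberately kept out of the build.
 */
#if 0
static int
osvw_status_bit(uint_t osvwid)
{
        uint64_t status;

        /* Each OSVW status MSR holds OSVW_ID_CNT_PER_MSR erratum bits. */
        status = rdmsr(MSR_AMD_OSVW_STATUS + (osvwid / OSVW_ID_CNT_PER_MSR));
        return ((int)((status >> (osvwid % OSVW_ID_CNT_PER_MSR)) & 1));
}
#endif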
5268
5269 static const char assoc_str[] = "associativity";
5270 static const char line_str[] = "line-size";
5271 static const char size_str[] = "size";
5272
5273 static void
5274 add_cache_prop(dev_info_t *devi, const char *label, const char *type,
5275 uint32_t val)
5276 {
5277 char buf[128];
5278
5279 /*
5280 * ndi_prop_update_int() is used because it is desirable for
5281 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set.
5282 */
5283 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf))
5284 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val);
5285 }
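/*
 * For example (illustrative only): add_cache_prop(devi, l2_cache_str,
 * size_str, 512*1024) creates an integer property named "l2-cache-size"
 * with the value 524288 on the cpu devinfo node.
 */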
5286
5287 /*
5288 * Intel-style cache/tlb description
5289 *
5290 * Standard cpuid level 2 gives a randomly ordered
5291 * selection of descriptors that index into a table that describes
5292 * cache and tlb properties.
5293 */
5294
5295 static const char l1_icache_str[] = "l1-icache";
5296 static const char l1_dcache_str[] = "l1-dcache";
5297 static const char l2_cache_str[] = "l2-cache";
5298 static const char l3_cache_str[] = "l3-cache";
5299 static const char itlb4k_str[] = "itlb-4K";
5300 static const char dtlb4k_str[] = "dtlb-4K";
5301 static const char itlb2M_str[] = "itlb-2M";
5302 static const char itlb4M_str[] = "itlb-4M";
5303 static const char dtlb4M_str[] = "dtlb-4M";
5304 static const char dtlb24_str[] = "dtlb0-2M-4M";
5305 static const char itlb424_str[] = "itlb-4K-2M-4M";
5306 static const char itlb24_str[] = "itlb-2M-4M";
5307 static const char dtlb44_str[] = "dtlb-4K-4M";
5308 static const char sl1_dcache_str[] = "sectored-l1-dcache";
5309 static const char sl2_cache_str[] = "sectored-l2-cache";
5310 static const char itrace_str[] = "itrace-cache";
5311 static const char sl3_cache_str[] = "sectored-l3-cache";
5312 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k";
5313
5314 static const struct cachetab {
5315 uint8_t ct_code;
5316 uint8_t ct_assoc;
5317 uint16_t ct_line_size;
5318 size_t ct_size;
5319 const char *ct_label;
5320 } intel_ctab[] = {
5321 /*
5322 * maintain descending order!
5323 *
5324 * Codes ignored - Reason
5325 * ----------------------
5326 * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache
5327 * f0H/f1H - Currently we do not interpret prefetch size by design
5328 */
5329 { 0xe4, 16, 64, 8*1024*1024, l3_cache_str},
5330 { 0xe3, 16, 64, 4*1024*1024, l3_cache_str},
5331 { 0xe2, 16, 64, 2*1024*1024, l3_cache_str},
5332 { 0xde, 12, 64, 6*1024*1024, l3_cache_str},
5333 { 0xdd, 12, 64, 3*1024*1024, l3_cache_str},
5334 { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str},
5335 { 0xd8, 8, 64, 4*1024*1024, l3_cache_str},
5336 { 0xd7, 8, 64, 2*1024*1024, l3_cache_str},
5337 { 0xd6, 8, 64, 1*1024*1024, l3_cache_str},
5338 { 0xd2, 4, 64, 2*1024*1024, l3_cache_str},
5339 { 0xd1, 4, 64, 1*1024*1024, l3_cache_str},
5340 { 0xd0, 4, 64, 512*1024, l3_cache_str},
5341 { 0xca, 4, 0, 512, sh_l2_tlb4k_str},
5342 { 0xc0, 4, 0, 8, dtlb44_str },
5343 { 0xba, 4, 0, 64, dtlb4k_str },
5344 { 0xb4, 4, 0, 256, dtlb4k_str },
5345 { 0xb3, 4, 0, 128, dtlb4k_str },
5346 { 0xb2, 4, 0, 64, itlb4k_str },
5347 { 0xb0, 4, 0, 128, itlb4k_str },
5348 { 0x87, 8, 64, 1024*1024, l2_cache_str},
5349 { 0x86, 4, 64, 512*1024, l2_cache_str},
5350 { 0x85, 8, 32, 2*1024*1024, l2_cache_str},
5351 { 0x84, 8, 32, 1024*1024, l2_cache_str},
5352 { 0x83, 8, 32, 512*1024, l2_cache_str},
5353 { 0x82, 8, 32, 256*1024, l2_cache_str},
5354 { 0x80, 8, 64, 512*1024, l2_cache_str},
5355 { 0x7f, 2, 64, 512*1024, l2_cache_str},
5356 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str},
5357 { 0x7c, 8, 64, 1024*1024, sl2_cache_str},
5358 { 0x7b, 8, 64, 512*1024, sl2_cache_str},
5359 { 0x7a, 8, 64, 256*1024, sl2_cache_str},
5360 { 0x79, 8, 64, 128*1024, sl2_cache_str},
5361 { 0x78, 8, 64, 1024*1024, l2_cache_str},
5362 { 0x73, 8, 0, 64*1024, itrace_str},
5363 { 0x72, 8, 0, 32*1024, itrace_str},
5364 { 0x71, 8, 0, 16*1024, itrace_str},
5365 { 0x70, 8, 0, 12*1024, itrace_str},
5366 { 0x68, 4, 64, 32*1024, sl1_dcache_str},
5367 { 0x67, 4, 64, 16*1024, sl1_dcache_str},
5368 { 0x66, 4, 64, 8*1024, sl1_dcache_str},
5369 { 0x60, 8, 64, 16*1024, sl1_dcache_str},
5370 { 0x5d, 0, 0, 256, dtlb44_str},
5371 { 0x5c, 0, 0, 128, dtlb44_str},
5372 { 0x5b, 0, 0, 64, dtlb44_str},
5373 { 0x5a, 4, 0, 32, dtlb24_str},
5374 { 0x59, 0, 0, 16, dtlb4k_str},
5375 { 0x57, 4, 0, 16, dtlb4k_str},
5376 { 0x56, 4, 0, 16, dtlb4M_str},
5377 { 0x55, 0, 0, 7, itlb24_str},
5378 { 0x52, 0, 0, 256, itlb424_str},
5379 { 0x51, 0, 0, 128, itlb424_str},
5380 { 0x50, 0, 0, 64, itlb424_str},
5381 { 0x4f, 0, 0, 32, itlb4k_str},
5382 { 0x4e, 24, 64, 6*1024*1024, l2_cache_str},
5383 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str},
5384 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str},
5385 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str},
5386 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str},
5387 { 0x49, 16, 64, 4*1024*1024, l3_cache_str},
5388 { 0x48, 12, 64, 3*1024*1024, l2_cache_str},
5389 { 0x47, 8, 64, 8*1024*1024, l3_cache_str},
5390 { 0x46, 4, 64, 4*1024*1024, l3_cache_str},
5391 { 0x45, 4, 32, 2*1024*1024, l2_cache_str},
5392 { 0x44, 4, 32, 1024*1024, l2_cache_str},
5393 { 0x43, 4, 32, 512*1024, l2_cache_str},
5394 { 0x42, 4, 32, 256*1024, l2_cache_str},
5395 { 0x41, 4, 32, 128*1024, l2_cache_str},
5396 { 0x3e, 4, 64, 512*1024, sl2_cache_str},
5397 { 0x3d, 6, 64, 384*1024, sl2_cache_str},
5398 { 0x3c, 4, 64, 256*1024, sl2_cache_str},
5399 { 0x3b, 2, 64, 128*1024, sl2_cache_str},
5400 { 0x3a, 6, 64, 192*1024, sl2_cache_str},
5401 { 0x39, 4, 64, 128*1024, sl2_cache_str},
5402 { 0x30, 8, 64, 32*1024, l1_icache_str},
5403 { 0x2c, 8, 64, 32*1024, l1_dcache_str},
5404 { 0x29, 8, 64, 4096*1024, sl3_cache_str},
5405 { 0x25, 8, 64, 2048*1024, sl3_cache_str},
5406 { 0x23, 8, 64, 1024*1024, sl3_cache_str},
5407 { 0x22, 4, 64, 512*1024, sl3_cache_str},
5408 { 0x0e, 6, 64, 24*1024, l1_dcache_str},
5409 { 0x0d, 4, 32, 16*1024, l1_dcache_str},
5410 { 0x0c, 4, 32, 16*1024, l1_dcache_str},
5411 { 0x0b, 4, 0, 4, itlb4M_str},
5412 { 0x0a, 2, 32, 8*1024, l1_dcache_str},
5413 { 0x08, 4, 32, 16*1024, l1_icache_str},
5414 { 0x06, 4, 32, 8*1024, l1_icache_str},
5415 { 0x05, 4, 0, 32, dtlb4M_str},
5416 { 0x04, 4, 0, 8, dtlb4M_str},
5417 { 0x03, 4, 0, 64, dtlb4k_str},
5418 { 0x02, 4, 0, 2, itlb4M_str},
5419 { 0x01, 4, 0, 32, itlb4k_str},
5420 { 0 }
5421 };
5422
5423 static const struct cachetab cyrix_ctab[] = {
5424 { 0x70, 4, 0, 32, "tlb-4K" },
5425 { 0x80, 4, 16, 16*1024, "l1-cache" },
5426 { 0 }
5427 };
5428
5429 /*
5430 * Search a cache table for a matching entry
5431 */
5432 static const struct cachetab *
5433 find_cacheent(const struct cachetab *ct, uint_t code)
5434 {
5435 if (code != 0) {
5436 for (; ct->ct_code != 0; ct++)
5437 if (ct->ct_code <= code)
5438 break;
5439 if (ct->ct_code == code)
5440 return (ct);
5441 }
5442 return (NULL);
5443 }
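/*
 * Worked example (illustrative): because intel_ctab is sorted by descending
 * ct_code, the scan above stops at the first entry whose code is <= the
 * descriptor being looked up.  A descriptor of 0x2c therefore lands exactly
 * on { 0x2c, 8, 64, 32*1024, l1_dcache_str } (a 32KB, 8-way, 64-byte-line
 * L1 d-cache), while a descriptor such as 0x40, which is deliberately absent
 * from the table, stops on the 0x3e entry, fails the equality test, and
 * returns NULL so the caller simply skips it.
 */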
5444
5445 /*
5446 * Populate cachetab entry with L2 or L3 cache-information using
5447 * cpuid function 4. This function is called from intel_walk_cacheinfo()
5448 * when descriptor 0x49 is encountered. It returns 0 if no such cache
5449 * information is found.
5450 */
5451 static int
5452 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi)
5453 {
5454 uint32_t level, i;
5455 int ret = 0;
5456
5457 for (i = 0; i < cpi->cpi_cache_leaf_size; i++) {
5458 level = CPI_CACHE_LVL(cpi->cpi_cache_leaves[i]);
5459
5460 if (level == 2 || level == 3) {
5461 ct->ct_assoc =
5462 CPI_CACHE_WAYS(cpi->cpi_cache_leaves[i]) + 1;
5463 ct->ct_line_size =
5464 CPI_CACHE_COH_LN_SZ(cpi->cpi_cache_leaves[i]) + 1;
5465 ct->ct_size = ct->ct_assoc *
5466 (CPI_CACHE_PARTS(cpi->cpi_cache_leaves[i]) + 1) *
5467 ct->ct_line_size *
5468 (cpi->cpi_cache_leaves[i]->cp_ecx + 1);
5469
5470 if (level == 2) {
5471 ct->ct_label = l2_cache_str;
5472 } else if (level == 3) {
5473 ct->ct_label = l3_cache_str;
5474 }
5475 ret = 1;
5476 }
5477 }
5478
5479 return (ret);
5480 }
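/*
 * Worked example (illustrative) of the leaf 4 arithmetic above: each field is
 * reported as "value - 1", so with CPUID.4 reporting ways-1 = 7,
 * partitions-1 = 0, line_size-1 = 63, and sets-1 (%ecx) = 1023, the cache
 * size computes to 8 * 1 * 64 * 1024 = 524288 bytes, i.e. a 512KB L2.
 */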
5481
5482 /*
5483 * Walk the cacheinfo descriptor list, applying 'func' to every valid
5484 * element. The walk is terminated if the walker returns non-zero.
5485 */
5486 static void
5487 intel_walk_cacheinfo(struct cpuid_info *cpi,
5488 void *arg, int (*func)(void *, const struct cachetab *))
5489 {
5490 const struct cachetab *ct;
5491 struct cachetab des_49_ct, des_b1_ct;
5492 uint8_t *dp;
5493 int i;
5494
5495 if ((dp = cpi->cpi_cacheinfo) == NULL)
5496 return;
5497 for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
5498 /*
5499 * For overloaded descriptor 0x49 we use cpuid function 4
5500 * if supported by the current processor, to create
5501 * cache information.
5502 * For overloaded descriptor 0xb1 we use X86_PAE flag
5503 * to disambiguate the cache information.
5504 */
5505 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 &&
5506 intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) {
5507 ct = &des_49_ct;
5508 } else if (*dp == 0xb1) {
5509 des_b1_ct.ct_code = 0xb1;
5510 des_b1_ct.ct_assoc = 4;
5511 des_b1_ct.ct_line_size = 0;
5512 if (is_x86_feature(x86_featureset, X86FSET_PAE)) {
5513 des_b1_ct.ct_size = 8;
5514 des_b1_ct.ct_label = itlb2M_str;
5515 } else {
5516 des_b1_ct.ct_size = 4;
5517 des_b1_ct.ct_label = itlb4M_str;
5518 }
5519 ct = &des_b1_ct;
5520 } else {
5521 if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) {
5522 continue;
5523 }
5524 }
5525
5526 if (func(arg, ct) != 0) {
5527 break;
5528 }
5529 }
5530 }
5531
5532 /*
5533 * (Like the Intel one, except for Cyrix CPUs)
5534 */
5535 static void
5536 cyrix_walk_cacheinfo(struct cpuid_info *cpi,
5537 void *arg, int (*func)(void *, const struct cachetab *))
5538 {
5539 const struct cachetab *ct;
5540 uint8_t *dp;
5541 int i;
5542
5543 if ((dp = cpi->cpi_cacheinfo) == NULL)
5544 return;
5545 for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
5546 /*
5547 * Search Cyrix-specific descriptor table first ..
5548 */
5549 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) {
5550 if (func(arg, ct) != 0)
5551 break;
5552 continue;
5553 }
5554 /*
5555 * .. else fall back to the Intel one
5556 */
5557 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) {
5558 if (func(arg, ct) != 0)
5559 break;
5560 continue;
5561 }
5562 }
5563 }
5564
5565 /*
5566 * A cacheinfo walker that adds associativity, line-size, and size properties
5567 * to the devinfo node it is passed as an argument.
5568 */
5569 static int
5570 add_cacheent_props(void *arg, const struct cachetab *ct)
5571 {
5572 dev_info_t *devi = arg;
5573
5574 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc);
5575 if (ct->ct_line_size != 0)
5576 add_cache_prop(devi, ct->ct_label, line_str,
5577 ct->ct_line_size);
5578 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size);
5579 return (0);
5580 }
5581
5582
5583 static const char fully_assoc[] = "fully-associative?";
5584
5585 /*
5586 * AMD style cache/tlb description
5587 *
5588 * Extended functions 5 and 6 directly describe properties of
5589 * tlbs and various cache levels.
5590 */
5591 static void
5592 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc)
5593 {
5594 switch (assoc) {
5595 case 0: /* reserved; ignore */
5596 break;
5597 default:
5598 add_cache_prop(devi, label, assoc_str, assoc);
5599 break;
5600 case 0xff:
5601 add_cache_prop(devi, label, fully_assoc, 1);
5602 break;
5603 }
5604 }
5605
5606 static void
5607 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
5608 {
5609 if (size == 0)
5610 return;
5611 add_cache_prop(devi, label, size_str, size);
5612 add_amd_assoc(devi, label, assoc);
5613 }
5614
5615 static void
5616 add_amd_cache(dev_info_t *devi, const char *label,
5617 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
5618 {
5619 if (size == 0 || line_size == 0)
5620 return;
5621 add_amd_assoc(devi, label, assoc);
5622 /*
5623 * Most AMD parts have a sectored cache. Multiple cache lines are
5624 * associated with each tag. A sector consists of all cache lines
5625 * associated with a tag. For example, the AMD K6-III has a sector
5626 * size of 2 cache lines per tag.
5627 */
5628 if (lines_per_tag != 0)
5629 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
5630 add_cache_prop(devi, label, line_str, line_size);
5631 add_cache_prop(devi, label, size_str, size * 1024);
5632 }
5633
5634 static void
5635 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc)
5636 {
5637 switch (assoc) {
5638 case 0: /* off */
5639 break;
5640 case 1:
5641 case 2:
5642 case 4:
5643 add_cache_prop(devi, label, assoc_str, assoc);
5644 break;
5645 case 6:
5646 add_cache_prop(devi, label, assoc_str, 8);
5647 break;
5648 case 8:
5649 add_cache_prop(devi, label, assoc_str, 16);
5650 break;
5651 case 0xf:
5652 add_cache_prop(devi, label, fully_assoc, 1);
5653 break;
5654 default: /* reserved; ignore */
5655 break;
5656 }
5657 }
5658
5659 static void
5660 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
5661 {
5662 if (size == 0 || assoc == 0)
5663 return;
5664 add_amd_l2_assoc(devi, label, assoc);
5665 add_cache_prop(devi, label, size_str, size);
5666 }
5667
5668 static void
5669 add_amd_l2_cache(dev_info_t *devi, const char *label,
5670 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
5671 {
5672 if (size == 0 || assoc == 0 || line_size == 0)
5673 return;
5674 add_amd_l2_assoc(devi, label, assoc);
5675 if (lines_per_tag != 0)
5676 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
5677 add_cache_prop(devi, label, line_str, line_size);
5678 add_cache_prop(devi, label, size_str, size * 1024);
5679 }
5680
5681 static void
5682 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi)
5683 {
5684 struct cpuid_regs *cp;
5685
5686 if (cpi->cpi_xmaxeax < 0x80000005)
5687 return;
5688 cp = &cpi->cpi_extd[5];
5689
5690 /*
5691 * 4M/2M L1 TLB configuration
5692 *
5693 * We report the size for 2M pages because AMD uses two
5694 * TLB entries for one 4M page.
5695 */
5696 add_amd_tlb(devi, "dtlb-2M",
5697 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16));
5698 add_amd_tlb(devi, "itlb-2M",
5699 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0));
5700
5701 /*
5702 * 4K L1 TLB configuration
5703 */
5704
5705 switch (cpi->cpi_vendor) {
5706 uint_t nentries;
5707 case X86_VENDOR_TM:
5708 if (cpi->cpi_family >= 5) {
5709 /*
5710 * Crusoe processors have 256 TLB entries, but the
5711 * cpuid data format constrains them to reporting
5712 * only 255 of them.
5713 */
5714 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255)
5715 nentries = 256;
5716 /*
5717 * Crusoe processors also have a unified TLB
5718 */
5719 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24),
5720 nentries);
5721 break;
5722 }
5723 /*FALLTHROUGH*/
5724 default:
5725 add_amd_tlb(devi, itlb4k_str,
5726 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16));
5727 add_amd_tlb(devi, dtlb4k_str,
5728 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0));
5729 break;
5730 }
5731
5732 /*
5733 * data L1 cache configuration
5734 */
5735
5736 add_amd_cache(devi, l1_dcache_str,
5737 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16),
5738 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0));
5739
5740 /*
5741 * code L1 cache configuration
5742 */
5743
5744 add_amd_cache(devi, l1_icache_str,
5745 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16),
5746 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0));
5747
5748 if (cpi->cpi_xmaxeax < 0x80000006)
5749 return;
5750 cp = &cpi->cpi_extd[6];
5751
5752 /* Check for a unified L2 TLB for large pages */
5753
5754 if (BITX(cp->cp_eax, 31, 16) == 0)
5755 add_amd_l2_tlb(devi, "l2-tlb-2M",
5756 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
5757 else {
5758 add_amd_l2_tlb(devi, "l2-dtlb-2M",
5759 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
5760 add_amd_l2_tlb(devi, "l2-itlb-2M",
5761 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
5762 }
5763
5764 /* Check for a unified L2 TLB for 4K pages */
5765
5766 if (BITX(cp->cp_ebx, 31, 16) == 0) {
5767 add_amd_l2_tlb(devi, "l2-tlb-4K",
5768 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
5769 } else {
5770 add_amd_l2_tlb(devi, "l2-dtlb-4K",
5771 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
5772 add_amd_l2_tlb(devi, "l2-itlb-4K",
5773 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
5774 }
5775
5776 add_amd_l2_cache(devi, l2_cache_str,
5777 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12),
5778 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0));
5779 }
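/*
 * Worked example (illustrative) of the extended leaf 0x80000005 decoding
 * above: for the L1 d-cache, %ecx packs the size in KB in bits 31:24, the
 * associativity in 23:16, lines-per-tag in 15:8, and the line size in 7:0.
 * A value of 0x40020140 therefore describes a 64KB, 2-way, 64-byte-line
 * cache with one line per tag, and add_amd_cache() publishes it as
 * l1-dcache-size = 65536, l1-dcache-associativity = 2,
 * l1-dcache-line-size = 64, and l1-dcache-lines-per-tag = 1.
 */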
5780
5781 /*
5782 * There are two basic ways that the x86 world describes its cache
5783 * and tlb architecture - Intel's way and AMD's way.
5784 *
5785 * Return which flavor of cache architecture we should use
5786 */
5787 static int
5788 x86_which_cacheinfo(struct cpuid_info *cpi)
5789 {
5790 switch (cpi->cpi_vendor) {
5791 case X86_VENDOR_Intel:
5792 if (cpi->cpi_maxeax >= 2)
5793 return (X86_VENDOR_Intel);
5794 break;
5795 case X86_VENDOR_AMD:
5796 /*
5797 * The K5 model 1 was the first part from AMD that reported
5798 * cache sizes via extended cpuid functions.
5799 */
5800 if (cpi->cpi_family > 5 ||
5801 (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
5802 return (X86_VENDOR_AMD);
5803 break;
5804 case X86_VENDOR_TM:
5805 if (cpi->cpi_family >= 5)
5806 return (X86_VENDOR_AMD);
5807 /*FALLTHROUGH*/
5808 default:
5809 /*
5810 * If they have extended CPU data for 0x80000005
5811 * then we assume they have AMD-format cache
5812 * information.
5813 *
5814 * If not, and the vendor happens to be Cyrix,
5815 * then try our Cyrix-specific handler.
5816 *
5817 * If we're not Cyrix, then assume we're using Intel's
5818 * table-driven format instead.
5819 */
5820 if (cpi->cpi_xmaxeax >= 0x80000005)
5821 return (X86_VENDOR_AMD);
5822 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix)
5823 return (X86_VENDOR_Cyrix);
5824 else if (cpi->cpi_maxeax >= 2)
5825 return (X86_VENDOR_Intel);
5826 break;
5827 }
5828 return (-1);
5829 }
5830
5831 void
5832 cpuid_set_cpu_properties(void *dip, processorid_t cpu_id,
5833 struct cpuid_info *cpi)
5834 {
5835 dev_info_t *cpu_devi;
5836 int create;
5837
5838 cpu_devi = (dev_info_t *)dip;
5839
5840 /* device_type */
5841 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
5842 "device_type", "cpu");
5843
5844 /* reg */
5845 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5846 "reg", cpu_id);
5847
5848 /* cpu-mhz, and clock-frequency */
5849 if (cpu_freq > 0) {
5850 long long mul;
5851
5852 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5853 "cpu-mhz", cpu_freq);
5854 if ((mul = cpu_freq * 1000000LL) <= INT_MAX)
5855 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5856 "clock-frequency", (int)mul);
5857 }
5858
5859 if (!is_x86_feature(x86_featureset, X86FSET_CPUID)) {
5860 return;
5861 }
5862
5863 /* vendor-id */
5864 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
5865 "vendor-id", cpi->cpi_vendorstr);
5866
5867 if (cpi->cpi_maxeax == 0) {
5868 return;
5869 }
5870
5871 /*
5872 * family, model, and step
5873 */
5874 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5875 "family", CPI_FAMILY(cpi));
5876 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5877 "cpu-model", CPI_MODEL(cpi));
5878 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5879 "stepping-id", CPI_STEP(cpi));
5880
5881 /* type */
5882 switch (cpi->cpi_vendor) {
5883 case X86_VENDOR_Intel:
5884 create = 1;
5885 break;
5886 default:
5887 create = 0;
5888 break;
5889 }
5890 if (create)
5891 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5892 "type", CPI_TYPE(cpi));
5893
5894 /* ext-family */
5895 switch (cpi->cpi_vendor) {
5896 case X86_VENDOR_Intel:
5897 case X86_VENDOR_AMD:
5898 create = cpi->cpi_family >= 0xf;
5899 break;
5900 default:
5901 create = 0;
5902 break;
5903 }
5904 if (create)
5905 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5906 "ext-family", CPI_FAMILY_XTD(cpi));
5907
5908 /* ext-model */
5909 switch (cpi->cpi_vendor) {
5910 case X86_VENDOR_Intel:
5911 create = IS_EXTENDED_MODEL_INTEL(cpi);
5912 break;
5913 case X86_VENDOR_AMD:
5914 create = CPI_FAMILY(cpi) == 0xf;
5915 break;
5916 default:
5917 create = 0;
5918 break;
5919 }
5920 if (create)
5921 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5922 "ext-model", CPI_MODEL_XTD(cpi));
5923
5924 /* generation */
5925 switch (cpi->cpi_vendor) {
5926 case X86_VENDOR_AMD:
5927 /*
5928 * AMD K5 model 1 was the first part to support this
5929 */
5930 create = cpi->cpi_xmaxeax >= 0x80000001;
5931 break;
5932 default:
5933 create = 0;
5934 break;
5935 }
5936 if (create)
5937 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5938 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8));
5939
5940 /* brand-id */
5941 switch (cpi->cpi_vendor) {
5942 case X86_VENDOR_Intel:
5943 /*
5944 * brand id first appeared on Pentium III Xeon model 8 and
5945 * Celeron model 8 processors, and on Opteron
5946 */
5947 create = cpi->cpi_family > 6 ||
5948 (cpi->cpi_family == 6 && cpi->cpi_model >= 8);
5949 break;
5950 case X86_VENDOR_AMD:
5951 create = cpi->cpi_family >= 0xf;
5952 break;
5953 default:
5954 create = 0;
5955 break;
5956 }
5957 if (create && cpi->cpi_brandid != 0) {
5958 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5959 "brand-id", cpi->cpi_brandid);
5960 }
5961
5962 /* chunks, and apic-id */
5963 switch (cpi->cpi_vendor) {
5964 /*
5965 * first available on Pentium IV and Opteron (K8)
5966 */
5967 case X86_VENDOR_Intel:
5968 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
5969 break;
5970 case X86_VENDOR_AMD:
5971 create = cpi->cpi_family >= 0xf;
5972 break;
5973 default:
5974 create = 0;
5975 break;
5976 }
5977 if (create) {
5978 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5979 "chunks", CPI_CHUNKS(cpi));
5980 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5981 "apic-id", cpi->cpi_apicid);
5982 if (cpi->cpi_chipid >= 0) {
5983 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5984 "chip#", cpi->cpi_chipid);
5985 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5986 "clog#", cpi->cpi_clogid);
5987 }
5988 }
5989
5990 /* cpuid-features */
5991 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5992 "cpuid-features", CPI_FEATURES_EDX(cpi));
5993
5994
5995 /* cpuid-features-ecx */
5996 switch (cpi->cpi_vendor) {
5997 case X86_VENDOR_Intel:
5998 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
5999 break;
6000 case X86_VENDOR_AMD:
6001 create = cpi->cpi_family >= 0xf;
6002 break;
6003 default:
6004 create = 0;
6005 break;
6006 }
6007 if (create)
6008 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6009 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi));
6010
6011 /* ext-cpuid-features */
6012 switch (cpi->cpi_vendor) {
6013 case X86_VENDOR_Intel:
6014 case X86_VENDOR_AMD:
6015 case X86_VENDOR_Cyrix:
6016 case X86_VENDOR_TM:
6017 case X86_VENDOR_Centaur:
6018 create = cpi->cpi_xmaxeax >= 0x80000001;
6019 break;
6020 default:
6021 create = 0;
6022 break;
6023 }
6024 if (create) {
6025 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6026 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi));
6027 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6028 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi));
6029 }
6030
6031 /*
6032 * Brand String first appeared in Intel Pentium IV, AMD K5
6033 * model 1, and Cyrix GXm. On earlier models we try to
6034 * simulate something similar, so this string should always
6035 * say -something- about the processor, however lame.
6036 */
6037 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
6038 "brand-string", cpi->cpi_brandstr);
6039
6040 /*
6041 * Finally, cache and tlb information
6042 */
6043 switch (x86_which_cacheinfo(cpi)) {
6044 case X86_VENDOR_Intel:
6045 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
6046 break;
6047 case X86_VENDOR_Cyrix:
6048 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
6049 break;
6050 case X86_VENDOR_AMD:
6051 amd_cache_info(cpi, cpu_devi);
6052 break;
6053 default:
6054 break;
6055 }
6056 }
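/*
 * Net effect of the function above (illustrative summary): the cpu devinfo
 * node ends up carrying properties such as "vendor-id", "family",
 * "cpu-model", "stepping-id", "brand-string", "apic-id", and, via the
 * cacheinfo walkers, the per-cache "*-size", "*-line-size", and
 * "*-associativity" properties, which userland can inspect with tools
 * such as prtconf(1M).
 */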
6057
6058 struct l2info {
6059 int *l2i_csz;
6060 int *l2i_lsz;
6061 int *l2i_assoc;
6062 int l2i_ret;
6063 };
6064
6065 /*
6066 * A cacheinfo walker that fetches the size, line-size and associativity
6067 * of the L2 cache
6068 */
6069 static int
6070 intel_l2cinfo(void *arg, const struct cachetab *ct)
6071 {
6072 struct l2info *l2i = arg;
6073 int *ip;
6074
6075 if (ct->ct_label != l2_cache_str &&
6076 ct->ct_label != sl2_cache_str)
6077 return (0); /* not an L2 -- keep walking */
6078
6079 if ((ip = l2i->l2i_csz) != NULL)
6080 *ip = ct->ct_size;
6081 if ((ip = l2i->l2i_lsz) != NULL)
6082 *ip = ct->ct_line_size;
6083 if ((ip = l2i->l2i_assoc) != NULL)
6084 *ip = ct->ct_assoc;
6085 l2i->l2i_ret = ct->ct_size;
6086 return (1); /* was an L2 -- terminate walk */
6087 }
6088
6089 /*
6090 * AMD L2/L3 Cache and TLB Associativity Field Definition:
6091 *
6092 * Unlike the associativity for the L1 cache and tlb where the 8 bit
6093 * value is the associativity, the associativity for the L2 cache and
6094 * tlb is encoded in the following table. The 4 bit L2 value serves as
6095 * an index into the amd_afd[] array to determine the associativity.
6096 * -1 is undefined. 0 is fully associative.
6097 */
6098
6099 static int amd_afd[] =
6100 {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0};
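/*
 * Worked example (illustrative): an encoded associativity field of 0x6
 * indexes amd_afd[6] and decodes to 8-way, 0xa decodes to 32-way, and 0xf
 * decodes to 0, which this file treats as fully associative; the -1 slots
 * are reserved encodings that should never be seen in practice.
 */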
6101
6102 static void
6103 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i)
6104 {
6105 struct cpuid_regs *cp;
6106 uint_t size, assoc;
6107 int i;
6108 int *ip;
6109
6110 if (cpi->cpi_xmaxeax < 0x80000006)
6111 return;
6112 cp = &cpi->cpi_extd[6];
6113
6114 if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 &&
6115 (size = BITX(cp->cp_ecx, 31, 16)) != 0) {
6116 uint_t cachesz = size * 1024;
6117 assoc = amd_afd[i];
6118
6119 ASSERT(assoc != -1);
6120
6121 if ((ip = l2i->l2i_csz) != NULL)
6122 *ip = cachesz;
6123 if ((ip = l2i->l2i_lsz) != NULL)
6124 *ip = BITX(cp->cp_ecx, 7, 0);
6125 if ((ip = l2i->l2i_assoc) != NULL)
6126 *ip = assoc;
6127 l2i->l2i_ret = cachesz;
6128 }
6129 }
6130
6131 int
6132 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc)
6133 {
6134 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
6135 struct l2info __l2info, *l2i = &__l2info;
6136
6137 l2i->l2i_csz = csz;
6138 l2i->l2i_lsz = lsz;
6139 l2i->l2i_assoc = assoc;
6140 l2i->l2i_ret = -1;
6141
6142 switch (x86_which_cacheinfo(cpi)) {
6143 case X86_VENDOR_Intel:
6144 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
6145 break;
6146 case X86_VENDOR_Cyrix:
6147 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
6148 break;
6149 case X86_VENDOR_AMD:
6150 amd_l2cacheinfo(cpi, l2i);
6151 break;
6152 default:
6153 break;
6154 }
6155 return (l2i->l2i_ret);
6156 }
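/*
 * Illustrative usage sketch (hypothetical caller, not part of the original
 * source): any of the three output pointers may be NULL when the caller is
 * not interested in that value; the return value is the L2 size in bytes,
 * or -1 if no L2 description was found.
 */
#if 0
static void
example_l2_sizing(void)
{
        int l2size, l2line;

        if (getl2cacheinfo(CPU, &l2size, &l2line, NULL) > 0) {
                /* l2size and l2line are both expressed in bytes */
        }
}
#endif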
6157
6158 #if !defined(__xpv)
6159
6160 uint32_t *
6161 cpuid_mwait_alloc(cpu_t *cpu)
6162 {
6163 uint32_t *ret;
6164 size_t mwait_size;
6165
6166 ASSERT(cpuid_checkpass(CPU, 2));
6167
6168 mwait_size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max;
6169 if (mwait_size == 0)
6170 return (NULL);
6171
6172 /*
6173 * kmem_alloc() returns cache line size aligned data for mwait_size
6174 * allocations. mwait_size is currently cache line sized. Neither
6175 * of these implementation details is guaranteed to hold in the
6176 * future.
6177 *
6178 * First try allocating mwait_size, as kmem_alloc() currently returns
6179 * correctly aligned memory. If that allocation is not mwait_size
6180 * aligned, allocate twice the size and round the pointer up instead.
6181 *
6182 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we
6183 * decide to free this memory.
6184 */
6185 ret = kmem_zalloc(mwait_size, KM_SLEEP);
6186 if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) {
6187 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
6188 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size;
6189 *ret = MWAIT_RUNNING;
6190 return (ret);
6191 } else {
6192 kmem_free(ret, mwait_size);
6193 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP);
6194 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
6195 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2;
6196 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size);
6197 *ret = MWAIT_RUNNING;
6198 return (ret);
6199 }
6200 }
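/*
 * Illustrative note on the fallback path above: allocating twice mwait_size
 * guarantees that rounding the returned pointer up to the next mwait_size
 * boundary still leaves a full mwait_size bytes inside the allocation.
 * For example (hypothetical addresses), with mwait_size = 0x40 and
 * kmem_zalloc() returning a pointer ending in 0x720, P2ROUNDUP() yields
 * 0x740, and the monitored line 0x740-0x77f lies entirely within the
 * 0x80-byte buffer that was allocated.
 */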
6201
6202 void
6203 cpuid_mwait_free(cpu_t *cpu)
6204 {
6205 if (cpu->cpu_m.mcpu_cpi == NULL) {
6206 return;
6207 }
6208
6209 if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL &&
6210 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) {
6211 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual,
6212 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual);
6213 }
6214
6215 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL;
6216 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0;
6217 }
6218
6219 void
6220 patch_tsc_read(int flag)
6221 {
6222 size_t cnt;
6223
6224 switch (flag) {
6225 case TSC_NONE:
6226 cnt = &_no_rdtsc_end - &_no_rdtsc_start;
6227 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt);
6228 break;
6229 case TSC_RDTSC_MFENCE:
6230 cnt = &_tsc_mfence_end - &_tsc_mfence_start;
6231 (void) memcpy((void *)tsc_read,
6232 (void *)&_tsc_mfence_start, cnt);
6233 break;
6234 case TSC_RDTSC_LFENCE:
6235 cnt = &_tsc_lfence_end - &_tsc_lfence_start;
6236 (void) memcpy((void *)tsc_read,
6237 (void *)&_tsc_lfence_start, cnt);
6238 break;
6239 case TSC_TSCP:
6240 cnt = &_tscp_end - &_tscp_start;
6241 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt);
6242 break;
6243 default:
6244 /* Bail for unexpected TSC types. (TSC_NONE covers 0) */
6245 cmn_err(CE_PANIC, "Unrecognized TSC type: %d", flag);
6246 break;
6247 }
6248 tsc_type = flag;
6249 }
6250
6251 int
6252 cpuid_deep_cstates_supported(void)
6253 {
6254 struct cpuid_info *cpi;
6255 struct cpuid_regs regs;
6256
6257 ASSERT(cpuid_checkpass(CPU, 1));
6258
6259 cpi = CPU->cpu_m.mcpu_cpi;
6260
6261 if (!is_x86_feature(x86_featureset, X86FSET_CPUID))
6262 return (0);
6263
6264 switch (cpi->cpi_vendor) {
6265 case X86_VENDOR_Intel:
6266 if (cpi->cpi_xmaxeax < 0x80000007)
6267 return (0);
6268
6269 /*
6270 * Does the TSC run at a constant rate in all ACPI C-states?
6271 */
6272 regs.cp_eax = 0x80000007;
6273 (void) __cpuid_insn(&regs);
6274 return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE);
6275
6276 default:
6277 return (0);
6278 }
6279 }
6280
6281 #endif /* !__xpv */
6282
6283 void
6284 post_startup_cpu_fixups(void)
6285 {
6286 #ifndef __xpv
6287 /*
6288 * Some AMD processors support C1E state. Entering this state will
6289 * cause the local APIC timer to stop, which we can't deal with at
6290 * this time.
6291 */
6292 if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) {
6293 on_trap_data_t otd;
6294 uint64_t reg;
6295
6296 if (!on_trap(&otd, OT_DATA_ACCESS)) {
6297 reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT);
6298 /* Disable C1E state if it is enabled by BIOS */
6299 if ((reg >> AMD_ACTONCMPHALT_SHIFT) &
6300 AMD_ACTONCMPHALT_MASK) {
6301 reg &= ~(AMD_ACTONCMPHALT_MASK <<
6302 AMD_ACTONCMPHALT_SHIFT);
6303 wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg);
6304 }
6305 }
6306 no_trap();
6307 }
6308 #endif /* !__xpv */
6309 }
6310
6311 void
6312 enable_pcid(void)
6313 {
6314 if (x86_use_pcid == -1)
6315 x86_use_pcid = is_x86_feature(x86_featureset, X86FSET_PCID);
6316
6317 if (x86_use_invpcid == -1) {
6318 x86_use_invpcid = is_x86_feature(x86_featureset,
6319 X86FSET_INVPCID);
6320 }
6321
6322 if (!x86_use_pcid)
6323 return;
6324
6325 /*
6326 * Intel says that on setting PCIDE, the CPU immediately starts using the PCID
6327 * bits; better make sure there's nothing there.
6328 */
6329 ASSERT((getcr3() & MMU_PAGEOFFSET) == PCID_NONE);
6330
6331 setcr4(getcr4() | CR4_PCIDE);
6332 }
6333
6334 /*
6335 * Setup necessary registers to enable XSAVE feature on this processor.
6336 * This function needs to be called early enough, so that no xsave/xrstor
6337 * ops will execute on the processor before the MSRs are properly set up.
6338 *
6339 * Current implementation has the following assumption:
6340 * - cpuid_pass1() is done, so that X86 features are known.
6341 * - fpu_probe() is done, so that fp_save_mech is chosen.
6342 */
6343 void
6344 xsave_setup_msr(cpu_t *cpu)
6345 {
6346 ASSERT(fp_save_mech == FP_XSAVE);
6347 ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
6348
6349 /* Enable OSXSAVE in CR4. */
6350 setcr4(getcr4() | CR4_OSXSAVE);
6351 /*
6352 * Update SW copy of ECX, so that /dev/cpu/self/cpuid will report
6353 * correct value.
6354 */
6355 cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_ecx |= CPUID_INTC_ECX_OSXSAVE;
6356 setup_xfem();
6357 }
6358
6359 /*
6360 * Starting with the Westmere processor, the local
6361 * APIC timer will continue running in all C-states,
6362 * including the deepest C-states.
6363 */
6364 int
6365 cpuid_arat_supported(void)
6366 {
6367 struct cpuid_info *cpi;
6368 struct cpuid_regs regs;
6369
6370 ASSERT(cpuid_checkpass(CPU, 1));
6371 ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
6372
6373 cpi = CPU->cpu_m.mcpu_cpi;
6374
6375 switch (cpi->cpi_vendor) {
6376 case X86_VENDOR_Intel:
6377 /*
6378 * Always-running Local APIC Timer is
6379 * indicated by CPUID.6.EAX[2].
6380 */
6381 if (cpi->cpi_maxeax >= 6) {
6382 regs.cp_eax = 6;
6383 (void) cpuid_insn(NULL, &regs);
6384 return (regs.cp_eax & CPUID_CSTATE_ARAT);
6385 } else {
6386 return (0);
6387 }
6388 default:
6389 return (0);
6390 }
6391 }
6392
6393 /*
6394 * Check support for Intel ENERGY_PERF_BIAS feature
6395 */
6396 int
6397 cpuid_iepb_supported(struct cpu *cp)
6398 {
6399 struct cpuid_info *cpi = cp->cpu_m.mcpu_cpi;
6400 struct cpuid_regs regs;
6401
6402 ASSERT(cpuid_checkpass(cp, 1));
6403
6404 if (!(is_x86_feature(x86_featureset, X86FSET_CPUID)) ||
6405 !(is_x86_feature(x86_featureset, X86FSET_MSR))) {
6406 return (0);
6407 }
6408
6409 /*
6410 * Intel ENERGY_PERF_BIAS MSR is indicated by
6411 * capability bit CPUID.6.ECX.3
6412 */
6413 if ((cpi->cpi_vendor != X86_VENDOR_Intel) || (cpi->cpi_maxeax < 6))
6414 return (0);
6415
6416 regs.cp_eax = 0x6;
6417 (void) cpuid_insn(NULL, &regs);
6418 return (regs.cp_ecx & CPUID_EPB_SUPPORT);
6419 }
6420
6421 /*
6422 * Check support for TSC deadline timer
6423 *
6424 * The TSC deadline timer provides a superior software programming
6425 * model over the local APIC timer, one that eliminates "time drift".
6426 * Instead of specifying a relative time, software specifies an
6427 * absolute time as the target at which the processor should
6428 * generate a timer event.
6429 */
6430 int
6431 cpuid_deadline_tsc_supported(void)
6432 {
6433 struct cpuid_info *cpi = CPU->cpu_m.mcpu_cpi;
6434 struct cpuid_regs regs;
6435
6436 ASSERT(cpuid_checkpass(CPU, 1));
6437 ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
6438
6439 switch (cpi->cpi_vendor) {
6440 case X86_VENDOR_Intel:
6441 if (cpi->cpi_maxeax >= 1) {
6442 regs.cp_eax = 1;
6443 (void) cpuid_insn(NULL, &regs);
6444 return (regs.cp_ecx & CPUID_DEADLINE_TSC);
6445 } else {
6446 return (0);
6447 }
6448 default:
6449 return (0);
6450 }
6451 }
6452
6453 #if defined(__amd64) && !defined(__xpv)
6454 /*
6455 * Patch in versions of bcopy for high performance Intel Nhm processors
6456 * and later...
6457 */
6458 void
6459 patch_memops(uint_t vendor)
6460 {
6461 size_t cnt, i;
6462 caddr_t to, from;
6463
6464 if ((vendor == X86_VENDOR_Intel) &&
6465 is_x86_feature(x86_featureset, X86FSET_SSE4_2)) {
6466 cnt = &bcopy_patch_end - &bcopy_patch_start;
6467 to = &bcopy_ck_size;
6468 from = &bcopy_patch_start;
6469 for (i = 0; i < cnt; i++) {
6470 *to++ = *from++;
6471 }
6472 }
6473 }
6474 #endif /* __amd64 && !__xpv */
6475
6476 /*
6477 * We're being asked to tell the system how many bits are required to represent
6478 * the various core and strand IDs. While it's tempting to derive this based
6479 * on the values in cpi_ncore_per_chip and cpi_ncpu_per_chip, that isn't quite
6480 * correct. Instead, this needs to be based on the number of bits that the APIC
6481 * allows for these different configurations. We only update these to a larger
6482 * value if we find one.
6483 */
6484 void
6485 cpuid_get_ext_topo(cpu_t *cpu, uint_t *core_nbits, uint_t *strand_nbits)
6486 {
6487 struct cpuid_info *cpi;
6488
6489 VERIFY(cpuid_checkpass(CPU, 1));
6490 cpi = cpu->cpu_m.mcpu_cpi;
6491
6492 if (cpi->cpi_ncore_bits > *core_nbits) {
6493 *core_nbits = cpi->cpi_ncore_bits;
6494 }
6495
6496 if (cpi->cpi_nthread_bits > *strand_nbits) {
6497 *strand_nbits = cpi->cpi_nthread_bits;
6498 }
6499 }
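/*
 * Illustrative example: with *strand_nbits = 1 and *core_nbits = 3, the low
 * bit of an APIC ID selects the strand within a core, the next three bits
 * select the core within the package, and the remaining upper bits identify
 * the package.  A six-core part still consumes all three core bits, which is
 * why the number of bits the APIC ID reserves for each level, rather than the
 * raw counts in cpi_ncore_per_chip, is what matters here.
 */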
6500
6501 void
6502 cpuid_pass_ucode(cpu_t *cpu, uchar_t *fset)
6503 {
6504 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
6505 struct cpuid_regs cp;
6506
6507 /*
6508 * Reread the CPUID portions that we need for various security
6509 * information.
6510 */
6511 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
6512 /*
6513 * Check if we now have leaf 7 available to us.
6514 */
6515 if (cpi->cpi_maxeax < 7) {
6516 bzero(&cp, sizeof (cp));
6517 cp.cp_eax = 0;
6518 cpi->cpi_maxeax = __cpuid_insn(&cp);
6519 if (cpi->cpi_maxeax < 7)
6520 return;
6521 }
6522
6523 bzero(&cp, sizeof (cp));
6524 cp.cp_eax = 7;
6525 cp.cp_ecx = 0;
6526 (void) __cpuid_insn(&cp);
6527 cpi->cpi_std[7] = cp;
6528 } else if (cpi->cpi_vendor == X86_VENDOR_AMD) {
6529 /* No xcpuid support */
6530 if (cpi->cpi_family < 5 ||
6531 (cpi->cpi_family == 5 && cpi->cpi_model < 1))
6532 return;
6533
6534 if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8) {
6535 bzero(&cp, sizeof (cp));
6536 cp.cp_eax = CPUID_LEAF_EXT_0;
6537 cpi->cpi_xmaxeax = __cpuid_insn(&cp);
6538 if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8) {
6539 return;
6540 }
6541 }
6542
6543 bzero(&cp, sizeof (cp));
6544 cp.cp_eax = CPUID_LEAF_EXT_8;
6545 (void) __cpuid_insn(&cp);
6546 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8, &cp);
6547 cpi->cpi_extd[8] = cp;
6548 } else {
6549 /*
6550 * Nothing to do here. Return an empty set which has already
6551 * been zeroed for us.
6552 */
6553 return;
6554 }
6555 cpuid_scan_security(cpu, fset);
6556 }
6557
6558 /* ARGSUSED */
6559 static int
6560 cpuid_post_ucodeadm_xc(xc_arg_t arg0, xc_arg_t arg1, xc_arg_t arg2)
6561 {
6562 uchar_t *fset;
6563
6564 fset = (uchar_t *)(arg0 + sizeof (x86_featureset) * CPU->cpu_id);
6565 cpuid_pass_ucode(CPU, fset);
6566
6567 return (0);
6568 }
6569
6570 /*
6571 * After a microcode update that changes the version, we need to
6572 * rescan CPUID. To do this we check every CPU to make sure that they have the
6573 * same microcode. Then we perform a cross call to all such CPUs. It's the
6574 * caller's job to make sure that no one else can end up doing an update while
6575 * this is going on.
6576 *
6577 * We assume that the system is microcode capable if we're called.
6578 */
6579 void
6580 cpuid_post_ucodeadm(void)
6581 {
6582 uint32_t rev;
6583 int i;
6584 struct cpu *cpu;
6585 cpuset_t cpuset;
6586 void *argdata;
6587 uchar_t *f0;
6588
6589 argdata = kmem_zalloc(sizeof (x86_featureset) * NCPU, KM_SLEEP);
6590
6591 mutex_enter(&cpu_lock);
6592 cpu = cpu_get(0);
6593 rev = cpu->cpu_m.mcpu_ucode_info->cui_rev;
6594 CPUSET_ONLY(cpuset, 0);
6595 for (i = 1; i < max_ncpus; i++) {
6596 if ((cpu = cpu_get(i)) == NULL)
6597 continue;
6598
6599 if (cpu->cpu_m.mcpu_ucode_info->cui_rev != rev) {
6600 panic("post microcode update CPU %d has differing "
6601 "microcode revision (%u) from CPU 0 (%u)",
6602 i, cpu->cpu_m.mcpu_ucode_info->cui_rev, rev);
6603 }
6604 CPUSET_ADD(cpuset, i);
6605 }
6606
6607 kpreempt_disable();
6608 xc_sync((xc_arg_t)argdata, 0, 0, CPUSET2BV(cpuset),
6609 cpuid_post_ucodeadm_xc);
6610 kpreempt_enable();
6611
6612 /*
6613 * OK, now look at each CPU and see if their feature sets are equal.
6614 */
6615 f0 = argdata;
6616 for (i = 1; i < max_ncpus; i++) {
6617 uchar_t *fset;
6618 if (!CPU_IN_SET(cpuset, i))
6619 continue;
6620
6621 fset = (uchar_t *)((uintptr_t)argdata +
6622 sizeof (x86_featureset) * i);
6623
6624 if (!compare_x86_featureset(f0, fset)) {
6625 panic("Post microcode update CPU %d has "
6626 "differing security feature (%p) set from CPU 0 "
6627 "(%p), not appending to feature set", i,
6628 (void *)fset, (void *)f0);
6629 }
6630 }
6631
6632 mutex_exit(&cpu_lock);
6633
6634 for (i = 0; i < NUM_X86_FEATURES; i++) {
6635 cmn_err(CE_CONT, "?post-ucode x86_feature: %s\n",
6636 x86_feature_names[i]);
6637 if (is_x86_feature(f0, i)) {
6638 add_x86_feature(x86_featureset, i);
6639 }
6640 }
6641 kmem_free(argdata, sizeof (x86_featureset) * NCPU);
6642 }