OS-7125 Need mitigation of L1TF (CVE-2018-3646)
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
--- old/usr/src/uts/i86pc/os/cpuid.c
+++ new/usr/src/uts/i86pc/os/cpuid.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
24 24 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
25 25 * Copyright 2014 Josef "Jeff" Sipek <jeffpc@josefsipek.net>
26 26 */
27 27 /*
28 28 * Copyright (c) 2010, Intel Corporation.
29 29 * All rights reserved.
30 30 */
31 31 /*
32 32 * Portions Copyright 2009 Advanced Micro Devices, Inc.
33 33 */
34 34 /*
35 35 * Copyright 2019, Joyent, Inc.
36 36 */
37 37
38 38 /*
39 39 * CPU Identification logic
40 40 *
41 41 * The purpose of this file and its companion, cpuid_subr.c, is to help deal
42 42 * with the identification of CPUs, their features, and their topologies. More
43 43 * specifically, this file helps drive the following:
44 44 *
45 45 * 1. Enumeration of features of the processor which are used by the kernel to
46 46 * determine what features to enable or disable. These may be instruction set
47 47 * enhancements or features that we use.
48 48 *
49 49 * 2. Enumeration of instruction set architecture (ISA) additions that userland
50 50 * will be told about through the auxiliary vector.
51 51 *
52 52 * 3. Understanding the physical topology of the CPU such as the number of
53 53  *    caches, how many cores it has, whether or not it supports simultaneous
54 54  *    multi-threading (SMT), etc.
55 55 *
56 56 * ------------------------
57 57 * CPUID History and Basics
58 58 * ------------------------
59 59 *
60 60 * The cpuid instruction was added by Intel roughly around the time that the
61 61  * original Pentium was introduced. The purpose of cpuid was to provide, in a
62 62  * programmatic fashion, information about the CPU that previously was guessed
63 63 * at. For example, an important part of cpuid is that we can know what
64 64 * extensions to the ISA exist. If you use an invalid opcode you would get a
65 65 * #UD, so this method allows a program (whether a user program or the kernel)
66 66 * to determine what exists without crashing or getting a SIGILL. Of course,
67 67 * this was also during the era of the clones and the AMD Am5x86. The vendor
68 68 * name shows up first in cpuid for a reason.
69 69 *
70 70 * cpuid information is broken down into ranges called a 'leaf'. Each leaf puts
71 71 * unique values into the registers %eax, %ebx, %ecx, and %edx and each leaf has
72 72 * its own meaning. The different leaves are broken down into different regions:
73 73 *
74 74 * [ 0, 7fffffff ] This region is called the 'basic'
75 75 * region. This region is generally defined
76 76 * by Intel, though some of the original
77 77 * portions have different meanings based
78 78 * on the manufacturer. These days, Intel
79 79 * adds most new features to this region.
80 80 * AMD adds non-Intel compatible
81 81 * information in the third, extended
82 82 * region. Intel uses this for everything
83 83 * including ISA extensions, CPU
84 84 * features, cache information, topology,
85 85 * and more.
86 86 *
87 87 * There is a hole carved out of this
88 88 * region which is reserved for
89 89 * hypervisors.
90 90 *
91 91 * [ 40000000, 4fffffff ] This region, which is found in the
92 92 * middle of the previous region, is
93 93 * explicitly promised to never be used by
94 94 * CPUs. Instead, it is used by hypervisors
95 95 * to communicate information about
96 96 * themselves to the operating system. The
97 97 * values and details are unique for each
98 98 * hypervisor.
99 99 *
100 100 * [ 80000000, ffffffff ] This region is called the 'extended'
101 101 * region. Some of the low leaves mirror
102 102 * parts of the basic leaves. This region
103 103 * has generally been used by AMD for
104 104 * various extensions. For example, AMD-
105 105 * specific information about caches,
106 106 * features, and topology are found in this
107 107 * region.
108 108 *
109 109  * To query a given leaf, you place the desired leaf into %eax, zero %ebx, %ecx,
110 110 * and %edx, and then issue the cpuid instruction. At the first leaf in each of
111 111 * the ranges, one of the primary things returned is the maximum valid leaf in
112 112 * that range. This allows for discovery of what range of CPUID is valid.
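 *
 * As a rough illustration (not the exact code used later in this file),
 * discovering the maximum valid basic and extended leaves with the
 * struct cpuid_regs and __cpuid_insn() helpers that this file relies on
 * could look like:
 *
 *      struct cpuid_regs cp = { 0 };
 *      uint32_t max_basic, max_extd;
 *
 *      cp.cp_eax = 0;
 *      (void) __cpuid_insn(&cp);
 *      max_basic = cp.cp_eax;
 *
 *      cp.cp_eax = 0x80000000;
 *      cp.cp_ebx = cp.cp_ecx = cp.cp_edx = 0;
 *      (void) __cpuid_insn(&cp);
 *      max_extd = cp.cp_eax;
 *
 * Here leaf 0 returns the maximum basic leaf in %eax and leaf 0x80000000
 * returns the maximum extended leaf in %eax.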
113 113 *
114 114 * The CPUs have potentially surprising behavior when using an invalid leaf or
115 115 * unimplemented leaf. If the requested leaf is within the valid basic or
116 116 * extended range, but is unimplemented, then %eax, %ebx, %ecx, and %edx will be
117 117 * set to zero. However, if you specify a leaf that is outside of a valid range,
118 118 * then instead it will be filled with the last valid _basic_ leaf. For example,
119 119 * if the maximum basic value is on leaf 0x3, then issuing a cpuid for leaf 4 or
120 120 * an invalid extended leaf will return the information for leaf 3.
121 121 *
122 122 * Some leaves are broken down into sub-leaves. This means that the value
123 123 * depends on both the leaf asked for in %eax and a secondary register. For
124 124 * example, Intel uses the value in %ecx on leaf 7 to indicate a sub-leaf to get
125 125 * additional information. Or when getting topology information in leaf 0xb, the
126 126 * initial value in %ecx changes which level of the topology that you are
127 127 * getting information about.
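 *
 * As an illustrative sketch, reading the leaf 7, sub-leaf 0 feature bits
 * means selecting the sub-leaf in %ecx before issuing the instruction:
 *
 *      struct cpuid_regs cp = { 0 };
 *
 *      cp.cp_eax = 7;
 *      cp.cp_ecx = 0;
 *      (void) __cpuid_insn(&cp);
 *
 * after which %ebx, %ecx, and %edx hold the structured extended feature
 * flags for that sub-leaf.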
128 128 *
129 129 * cpuid values are always kept to 32 bits regardless of whether or not the
130 130 * program is in 64-bit mode. When executing in 64-bit mode, the upper
131 131  * 32 bits of the registers are always set to zero so that the values are the
132 132 * same regardless of execution mode.
133 133 *
134 134 * ----------------------
135 135 * Identifying Processors
136 136 * ----------------------
137 137 *
138 138 * We can identify a processor in two steps. The first step looks at cpuid leaf
139 139 * 0. Leaf 0 contains the processor's vendor information. This is done by
140 140 * putting a 12 character string in %ebx, %ecx, and %edx. On AMD, it is
141 141 * 'AuthenticAMD' and on Intel it is 'GenuineIntel'.
142 142 *
143 143 * From there, a processor is identified by a combination of three different
144 144 * values:
145 145 *
146 146 * 1. Family
147 147 * 2. Model
148 148 * 3. Stepping
149 149 *
150 150 * Each vendor uses the family and model to uniquely identify a processor. The
151 151 * way that family and model are changed depends on the vendor. For example,
152 152  * Intel has been using family 0x6 for almost all of their processors since
153 153 * Pentium Pro/Pentium II era, often called the P6. The model is used to
154 154 * identify the exact processor. Different models are often used for the client
155 155 * (consumer) and server parts. Even though each processor often has major
156 156 * architectural differences, they still are considered the same family by
157 157 * Intel.
158 158 *
159 159 * On the other hand, each major AMD architecture generally has its own family.
160 160  * For example, the K8 is family 0xf, Bulldozer 0x15, and Zen 0x17. Within a
161 161  * family, the model number is used to help identify specific processors.
162 162 *
163 163 * The stepping is used to refer to a revision of a specific microprocessor. The
164 164 * term comes from equipment used to produce masks that are used to create
165 165 * integrated circuits.
166 166 *
167 167 * The information is present in leaf 1, %eax. In technical documentation you
168 168 * will see the terms extended model and extended family. The original family,
169 169  * model, and stepping fields were each 4 bits wide. If the base family is 0xf,
170 170  * then one is to consult the extended family and extended model fields, which
171 171  * occupy previously reserved bits: the extended family is added to the base
172 172  * family, and the extended model is prepended to the base model.
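 *
 * As a simplified sketch (the code in this file is more careful and vendor
 * aware), deriving the full family and model, given the leaf 1 %eax value in
 * a local variable eax, looks roughly like:
 *
 *      uint_t base_family = (eax >> 8) & 0xf;
 *      uint_t base_model = (eax >> 4) & 0xf;
 *      uint_t stepping = eax & 0xf;
 *      uint_t family = base_family;
 *      uint_t model = base_model;
 *
 *      if (base_family == 0xf)
 *              family += (eax >> 20) & 0xff;
 *      if (base_family == 0xf || base_family == 0x6)
 *              model += ((eax >> 16) & 0xf) << 4;
 *
 * Note that applying the extended model for base family 0x6 is an Intel
 * convention; AMD only applies the extended model when the base family is
 * 0xf.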
173 173 *
174 174 * When we process this information, we store the full family, model, and
175 175 * stepping in the struct cpuid_info members cpi_family, cpi_model, and
176 176 * cpi_step, respectively. Whenever you are performing comparisons with the
177 177 * family, model, and stepping, you should use these members and not the raw
178 178 * values from cpuid. If you must use the raw values from cpuid directly, you
179 179 * must make sure that you add the extended model and family to the base model
180 180 * and family.
181 181 *
182 182 * In general, we do not use information about the family, model, and stepping
183 183 * to determine whether or not a feature is present; that is generally driven by
184 184 * specific leaves. However, when something we care about on the processor is
185 185 * not considered 'architectural' meaning that it is specific to a set of
186 186 * processors and not promised in the architecture model to be consistent from
187 187 * generation to generation, then we will fall back on this information. The
188 188  * most common cases where this comes up are when we have to work around errata in
189 189 * the processor, are dealing with processor-specific features such as CPU
190 190 * performance counters, or we want to provide additional information for things
191 191 * such as fault management.
192 192 *
193 193 * While processors also do have a brand string, which is the name that people
194 194  * are familiar with when buying the processor, it is not meant for
195 195 * programmatic consumption. That is what the family, model, and stepping are
196 196 * for.
197 197 *
198 198 * ------------
199 199 * CPUID Passes
200 200 * ------------
201 201 *
202 202 * As part of performing feature detection, we break this into several different
203 203 * passes. The passes are as follows:
204 204 *
205 205 * Pass 0 This is a primordial pass done in locore.s to deal with
206 206 * Cyrix CPUs that don't support cpuid. The reality is that
207 207 * we likely don't run on them any more, but there is still
208 208 * logic for handling them.
209 209 *
210 210 * Pass 1 This is the primary pass and is responsible for doing a
211 211 * large number of different things:
212 212 *
213 213  * 1. Determining which vendor manufactured the CPU and
214 214  * the family, model, and stepping information.
215 215 *
216 216 * 2. Gathering a large number of feature flags to
217 217  * determine which features the CPU supports and which
218 218 * indicate things that we need to do other work in the OS
219 219 * to enable. Features detected this way are added to the
220 220 * x86_featureset which can be queried to
221 221 * determine what we should do. This includes processing
222 222 * all of the basic and extended CPU features that we care
223 223 * about.
224 224 *
225 225 * 3. Determining the CPU's topology. This includes
226 226 * information about how many cores and threads are present
227 227 * in the package. It also is responsible for figuring out
228 228 * which logical CPUs are potentially part of the same core
229 229 * and what other resources they might share. For more
230 230 * information see the 'Topology' section.
231 231 *
232 232 * 4. Determining the set of CPU security-specific features
233 233  * that we need to worry about and determining the
234 234 * appropriate set of workarounds.
235 235 *
236 236 * Pass 1 on the boot CPU occurs before KMDB is started.
237 237 *
238 238 * Pass 2 The second pass is done after startup(). Here, we check
239 239 * other miscellaneous features. Most of this is gathering
240 240 * additional basic and extended features that we'll use in
241 241 * later passes or for debugging support.
242 242 *
243 243 * Pass 3 The third pass occurs after the kernel memory allocator
244 244 * has been fully initialized. This gathers information
245 245 * where we might need dynamic memory available for our
246 246 * uses. This includes several varying width leaves that
247 247 * have cache information and the processor's brand string.
248 248 *
249 249 * Pass 4 The fourth and final normal pass is performed after the
250 250 * kernel has brought most everything online. This is
251 251 * invoked from post_startup(). In this pass, we go through
252 252 * the set of features that we have enabled and turn that
253 253 * into the hardware auxiliary vector features that
254 254 * userland receives. This is used by userland, primarily
255 255 * by the run-time link-editor (RTLD), though userland
256 256 * software could also refer to it directly.
257 257 *
258 258 * Microcode After a microcode update, we do a selective rescan of
259 259 * the cpuid leaves to determine what features have
260 260 * changed. Microcode updates can provide more details
261 261 * about security related features to deal with issues like
262 262 * Spectre and L1TF. On occasion, vendors have violated
263 263 * their contract and removed bits. However, we don't try
264 264 * to detect that because that puts us in a situation that
265 265  * we really can't deal with. As such, the only things we
266 266  * rescan today are security related features. See
267 267 * cpuid_pass_ucode().
268 268 *
269 269 * All of the passes (except pass 0) are run on all CPUs. However, for the most
270 270 * part we only care about what the boot CPU says about this information and use
271 271 * the other CPUs as a rough guide to sanity check that we have the same feature
272 272 * set.
273 273 *
274 274  * We do not support running multiple logical CPUs with different, let alone
275 275  * disjoint, feature sets.
276 276 *
277 277 * ------------------
278 278 * Processor Topology
279 279 * ------------------
280 280 *
281 281 * One of the important things that we need to do is to understand the topology
282 282 * of the underlying processor. When we say topology in this case, we're trying
283 283 * to understand the relationship between the logical CPUs that the operating
284 284 * system sees and the underlying physical layout. Different logical CPUs may
285 285 * share different resources which can have important consequences for the
286 286 * performance of the system. For example, they may share caches, execution
287 287 * units, and more.
288 288 *
289 289 * The topology of the processor changes from generation to generation and
290 290 * vendor to vendor. Along with that, different vendors use different
291 291 * terminology, and the operating system itself uses occasionally overlapping
292 292 * terminology. It's important to understand what this topology looks like so
293 293 * one can understand the different things that we try to calculate and
294 294 * determine.
295 295 *
296 296 * To get started, let's talk about a little bit of terminology that we've used
297 297 * so far, is used throughout this file, and is fairly generic across multiple
298 298 * vendors:
299 299 *
300 300 * CPU
301 301 * A central processing unit (CPU) refers to a logical and/or virtual
302 302 * entity that the operating system can execute instructions on. The
303 303 * underlying resources for this CPU may be shared between multiple
304 304 * entities; however, to the operating system it is a discrete unit.
305 305 *
306 306 * PROCESSOR and PACKAGE
307 307 *
308 308 * Generally, when we use the term 'processor' on its own, we are referring
309 309 * to the physical entity that one buys and plugs into a board. However,
310 310 * because processor has been overloaded and one might see it used to mean
311 311 * multiple different levels, we will instead use the term 'package' for
312 312 * the rest of this file. The term package comes from the electrical
313 313 * engineering side and refers to the physical entity that encloses the
314 314 * electronics inside. Strictly speaking the package can contain more than
315 315 * just the CPU, for example, on many processors it may also have what's
316 316  * called an 'integrated graphics processing unit (GPU)'. Because the
317 317 * package can encapsulate multiple units, it is the largest physical unit
318 318 * that we refer to.
319 319 *
320 320 * SOCKET
321 321 *
322 322  * A socket refers to a unit on a system board (generally the motherboard)
323 323 * that can receive a package. A single package, or processor, is plugged
324 324 * into a single socket. A system may have multiple sockets. Often times,
325 325 * the term socket is used interchangeably with package and refers to the
326 326  * electrical component that is plugged in, and not the receptacle itself.
327 327 *
328 328 * CORE
329 329 *
330 330 * A core refers to the physical instantiation of a CPU, generally, with a
331 331 * full set of hardware resources available to it. A package may contain
332 332 * multiple cores inside of it or it may just have a single one. A
333 333 * processor with more than one core is often referred to as 'multi-core'.
334 334 * In illumos, we will use the feature X86FSET_CMP to refer to a system
335 335 * that has 'multi-core' processors.
336 336 *
337 337 * A core may expose a single logical CPU to the operating system, or it
338 338 * may expose multiple CPUs, which we call threads, defined below.
339 339 *
340 340 * Some resources may still be shared by cores in the same package. For
341 341 * example, many processors will share the level 3 cache between cores.
342 342 * Some AMD generations share hardware resources between cores. For more
343 343 * information on that see the section 'AMD Topology'.
344 344 *
345 345 * THREAD and STRAND
346 346 *
347 347  * In this file, generally a thread refers to a hardware resource and not
348 348 * the operating system's logical abstraction. A thread is always exposed
349 349 * as an independent logical CPU to the operating system. A thread belongs
350 350 * to a specific core. A core may have more than one thread. When that is
351 351 * the case, the threads that are part of the same core are often referred
352 352 * to as 'siblings'.
353 353 *
354 354 * When multiple threads exist, this is generally referred to as
355 355 * simultaneous multi-threading (SMT). When Intel introduced this in their
356 356 * processors they called it hyper-threading (HT). When multiple threads
357 357 * are active in a core, they split the resources of the core. For example,
358 358 * two threads may share the same set of hardware execution units.
359 359 *
360 360 * The operating system often uses the term 'strand' to refer to a thread.
361 361 * This helps disambiguate it from the software concept.
362 362 *
363 363 * CHIP
364 364 *
365 365 * Unfortunately, the term 'chip' is dramatically overloaded. At its most
366 366 * base meaning, it is used to refer to a single integrated circuit, which
367 367 * may or may not be the only thing in the package. In illumos, when you
368 368 * see the term 'chip' it is almost always referring to the same thing as
369 369 * the 'package'. However, many vendors may use chip to refer to one of
370 370 * many integrated circuits that have been placed in the package. As an
371 371 * example, see the subsequent definition.
372 372 *
373 373 * To try and keep things consistent, we will only use chip when referring
374 374 * to the entire integrated circuit package, with the exception of the
375 375 * definition of multi-chip module (because it is in the name) and use the
376 376 * term 'die' when we want the more general, potential sub-component
377 377 * definition.
378 378 *
379 379 * DIE
380 380 *
381 381 * A die refers to an integrated circuit. Inside of the package there may
382 382 * be a single die or multiple dies. This is sometimes called a 'chip' in
383 383 * vendor's parlance, but in this file, we use the term die to refer to a
384 384 * subcomponent.
385 385 *
386 386 * MULTI-CHIP MODULE
387 387 *
388 388 * A multi-chip module (MCM) refers to putting multiple distinct chips that
389 389 * are connected together in the same package. When a multi-chip design is
390 390 * used, generally each chip is manufactured independently and then joined
391 391 * together in the package. For example, on AMD's Zen microarchitecture
392 392 * (family 0x17), the package contains several dies (the second meaning of
393 393 * chip from above) that are connected together.
394 394 *
395 395 * CACHE
396 396 *
397 397 * A cache is a part of the processor that maintains copies of recently
398 398 * accessed memory. Caches are split into levels and then into types.
399 399 * Commonly there are one to three levels, called level one, two, and
400 400 * three. The lower the level, the smaller it is, the closer it is to the
401 401 * execution units of the CPU, and the faster it is to access. The layout
402 402 * and design of the cache come in many different flavors, consult other
403 403 * resources for a discussion of those.
404 404 *
405 405 * Caches are generally split into two types, the instruction and data
406 406  * cache. The caches contain what their names suggest: the instruction
407 407 * cache has executable program text, while the data cache has all other
408 408 * memory that the processor accesses. As of this writing, data is kept
409 409 * coherent between all of the caches on x86, so if one modifies program
410 410 * text before it is executed, that will be in the data cache, and the
411 411 * instruction cache will be synchronized with that change when the
412 412 * processor actually executes those instructions. This coherency also
413 413 * covers the fact that data could show up in multiple caches.
414 414 *
415 415 * Generally, the lowest level caches are specific to a core. However, the
416 416  * last level cache is shared between some number of cores. The number of
417 417 * CPUs sharing this last level cache is important. This has implications
418 418 * for the choices that the scheduler makes, as accessing memory that might
419 419 * be in a remote cache after thread migration can be quite expensive.
420 420 *
421 421 * Sometimes, the word cache is abbreviated with a '$', because in US
422 422 * English the word cache is pronounced the same as cash. So L1D$ refers to
423 423 * the L1 data cache, and L2$ would be the L2 cache. This will not be used
424 424 * in the rest of this theory statement for clarity.
425 425 *
426 426 * MEMORY CONTROLLER
427 427 *
428 428 * The memory controller is a component that provides access to DRAM. Each
429 429 * memory controller can access a set number of DRAM channels. Each channel
430 430 * can have a number of DIMMs (sticks of memory) associated with it. A
431 431 * given package may have more than one memory controller. The association
432 432 * of the memory controller to a group of cores is important as it is
433 433 * cheaper to access memory on the controller that you are associated with.
434 434 *
435 435 * NUMA
436 436 *
437 437 * NUMA or non-uniform memory access, describes a way that systems are
438 438 * built. On x86, any processor core can address all of the memory in the
439 439  * system. However, when using multiple sockets or possibly within a
440 440 * multi-chip module, some of that memory is physically closer and some of
441 441 * it is further. Memory that is further away is more expensive to access.
442 442 * Consider the following image of multiple sockets with memory:
443 443 *
444 444 * +--------+ +--------+
445 445 * | DIMM A | +----------+ +----------+ | DIMM D |
446 446 * +--------+-+ | | | | +-+------+-+
447 447 * | DIMM B |=======| Socket 0 |======| Socket 1 |=======| DIMM E |
448 448 * +--------+-+ | | | | +-+------+-+
449 449 * | DIMM C | +----------+ +----------+ | DIMM F |
450 450 * +--------+ +--------+
451 451 *
452 452 * In this example, Socket 0 is closer to DIMMs A-C while Socket 1 is
453 453 * closer to DIMMs D-F. This means that it is cheaper for socket 0 to
454 454 * access DIMMs A-C and more expensive to access D-F as it has to go
455 455 * through Socket 1 to get there. The inverse is true for Socket 1. DIMMs
456 456 * D-F are cheaper than A-C. While the socket form is the most common, when
457 457 * using multi-chip modules, this can also sometimes occur. For another
458 458 * example of this that's more involved, see the AMD topology section.
459 459 *
460 460 *
461 461 * Intel Topology
462 462 * --------------
463 463 *
464 464  * Most Intel processors since Nehalem (as of this writing the current gen
465 465 * is Skylake / Cannon Lake) follow a fairly similar pattern. The CPU portion of
466 466 * the package is a single monolithic die. MCMs currently aren't used. Most
467 467 * parts have three levels of caches, with the L3 cache being shared between
468 468 * all of the cores on the package. The L1/L2 cache is generally specific to
469 469 * an individual core. The following image shows at a simplified level what
470 470 * this looks like. The memory controller is commonly part of something called
471 471  * the 'Uncore', which used to be a separate physical chip that was not a part
472 472  * of the package, but is now part of the same chip.
473 473 *
474 474 * +-----------------------------------------------------------------------+
475 475 * | Package |
476 476 * | +-------------------+ +-------------------+ +-------------------+ |
477 477 * | | Core | | Core | | Core | |
478 478 * | | +--------+ +---+ | | +--------+ +---+ | | +--------+ +---+ | |
479 479 * | | | Thread | | L | | | | Thread | | L | | | | Thread | | L | | |
480 480 * | | +--------+ | 1 | | | +--------+ | 1 | | | +--------+ | 1 | | |
481 481 * | | +--------+ | | | | +--------+ | | | | +--------+ | | | |
482 482 * | | | Thread | | | | | | Thread | | | | | | Thread | | | | |
483 483 * | | +--------+ +---+ | | +--------+ +---+ | | +--------+ +---+ | |
484 484 * | | +--------------+ | | +--------------+ | | +--------------+ | |
485 485 * | | | L2 Cache | | | | L2 Cache | | | | L2 Cache | | |
486 486 * | | +--------------+ | | +--------------+ | | +--------------+ | |
487 487 * | +-------------------+ +-------------------+ +-------------------+ |
488 488 * | +-------------------------------------------------------------------+ |
489 489 * | | Shared L3 Cache | |
490 490 * | +-------------------------------------------------------------------+ |
491 491 * | +-------------------------------------------------------------------+ |
492 492 * | | Memory Controller | |
493 493 * | +-------------------------------------------------------------------+ |
494 494 * +-----------------------------------------------------------------------+
495 495 *
496 496 * A side effect of this current architecture is that what we care about from a
497 497  * scheduling and topology perspective is simplified. In general we care about
498 498 * understanding which logical CPUs are part of the same core and socket.
499 499 *
500 500 * To determine the relationship between threads and cores, Intel initially used
501 501 * the identifier in the advanced programmable interrupt controller (APIC). They
502 502 * also added cpuid leaf 4 to give additional information about the number of
503 503 * threads and CPUs in the processor. With the addition of x2apic (which
504 504 * increased the number of addressable logical CPUs from 8-bits to 32-bits), an
505 505 * additional cpuid topology leaf 0xB was added.
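 *
 * As a simplified sketch of how leaf 0xB is used (the actual code below
 * handles more cases), sub-leaf 0 describes the thread (SMT) level and
 * sub-leaf 1 the core level. In each, %eax[4:0] is the number of APIC ID
 * bits to shift away to reach the next level up, and %edx is the x2APIC ID:
 *
 *      struct cpuid_regs cp = { 0 };
 *      uint32_t x2apic_id, smt_shift, core_shift;
 *
 *      cp.cp_eax = 0xb;
 *      cp.cp_ecx = 0;
 *      (void) __cpuid_insn(&cp);
 *      smt_shift = cp.cp_eax & 0x1f;
 *      x2apic_id = cp.cp_edx;
 *
 *      cp.cp_eax = 0xb;
 *      cp.cp_ecx = 1;
 *      cp.cp_ebx = cp.cp_edx = 0;
 *      (void) __cpuid_insn(&cp);
 *      core_shift = cp.cp_eax & 0x1f;
 *
 * With those values, x2apic_id >> smt_shift identifies the core and
 * x2apic_id >> core_shift identifies the package.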
506 506 *
507 507 * AMD Topology
508 508 * ------------
509 509 *
510 510 * When discussing AMD topology, we want to break this into three distinct
511 511 * generations of topology. There's the basic topology that has been used in
512 512 * family 0xf+ (Opteron, Athlon64), there's the topology that was introduced
513 513 * with family 0x15 (Bulldozer), and there's the topology that was introduced
514 514 * with family 0x17 (Zen). AMD also has some additional terminology that's worth
515 515 * talking about.
516 516 *
517 517 * Until the introduction of family 0x17 (Zen), AMD did not implement something
518 518 * that they considered SMT. Whether or not the AMD processors have SMT
519 519 * influences many things including scheduling and reliability, availability,
520 520 * and serviceability (RAS) features.
521 521 *
522 522 * NODE
523 523 *
524 524 * AMD uses the term node to refer to a die that contains a number of cores
525 525 * and I/O resources. Depending on the processor family and model, more
526 526 * than one node can be present in the package. When there is more than one
527 527 * node this indicates a multi-chip module. Usually each node has its own
528 528 * access to memory and I/O devices. This is important and generally
529 529 * different from the corresponding Intel Nehalem-Skylake+ processors. As a
530 530 * result, we track this relationship in the operating system.
531 531 *
532 532 * In processors with an L3 cache, the L3 cache is generally shared across
533 533 * the entire node, though the way this is carved up varies from generation
534 534 * to generation.
535 535 *
536 536 * BULLDOZER
537 537 *
538 538 * Starting with the Bulldozer family (0x15) and continuing until the
539 539 * introduction of the Zen microarchitecture, AMD introduced the idea of a
540 540 * compute unit. In a compute unit, two traditional cores share a number of
541 541 * hardware resources. Critically, they share the FPU, L1 instruction
542 542 * cache, and the L2 cache. Several compute units were then combined inside
543 543 * of a single node. Because the integer execution units, L1 data cache,
544 544 * and some other resources were not shared between the cores, AMD never
545 545 * considered this to be SMT.
546 546 *
547 547 * ZEN
548 548 *
549 549  * The Zen family (0x17) uses a multi-chip module (MCM) design; the module
550 550 * is called Zeppelin. These modules are similar to the idea of nodes used
551 551 * previously. Each of these nodes has two DRAM channels which all of the
552 552 * cores in the node can access uniformly. These nodes are linked together
553 553 * in the package, creating a NUMA environment.
554 554 *
555 555 * The Zeppelin die itself contains two different 'core complexes'. Each
556 556 * core complex consists of four cores which each have two threads, for a
557 557 * total of 8 logical CPUs per complex. Unlike other generations,
558 558 * where all the logical CPUs in a given node share the L3 cache, here each
559 559 * core complex has its own shared L3 cache.
560 560 *
561 561 * A further thing that we need to consider is that in some configurations,
562 562 * particularly with the Threadripper line of processors, not every die
563 563 * actually has its memory controllers wired up to actual memory channels.
564 564 * This means that some cores have memory attached to them and others
565 565 * don't.
566 566 *
567 567 * To put Zen in perspective, consider the following images:
568 568 *
569 569 * +--------------------------------------------------------+
570 570 * | Core Complex |
571 571 * | +-------------------+ +-------------------+ +---+ |
572 572 * | | Core +----+ | | Core +----+ | | | |
573 573 * | | +--------+ | L2 | | | +--------+ | L2 | | | | |
574 574 * | | | Thread | +----+ | | | Thread | +----+ | | | |
575 575 * | | +--------+-+ +--+ | | +--------+-+ +--+ | | L | |
576 576 * | | | Thread | |L1| | | | Thread | |L1| | | 3 | |
577 577 * | | +--------+ +--+ | | +--------+ +--+ | | | |
578 578 * | +-------------------+ +-------------------+ | C | |
579 579 * | +-------------------+ +-------------------+ | a | |
580 580 * | | Core +----+ | | Core +----+ | | c | |
581 581 * | | +--------+ | L2 | | | +--------+ | L2 | | | h | |
582 582 * | | | Thread | +----+ | | | Thread | +----+ | | e | |
583 583 * | | +--------+-+ +--+ | | +--------+-+ +--+ | | | |
584 584 * | | | Thread | |L1| | | | Thread | |L1| | | | |
585 585 * | | +--------+ +--+ | | +--------+ +--+ | | | |
586 586 * | +-------------------+ +-------------------+ +---+ |
587 587 * | |
588 588 * +--------------------------------------------------------+
589 589 *
590 590 * This first image represents a single Zen core complex that consists of four
591 591 * cores.
592 592 *
593 593 *
594 594 * +--------------------------------------------------------+
595 595 * | Zeppelin Die |
596 596 * | +--------------------------------------------------+ |
597 597 * | | I/O Units (PCIe, SATA, USB, etc.) | |
598 598 * | +--------------------------------------------------+ |
599 599 * | HH |
600 600 * | +-----------+ HH +-----------+ |
601 601 * | | | HH | | |
602 602 * | | Core |==========| Core | |
603 603 * | | Complex |==========| Complex | |
604 604 * | | | HH | | |
605 605 * | +-----------+ HH +-----------+ |
606 606 * | HH |
607 607 * | +--------------------------------------------------+ |
608 608 * | | Memory Controller | |
609 609 * | +--------------------------------------------------+ |
610 610 * | |
611 611 * +--------------------------------------------------------+
612 612 *
613 613 * This image represents a single Zeppelin Die. Note how both cores are
614 614 * connected to the same memory controller and I/O units. While each core
615 615 * complex has its own L3 cache as seen in the first image, they both have
616 616 * uniform access to memory.
617 617 *
618 618 *
619 619 * PP PP
620 620 * PP PP
621 621 * +----------PP---------------------PP---------+
622 622 * | PP PP |
623 623 * | +-----------+ +-----------+ |
624 624 * | | | | | |
625 625 * MMMMMMMMM| Zeppelin |==========| Zeppelin |MMMMMMMMM
626 626 * MMMMMMMMM| Die |==========| Die |MMMMMMMMM
627 627 * | | | | | |
628 628 * | +-----------+ooo ...+-----------+ |
629 629 * | HH ooo ... HH |
630 630 * | HH oo.. HH |
631 631 * | HH ..oo HH |
632 632 * | HH ... ooo HH |
633 633 * | +-----------+... ooo+-----------+ |
634 634 * | | | | | |
635 635 * MMMMMMMMM| Zeppelin |==========| Zeppelin |MMMMMMMMM
636 636 * MMMMMMMMM| Die |==========| Die |MMMMMMMMM
637 637 * | | | | | |
638 638 * | +-----------+ +-----------+ |
639 639 * | PP PP |
640 640 * +----------PP---------------------PP---------+
641 641 * PP PP
642 642 * PP PP
643 643 *
644 644 * This image represents a single Zen package. In this example, it has four
645 645 * Zeppelin dies, though some configurations only have a single one. In this
646 646 * example, each die is directly connected to the next. Also, each die is
647 647 * represented as being connected to memory by the 'M' character and connected
648 648 * to PCIe devices and other I/O, by the 'P' character. Because each Zeppelin
649 649 * die is made up of two core complexes, we have multiple different NUMA
650 650 * domains that we care about for these systems.
651 651 *
652 652 * CPUID LEAVES
653 653 *
654 654 * There are a few different CPUID leaves that we can use to try and understand
655 655 * the actual state of the world. As part of the introduction of family 0xf, AMD
656 656 * added CPUID leaf 0x80000008. This leaf tells us the number of logical
657 657 * processors that are in the system. Because families before Zen didn't have
658 658 * SMT, this was always the number of cores that were in the system. However, it
659 659 * should always be thought of as the number of logical threads to be consistent
660 660 * between generations. In addition we also get the size of the APIC ID that is
661 661 * used to represent the number of logical processors. This is important for
662 662 * deriving topology information.
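 *
 * As an illustrative decode of leaf 0x80000008 %ecx (see the AMD manuals
 * for the authoritative layout), the fields of interest are roughly:
 *
 *      struct cpuid_regs cp = { 0 };
 *      uint_t nthreads, coreid_size;
 *
 *      cp.cp_eax = 0x80000008;
 *      (void) __cpuid_insn(&cp);
 *      nthreads = (cp.cp_ecx & 0xff) + 1;
 *      coreid_size = (cp.cp_ecx >> 12) & 0xf;
 *
 * where nthreads is the number of logical processors in the package and,
 * when non-zero, coreid_size is the number of APIC ID bits used to number
 * them.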
663 663 *
664 664 * In the Bulldozer family, AMD added leaf 0x8000001E. The information varies a
665 665 * bit between Bulldozer and later families, but it is quite useful in
666 666 * determining the topology information. Because this information has changed
667 667 * across family generations, it's worth calling out what these mean
668 668 * explicitly. The registers have the following meanings:
669 669 *
670 670 * %eax The APIC ID. The entire register is defined to have a 32-bit
671 671 * APIC ID, even though on systems without x2apic support, it will
672 672 * be limited to 8 bits.
673 673 *
674 674 * %ebx On Bulldozer-era systems this contains information about the
675 675 * number of cores that are in a compute unit (cores that share
676 676 * resources). It also contains a per-package compute unit ID that
677 677 * identifies which compute unit the logical CPU is a part of.
678 678 *
679 679 * On Zen-era systems this instead contains the number of threads
680 680 * per core and the ID of the core that the logical CPU is a part
681 681 * of. Note, this ID is unique only to the package, it is not
682 682 * globally unique across the entire system.
683 683 *
684 684 * %ecx This contains the number of nodes that exist in the package. It
685 685 * also contains an ID that identifies which node the logical CPU
686 686 * is a part of.
687 687 *
688 688 * Finally, we also use cpuid leaf 0x8000001D to determine information about the
689 689  * cache layout and which logical CPUs are sharing which caches.
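 *
 * To make the register breakdown above concrete, a rough Zen-era decode of
 * leaf 0x8000001E (illustrative only; the real code below is family aware)
 * looks like:
 *
 *      struct cpuid_regs cp = { 0 };
 *      uint_t apic_id, threads_per_core, core_id, nodes_per_pkg, node_id;
 *
 *      cp.cp_eax = 0x8000001e;
 *      (void) __cpuid_insn(&cp);
 *      apic_id = cp.cp_eax;
 *      threads_per_core = ((cp.cp_ebx >> 8) & 0xff) + 1;
 *      core_id = cp.cp_ebx & 0xff;
 *      nodes_per_pkg = ((cp.cp_ecx >> 8) & 0x7) + 1;
 *      node_id = cp.cp_ecx & 0xff;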
690 690 *
691 691 * illumos Topology
692 692 * ----------------
693 693 *
694 694 * Based on the above we synthesize the information into several different
695 695 * variables that we store in the 'struct cpuid_info'. We'll go into the details
696 696 * of what each member is supposed to represent and their uniqueness. In
697 697 * general, there are two levels of uniqueness that we care about. We care about
698 698 * an ID that is globally unique. That means that it will be unique across all
699 699 * entities in the system. For example, the default logical CPU ID is globally
700 700 * unique. On the other hand, there is some information that we only care about
701 701 * being unique within the context of a single package / socket. Here are the
702 702 * variables that we keep track of and their meaning.
703 703 *
704 704  * Several of the values that represent an identifier, with the exception
705 705 * of cpi_apicid, are allowed to be synthetic.
706 706 *
707 707 *
708 708 * cpi_apicid
709 709 *
710 710 * This is the value of the CPU's APIC id. This should be the full 32-bit
711 711 * ID if the CPU is using the x2apic. Otherwise, it should be the 8-bit
712 712 * APIC ID. This value is globally unique between all logical CPUs across
713 713 * all packages. This is usually required by the APIC.
714 714 *
715 715 * cpi_chipid
716 716 *
717 717 * This value indicates the ID of the package that the logical CPU is a
718 718 * part of. This value is allowed to be synthetic. It is usually derived by
719 719 * taking the CPU's APIC ID and determining how many bits are used to
720 720 * represent CPU cores in the package. All logical CPUs that are part of
721 721 * the same package must have the same value.
722 722 *
723 723 * cpi_coreid
724 724 *
725 725 * This represents the ID of a CPU core. Two logical CPUs should only have
726 726 * the same cpi_coreid value if they are part of the same core. These
727 727 * values may be synthetic. On systems that support SMT, this value is
728 728 * usually derived from the APIC ID, otherwise it is often synthetic and
729 729 * just set to the value of the cpu_id in the cpu_t.
730 730 *
731 731 * cpi_pkgcoreid
732 732 *
733 733 * This is similar to the cpi_coreid in that logical CPUs that are part of
734 734 * the same core should have the same ID. The main difference is that these
735 735 * values are only required to be unique to a given socket.
736 736 *
737 737 * cpi_clogid
738 738 *
739 739 * This represents the logical ID of a logical CPU. This value should be
740 740 * unique within a given socket for each logical CPU. This is allowed to be
741 741 * synthetic, though it is usually based off of the CPU's apic ID. The
742 742  * broader system expects that logical CPUs that are part of the same core
743 743  * have contiguous numbers. For example, if there were two threads per core,
744 744  * then the two IDs divided by two should be the same, the first modulo two
745 745  * should be zero, and the second modulo two should be one. So IDs 4 and 5
746 746 * indicate two logical CPUs that are part of the same core. But IDs 5 and
747 747 * 6 represent two logical CPUs that are part of different cores.
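 *
 * Put another way, with two threads per core a rough sibling check based
 * only on these IDs is (id_a / 2) == (id_b / 2): IDs 4 and 5 both yield 2
 * and so are siblings, while IDs 5 and 6 yield 2 and 3 and so are not.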
748 748 *
749 749 * While it is common for the cpi_coreid and the cpi_clogid to be derived
750 750 * from the same source, strictly speaking, they don't have to be and the
751 751 * two values should be considered logically independent. One should not
752 752 * try to compare a logical CPU's cpi_coreid and cpi_clogid to determine
753 753 * some kind of relationship. While this is tempting, we've seen cases on
754 754 * AMD family 0xf where the system's cpu id is not related to its APIC ID.
755 755 *
756 756 * cpi_ncpu_per_chip
757 757 *
758 758 * This value indicates the total number of logical CPUs that exist in the
759 759 * physical package. Critically, this is not the number of logical CPUs
760 760 * that exist for just the single core.
761 761 *
762 762 * This value should be the same for all logical CPUs in the same package.
763 763 *
764 764 * cpi_ncore_per_chip
765 765 *
766 766 * This value indicates the total number of physical CPU cores that exist
767 767 * in the package. The system compares this value with cpi_ncpu_per_chip to
768 768 * determine if simultaneous multi-threading (SMT) is enabled. When
769 769 * cpi_ncpu_per_chip equals cpi_ncore_per_chip, then there is no SMT and
770 770  * the X86FSET_HTT feature is not set. If cpi_ncore_per_chip is greater than
771 771  * one, then we consider the processor to have the feature X86FSET_CMP, to
772 772 * indicate that there is support for more than one core.
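 *
 * A condensed sketch of that determination (the actual checks are spread
 * across the pass 1 code below and consider more inputs):
 *
 *      if (cpi->cpi_ncpu_per_chip > cpi->cpi_ncore_per_chip)
 *              add_x86_feature(featureset, X86FSET_HTT);
 *      if (cpi->cpi_ncore_per_chip > 1)
 *              add_x86_feature(featureset, X86FSET_CMP);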
773 773 *
774 774 * This value should be the same for all logical CPUs in the same package.
775 775 *
776 776 * cpi_procnodes_per_pkg
777 777 *
778 778 * This value indicates the number of 'nodes' that exist in the package.
779 779 * When processors are actually a multi-chip module, this represents the
780 780 * number of such modules that exist in the package. Currently, on Intel
781 781 * based systems this member is always set to 1.
782 782 *
783 783 * This value should be the same for all logical CPUs in the same package.
784 784 *
785 785 * cpi_procnodeid
786 786 *
787 787 * This value indicates the ID of the node that the logical CPU is a part
788 788 * of. All logical CPUs that are in the same node must have the same value
789 789 * here. This value must be unique across all of the packages in the
790 790 * system. On Intel based systems, this is currently set to the value in
791 791 * cpi_chipid because there is only one node.
792 792 *
793 793 * cpi_cores_per_compunit
794 794 *
795 795 * This value indicates the number of cores that are part of a compute
796 796 * unit. See the AMD topology section for this. This member only has real
797 797 * meaning currently for AMD Bulldozer family processors. For all other
798 798 * processors, this should currently be set to 1.
799 799 *
800 800 * cpi_compunitid
801 801 *
802 802 * This indicates the compute unit that the logical CPU belongs to. For
803 803 * processors without AMD Bulldozer-style compute units this should be set
804 804 * to the value of cpi_coreid.
805 805 *
806 806 * cpi_ncpu_shr_last_cache
807 807 *
808 808 * This indicates the number of logical CPUs that are sharing the same last
809 809 * level cache. This value should be the same for all CPUs that are sharing
810 810 * that cache. The last cache refers to the cache that is closest to memory
811 811 * and furthest away from the CPU.
812 812 *
813 813 * cpi_last_lvl_cacheid
814 814 *
815 815 * This indicates the ID of the last cache that the logical CPU uses. This
816 816 * cache is often shared between multiple logical CPUs and is the cache
817 817 * that is closest to memory and furthest away from the CPU. This value
818 818 * should be the same for a group of logical CPUs only if they actually
819 819 * share the same last level cache. IDs should not overlap between
820 820 * packages.
821 821 *
822 822 * cpi_ncore_bits
823 823 *
824 824 * This indicates the number of bits that are required to represent all of
825 825 * the cores in the system. As cores are derived based on their APIC IDs,
826 826 * we aren't guaranteed a run of APIC IDs starting from zero. It's OK for
827 827 * this value to be larger than the actual number of IDs that are present
828 828 * in the system. This is used to size tables by the CMI framework. It is
829 829 * only filled in for Intel and AMD CPUs.
830 830 *
831 831 * cpi_nthread_bits
832 832 *
833 833 * This indicates the number of bits required to represent all of the IDs
834 834 * that cover the logical CPUs that exist on a given core. It's OK for this
835 835 * value to be larger than the actual number of IDs that are present in the
836 836 * system. This is used to size tables by the CMI framework. It is
837 837 * only filled in for Intel and AMD CPUs.
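 *
 * As a generic illustration of the sizing math (the real values come from
 * vendor-specific CPUID leaves, not from a count; bits_needed is a
 * hypothetical helper shown only for illustration), the number of bits
 * needed to cover n distinct IDs is the smallest b with (1 << b) >= n:
 *
 *      uint_t
 *      bits_needed(uint_t n)
 *      {
 *              uint_t b = 0;
 *
 *              while ((1U << b) < n)
 *                      b++;
 *              return (b);
 *      }
 *
 * For example, a package with 6 cores needs 3 core bits since 1 << 3 = 8.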
838 838 *
839 839 * -----------
840 840 * Hypervisors
841 841 * -----------
842 842 *
843 843 * If trying to manage the differences between vendors wasn't bad enough, it can
844 844 * get worse thanks to our friend hardware virtualization. Hypervisors are given
845 845 * the ability to interpose on all cpuid instructions and change them to suit
846 846 * their purposes. In general, this is necessary as the hypervisor wants to be
847 847 * able to present a more uniform set of features or not necessarily give the
848 848 * guest operating system kernel knowledge of all features so it can be
849 849 * more easily migrated between systems.
850 850 *
851 851 * When it comes to trying to determine topology information, this can be a
852 852 * double edged sword. When a hypervisor doesn't actually implement a cpuid
853 853 * leaf, it'll often return all zeros. Because of that, you'll often see various
854 854  * checks scattered about, verifying that fields are non-zero, before we assume
855 855  * we can use them.
856 856 *
857 857 * When it comes to topology information, the hypervisor is often incentivized
858 858 * to lie to you about topology. This is because it doesn't always actually
859 859 * guarantee that topology at all. The topology path we take in the system
860 860 * depends on how the CPU advertises itself. If it advertises itself as an Intel
861 861 * or AMD CPU, then we basically do our normal path. However, when they don't
862 862 * use an actual vendor, then that usually turns into multiple one-core CPUs
863 863 * that we enumerate that are often on different sockets. The actual behavior
864 864 * depends greatly on what the hypervisor actually exposes to us.
865 865 *
866 866 * --------------------
867 867 * Exposing Information
868 868 * --------------------
869 869 *
870 870 * We expose CPUID information in three different forms in the system.
871 871 *
872 872 * The first is through the x86_featureset variable. This is used in conjunction
873 873 * with the is_x86_feature() function. This is queried by x86-specific functions
874 874 * to determine which features are or aren't present in the system and to make
875 875 * decisions based upon them. For example, users of this include everything from
876 876 * parts of the system dedicated to reliability, availability, and
877 877 * serviceability (RAS), to making decisions about how to handle security
878 878 * mitigations, to various x86-specific drivers. General purpose or
879 879 * architecture independent drivers should never be calling this function.
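 *
 * For example, an x86-specific consumer would test for a feature roughly
 * like this (use_sse2_path is a hypothetical flag used only for
 * illustration):
 *
 *      if (is_x86_feature(x86_featureset, X86FSET_SSE2))
 *              use_sse2_path = B_TRUE;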
880 880 *
881 881 * The second means is through the auxiliary vector. The auxiliary vector is a
882 882 * series of tagged data that the kernel passes down to a user program when it
883 883 * begins executing. This information is used to indicate to programs what
884 884 * instruction set extensions are present. For example, information about the
885 885 * CPU supporting the machine check architecture (MCA) wouldn't be passed down
886 886 * since user programs cannot make use of it. However, things like the AVX
887 887 * instruction sets are. Programs use this information to make run-time
888 888 * decisions about what features they should use. As an example, the run-time
889 889 * link-editor (rtld) can relocate different functions depending on the hardware
890 890 * support available.
891 891 *
892 892 * The final form is through a series of accessor functions that all have the
893 893 * form cpuid_get*. This is used by a number of different subsystems in the
894 894 * kernel to determine more detailed information about what we're running on,
895 895 * topology information, etc. Some of these subsystems include processor groups
896 896  * (uts/common/os/pg.c), CPU Module Interface (uts/i86pc/os/cmi.c), ACPI,
897 897 * microcode, and performance monitoring. These functions all ASSERT that the
898 898 * CPU they're being called on has reached a certain cpuid pass. If the passes
899 899 * are rearranged, then this needs to be adjusted.
900 900 */
901 901
902 902 #include <sys/types.h>
903 903 #include <sys/archsystm.h>
904 904 #include <sys/x86_archext.h>
905 905 #include <sys/kmem.h>
906 906 #include <sys/systm.h>
907 907 #include <sys/cmn_err.h>
908 908 #include <sys/sunddi.h>
909 909 #include <sys/sunndi.h>
910 910 #include <sys/cpuvar.h>
911 911 #include <sys/processor.h>
912 912 #include <sys/sysmacros.h>
913 913 #include <sys/pg.h>
914 914 #include <sys/fp.h>
915 915 #include <sys/controlregs.h>
916 916 #include <sys/bitmap.h>
917 917 #include <sys/auxv_386.h>
918 918 #include <sys/memnode.h>
919 919 #include <sys/pci_cfgspace.h>
920 920 #include <sys/comm_page.h>
921 921 #include <sys/mach_mmu.h>
922 922 #include <sys/ucode.h>
923 923 #include <sys/tsc.h>
924 924
925 925 #ifdef __xpv
926 926 #include <sys/hypervisor.h>
927 927 #else
928 928 #include <sys/ontrap.h>
929 929 #endif
930 930
931 931 uint_t x86_vendor = X86_VENDOR_IntelClone;
932 932 uint_t x86_type = X86_TYPE_OTHER;
933 933 uint_t x86_clflush_size = 0;
934 934
935 935 #if defined(__xpv)
936 936 int x86_use_pcid = 0;
937 937 int x86_use_invpcid = 0;
938 938 #else
939 939 int x86_use_pcid = -1;
940 940 int x86_use_invpcid = -1;
941 941 #endif
942 942
943 943 uint_t pentiumpro_bug4046376;
944 944
945 945 uchar_t x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)];
946 946
947 947 static char *x86_feature_names[NUM_X86_FEATURES] = {
948 948 "lgpg",
949 949 "tsc",
950 950 "msr",
951 951 "mtrr",
952 952 "pge",
953 953 "de",
954 954 "cmov",
955 955 "mmx",
956 956 "mca",
957 957 "pae",
958 958 "cv8",
959 959 "pat",
960 960 "sep",
961 961 "sse",
962 962 "sse2",
963 963 "htt",
964 964 "asysc",
965 965 "nx",
966 966 "sse3",
967 967 "cx16",
968 968 "cmp",
969 969 "tscp",
970 970 "mwait",
971 971 "sse4a",
972 972 "cpuid",
973 973 "ssse3",
974 974 "sse4_1",
975 975 "sse4_2",
976 976 "1gpg",
977 977 "clfsh",
978 978 "64",
979 979 "aes",
980 980 "pclmulqdq",
981 981 "xsave",
982 982 "avx",
983 983 "vmx",
984 984 "svm",
985 985 "topoext",
986 986 "f16c",
987 987 "rdrand",
988 988 "x2apic",
989 989 "avx2",
990 990 "bmi1",
991 991 "bmi2",
992 992 "fma",
993 993 "smep",
994 994 "smap",
995 995 "adx",
996 996 "rdseed",
997 997 "mpx",
998 998 "avx512f",
999 999 "avx512dq",
1000 1000 "avx512pf",
1001 1001 "avx512er",
1002 1002 "avx512cd",
1003 1003 "avx512bw",
1004 1004 "avx512vl",
1005 1005 "avx512fma",
1006 1006 "avx512vbmi",
1007 1007 "avx512_vpopcntdq",
1008 1008 "avx512_4vnniw",
1009 1009 "avx512_4fmaps",
1010 1010 "xsaveopt",
1011 1011 "xsavec",
1012 1012 "xsaves",
1013 1013 "sha",
1014 1014 "umip",
1015 1015 "pku",
1016 1016 "ospke",
1017 1017 "pcid",
1018 1018 "invpcid",
1019 1019 "ibrs",
1020 1020 "ibpb",
1021 1021 "stibp",
1022 1022 "ssbd",
1023 1023 "ssbd_virt",
1024 1024 "rdcl_no",
1025 1025 "ibrs_all",
1026 1026 "rsba",
1027 1027 "ssb_no",
1028 1028 "stibp_all",
1029 1029 "flush_cmd",
1030 1030 "l1d_vmentry_no",
1031 1031 "fsgsbase",
1032 1032 "clflushopt",
1033 1033 "clwb",
1034 1034 "monitorx",
1035 1035 "clzero",
1036 1036 "xop",
1037 1037 "fma4",
1038 1038 "tbm",
1039 1039 "avx512_vnni"
1040 1040 };
1041 1041
1042 1042 boolean_t
1043 1043 is_x86_feature(void *featureset, uint_t feature)
1044 1044 {
1045 1045 ASSERT(feature < NUM_X86_FEATURES);
1046 1046 return (BT_TEST((ulong_t *)featureset, feature));
1047 1047 }
1048 1048
1049 1049 void
1050 1050 add_x86_feature(void *featureset, uint_t feature)
1051 1051 {
1052 1052 ASSERT(feature < NUM_X86_FEATURES);
1053 1053 BT_SET((ulong_t *)featureset, feature);
1054 1054 }
1055 1055
1056 1056 void
1057 1057 remove_x86_feature(void *featureset, uint_t feature)
1058 1058 {
1059 1059 ASSERT(feature < NUM_X86_FEATURES);
1060 1060 BT_CLEAR((ulong_t *)featureset, feature);
1061 1061 }
1062 1062
1063 1063 boolean_t
1064 1064 compare_x86_featureset(void *setA, void *setB)
1065 1065 {
1066 1066 /*
1067 1067 * We assume that the unused bits of the bitmap are always zero.
1068 1068 */
1069 1069 if (memcmp(setA, setB, BT_SIZEOFMAP(NUM_X86_FEATURES)) == 0) {
1070 1070 return (B_TRUE);
1071 1071 } else {
1072 1072 return (B_FALSE);
1073 1073 }
1074 1074 }
1075 1075
1076 1076 void
1077 1077 print_x86_featureset(void *featureset)
1078 1078 {
1079 1079 uint_t i;
1080 1080
1081 1081 for (i = 0; i < NUM_X86_FEATURES; i++) {
1082 1082 if (is_x86_feature(featureset, i)) {
1083 1083 cmn_err(CE_CONT, "?x86_feature: %s\n",
1084 1084 x86_feature_names[i]);
1085 1085 }
1086 1086 }
1087 1087 }
1088 1088
1089 1089 /* Note: This is the maximum size for the CPU, not the size of the structure. */
1090 1090 static size_t xsave_state_size = 0;
1091 1091 uint64_t xsave_bv_all = (XFEATURE_LEGACY_FP | XFEATURE_SSE);
1092 1092 boolean_t xsave_force_disable = B_FALSE;
1093 1093 extern int disable_smap;
1094 1094
1095 1095 /*
1096 1096 * This is set to platform type we are running on.
1097 1097  * This is set to the platform type we are running on.
1098 1098 static int platform_type = -1;
1099 1099
1100 1100 #if !defined(__xpv)
1101 1101 /*
1102 1102 * Variable to patch if hypervisor platform detection needs to be
1103 1103 * disabled (e.g. platform_type will always be HW_NATIVE if this is 0).
1104 1104 */
1105 1105 int enable_platform_detection = 1;
1106 1106 #endif
1107 1107
1108 1108 /*
1109 1109 * monitor/mwait info.
1110 1110 *
1111 1111 * size_actual and buf_actual are the real address and size allocated to get
1112 1112  * proper mwait_buf alignment. buf_actual and size_actual should be passed
1113 1113 * to kmem_free(). Currently kmem_alloc() and mwait happen to both use
1114 1114  * processor cache-line alignment, but this is not guaranteed in the future.
1115 1115 */
1116 1116 struct mwait_info {
1117 1117 size_t mon_min; /* min size to avoid missed wakeups */
1118 1118 size_t mon_max; /* size to avoid false wakeups */
1119 1119 size_t size_actual; /* size actually allocated */
1120 1120 void *buf_actual; /* memory actually allocated */
1121 1121 uint32_t support; /* processor support of monitor/mwait */
1122 1122 };
1123 1123
1124 1124 /*
1125 1125  * xsave/xrstor info.
1126 1126 *
1127 1127 * This structure contains HW feature bits and the size of the xsave save area.
1128 1128 * Note: the kernel declares a fixed size (AVX_XSAVE_SIZE) structure
1129 1129 * (xsave_state) to describe the xsave layout. However, at runtime the
1130 1130 * per-lwp xsave area is dynamically allocated based on xsav_max_size. The
1131 1131 * xsave_state structure simply represents the legacy layout of the beginning
1132 1132 * of the xsave area.
1133 1133 */
1134 1134 struct xsave_info {
1135 1135 uint32_t xsav_hw_features_low; /* Supported HW features */
1136 1136 uint32_t xsav_hw_features_high; /* Supported HW features */
1137 1137 size_t xsav_max_size; /* max size save area for HW features */
1138 1138 size_t ymm_size; /* AVX: size of ymm save area */
1139 1139 size_t ymm_offset; /* AVX: offset for ymm save area */
1140 1140 size_t bndregs_size; /* MPX: size of bndregs save area */
1141 1141 size_t bndregs_offset; /* MPX: offset for bndregs save area */
1142 1142 size_t bndcsr_size; /* MPX: size of bndcsr save area */
1143 1143 size_t bndcsr_offset; /* MPX: offset for bndcsr save area */
1144 1144 size_t opmask_size; /* AVX512: size of opmask save */
1145 1145 size_t opmask_offset; /* AVX512: offset for opmask save */
1146 1146 size_t zmmlo_size; /* AVX512: size of zmm 256 save */
1147 1147 size_t zmmlo_offset; /* AVX512: offset for zmm 256 save */
1148 1148 size_t zmmhi_size; /* AVX512: size of zmm hi reg save */
1149 1149 size_t zmmhi_offset; /* AVX512: offset for zmm hi reg save */
1150 1150 };
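
As an illustrative aside (editorial, not part of this change): the
per-component sizes and offsets cached in struct xsave_info come from CPUID
leaf 0xD, where sub-leaf N reports the size of state component N in %eax and
its offset from the start of the xsave area in %ebx. A minimal userland
sketch, assuming GCC/Clang's <cpuid.h> is available (the kernel uses
__cpuid_insn() instead):

	#include <cpuid.h>
	#include <stdio.h>

	/* Print the save-area size and offset of xsave state component 'comp'. */
	static void
	show_xsave_component(unsigned int comp)
	{
		unsigned int eax, ebx, ecx, edx;

		if (__get_cpuid_count(0xD, comp, &eax, &ebx, &ecx, &edx) == 0)
			return;
		(void) printf("component %u: size %u bytes, offset %u\n",
		    comp, eax, ebx);
	}

For example, component 2 corresponds to the AVX ymm state cached above.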
1151 1151
1152 1152
1153 1153 /*
1154 1154 * These constants determine how many of the elements of the
1155 1155 * cpuid we cache in the cpuid_info data structure; the
1156 1156 * remaining elements are accessible via the cpuid instruction.
1157 1157 */
1158 1158
1159 1159 #define NMAX_CPI_STD 8 /* eax = 0 .. 7 */
1160 1160 #define NMAX_CPI_EXTD 0x1f /* eax = 0x80000000 .. 0x8000001e */
1161 1161
1162 1162 /*
1163 1163 * See the big theory statement for a more detailed explanation of what some of
1164 1164 * these members mean.
1165 1165 */
1166 1166 struct cpuid_info {
1167 1167 uint_t cpi_pass; /* last pass completed */
1168 1168 /*
1169 1169 * standard function information
1170 1170 */
1171 1171 uint_t cpi_maxeax; /* fn 0: %eax */
1172 1172 char cpi_vendorstr[13]; /* fn 0: %ebx:%ecx:%edx */
1173 1173 uint_t cpi_vendor; /* enum of cpi_vendorstr */
1174 1174
1175 1175 uint_t cpi_family; /* fn 1: extended family */
1176 1176 uint_t cpi_model; /* fn 1: extended model */
1177 1177 uint_t cpi_step; /* fn 1: stepping */
1178 1178 chipid_t cpi_chipid; /* fn 1: %ebx: Intel: chip # */
1179 1179 /* AMD: package/socket # */
1180 1180 uint_t cpi_brandid; /* fn 1: %ebx: brand ID */
1181 1181 int cpi_clogid; /* fn 1: %ebx: thread # */
1182 1182 uint_t cpi_ncpu_per_chip; /* fn 1: %ebx: logical cpu count */
1183 1183 uint8_t cpi_cacheinfo[16]; /* fn 2: intel-style cache desc */
1184 1184 uint_t cpi_ncache; /* fn 2: number of elements */
1185 1185 uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */
1186 1186 id_t cpi_last_lvl_cacheid; /* fn 4: %eax: derived cache id */
1187 1187 uint_t cpi_cache_leaf_size; /* Number of cache elements */
1188 1188 /* Intel fn: 4, AMD fn: 8000001d */
1189 1189 	struct cpuid_regs **cpi_cache_leaves;	/* Actual leaves from above */
1190 1190 struct cpuid_regs cpi_std[NMAX_CPI_STD]; /* 0 .. 7 */
1191 1191 /*
1192 1192 * extended function information
1193 1193 */
1194 1194 uint_t cpi_xmaxeax; /* fn 0x80000000: %eax */
1195 1195 char cpi_brandstr[49]; /* fn 0x8000000[234] */
1196 1196 uint8_t cpi_pabits; /* fn 0x80000006: %eax */
1197 1197 uint8_t cpi_vabits; /* fn 0x80000006: %eax */
1198 1198 uint8_t cpi_fp_amd_save; /* AMD: FP error pointer save rqd. */
1199 1199 struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x800000XX */
1200 1200
1201 1201 id_t cpi_coreid; /* same coreid => strands share core */
1202 1202 int cpi_pkgcoreid; /* core number within single package */
1203 1203 uint_t cpi_ncore_per_chip; /* AMD: fn 0x80000008: %ecx[7-0] */
1204 1204 /* Intel: fn 4: %eax[31-26] */
1205 1205
1206 1206 /*
1207 1207 * These values represent the number of bits that are required to store
1208 1208 * information about the number of cores and threads.
1209 1209 */
1210 1210 uint_t cpi_ncore_bits;
1211 1211 uint_t cpi_nthread_bits;
1212 1212 /*
1213 1213 * supported feature information
1214 1214 */
1215 1215 uint32_t cpi_support[6];
1216 1216 #define STD_EDX_FEATURES 0
1217 1217 #define AMD_EDX_FEATURES 1
1218 1218 #define TM_EDX_FEATURES 2
1219 1219 #define STD_ECX_FEATURES 3
1220 1220 #define AMD_ECX_FEATURES 4
1221 1221 #define STD_EBX_FEATURES 5
1222 1222 /*
1223 1223 * Synthesized information, where known.
1224 1224 */
1225 1225 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */
1226 1226 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */
1227 1227 uint32_t cpi_socket; /* Chip package/socket type */
1228 1228
1229 1229 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */
1230 1230 uint32_t cpi_apicid;
1231 1231 uint_t cpi_procnodeid; /* AMD: nodeID on HT, Intel: chipid */
1232 1232 uint_t cpi_procnodes_per_pkg; /* AMD: # of nodes in the package */
1233 1233 /* Intel: 1 */
1234 1234 uint_t cpi_compunitid; /* AMD: ComputeUnit ID, Intel: coreid */
1235 1235 uint_t cpi_cores_per_compunit; /* AMD: # of cores in the ComputeUnit */
1236 1236
1237 1237 struct xsave_info cpi_xsave; /* fn D: xsave/xrestor info */
1238 1238 };
1239 1239
1240 1240
1241 1241 static struct cpuid_info cpuid_info0;
1242 1242
1243 1243 /*
1244 1244 * These bit fields are defined by the Intel Application Note AP-485
1245 1245 * "Intel Processor Identification and the CPUID Instruction"
1246 1246 */
1247 1247 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20)
1248 1248 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16)
1249 1249 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12)
1250 1250 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8)
1251 1251 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0)
1252 1252 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4)
1253 1253
1254 1254 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx)
1255 1255 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx)
1256 1256 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx)
1257 1257 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx)
1258 1258 #define CPI_FEATURES_7_0_EBX(cpi) ((cpi)->cpi_std[7].cp_ebx)
1259 1259 #define CPI_FEATURES_7_0_ECX(cpi) ((cpi)->cpi_std[7].cp_ecx)
1260 1260 #define CPI_FEATURES_7_0_EDX(cpi) ((cpi)->cpi_std[7].cp_edx)
1261 1261
1262 1262 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0)
1263 1263 #define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 7)
1264 1264 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16)
1265 1265 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24)
1266 1266
1267 1267 #define CPI_MAXEAX_MAX 0x100 /* sanity control */
1268 1268 #define CPI_XMAXEAX_MAX 0x80000100
1269 1269 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */
1270 1270 #define CPI_FNB_ECX_MAX 0x20 /* sanity: max fn B levels */
1271 1271
1272 1272 /*
1273 1273 * Function 4 (Deterministic Cache Parameters) macros
1274 1274 * Defined by Intel Application Note AP-485
1275 1275 */
1276 1276 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26)
1277 1277 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14)
1278 1278 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9)
1279 1279 #define CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8)
1280 1280 #define CPI_CACHE_LVL(regs) BITX((regs)->cp_eax, 7, 5)
1281 1281 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0)
1282 1282 #define CPI_CPU_LEVEL_TYPE(regs) BITX((regs)->cp_ecx, 15, 8)
1283 1283
1284 1284 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22)
1285 1285 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12)
1286 1286 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0)
1287 1287
1288 1288 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0)
1289 1289
1290 1290 #define CPI_PREFCH_STRIDE(regs) BITX((regs)->cp_edx, 9, 0)
1291 1291
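Worked example (editorial, hypothetical register values): leaf 4 encodes the
ways, partitions, line size, and set count as value-minus-one, so the cache
size is the product of the decoded fields. For an 8-way cache with one
partition, 64-byte lines, and 64 sets:

	size = (CPI_CACHE_WAYS(regs) + 1) *
	    (CPI_CACHE_PARTS(regs) + 1) *
	    (CPI_CACHE_COH_LN_SZ(regs) + 1) *
	    (CPI_CACHE_SETS(regs) + 1)
	     = 8 * 1 * 64 * 64 = 32768 bytes (a 32K cache)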
1292 1292
1293 1293 /*
1294 1294 * A couple of shorthand macros to identify "later" P6-family chips
1295 1295 * like the Pentium M and Core. First, the "older" P6-based stuff
1296 1296 * (loosely defined as "pre-Pentium-4"):
1297 1297 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon
1298 1298 */
1299 1299 #define IS_LEGACY_P6(cpi) ( \
1300 1300 cpi->cpi_family == 6 && \
1301 1301 (cpi->cpi_model == 1 || \
1302 1302 cpi->cpi_model == 3 || \
1303 1303 cpi->cpi_model == 5 || \
1304 1304 cpi->cpi_model == 6 || \
1305 1305 cpi->cpi_model == 7 || \
1306 1306 cpi->cpi_model == 8 || \
1307 1307 cpi->cpi_model == 0xA || \
1308 1308 cpi->cpi_model == 0xB) \
1309 1309 )
1310 1310
1311 1311 /* A "new F6" is everything with family 6 that's not the above */
1312 1312 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi))
1313 1313
1314 1314 /* Extended family/model support */
1315 1315 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \
1316 1316 cpi->cpi_family >= 0xf)
1317 1317
1318 1318 /*
1319 1319 * Info for monitor/mwait idle loop.
1320 1320 *
1321 1321 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's
1322 1322 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November
1323 1323 * 2006.
1324 1324 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual
1325 1325 * Documentation Updates" #33633, Rev 2.05, December 2006.
1326 1326 */
1327 1327 #define MWAIT_SUPPORT (0x00000001) /* mwait supported */
1328 1328 #define	MWAIT_EXTENSIONS	(0x00000002)	/* extension supported */
1329 1329 #define MWAIT_ECX_INT_ENABLE (0x00000004) /* ecx 1 extension supported */
1330 1330 #define MWAIT_SUPPORTED(cpi) ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON)
1331 1331 #define MWAIT_INT_ENABLE(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x2)
1332 1332 #define MWAIT_EXTENSION(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x1)
1333 1333 #define MWAIT_SIZE_MIN(cpi) BITX((cpi)->cpi_std[5].cp_eax, 15, 0)
1334 1334 #define MWAIT_SIZE_MAX(cpi) BITX((cpi)->cpi_std[5].cp_ebx, 15, 0)
1335 1335 /*
1336 1336 * Number of sub-cstates for a given c-state.
1337 1337 */
1338 1338 #define MWAIT_NUM_SUBC_STATES(cpi, c_state) \
1339 1339 BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state)
1340 1340
1341 1341 /*
1342 1342 * XSAVE leaf 0xD enumeration
1343 1343 */
1344 1344 #define CPUID_LEAFD_2_YMM_OFFSET 576
1345 1345 #define CPUID_LEAFD_2_YMM_SIZE 256
1346 1346
1347 1347 /*
1348 1348 * Common extended leaf names to cut down on typos.
1349 1349 */
1350 1350 #define CPUID_LEAF_EXT_0 0x80000000
1351 1351 #define CPUID_LEAF_EXT_8 0x80000008
1352 1352 #define CPUID_LEAF_EXT_1d 0x8000001d
1353 1353 #define CPUID_LEAF_EXT_1e 0x8000001e
1354 1354
1355 1355 /*
1356 1356  * Functions we consume from cpuid_subr.c; don't publish these in a header
1357 1357 * file to try and keep people using the expected cpuid_* interfaces.
1358 1358 */
1359 1359 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t);
1360 1360 extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t);
1361 1361 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t);
1362 1362 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t);
1363 1363 extern uint_t _cpuid_vendorstr_to_vendorcode(char *);
1364 1364
1365 1365 /*
1366 1366  * Apply various platform-dependent restrictions where the
1367 1367 * underlying platform restrictions mean the CPU can be marked
1368 1368 * as less capable than its cpuid instruction would imply.
1369 1369 */
1370 1370 #if defined(__xpv)
1371 1371 static void
1372 1372 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp)
1373 1373 {
1374 1374 switch (eax) {
1375 1375 case 1: {
1376 1376 uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ?
1377 1377 0 : CPUID_INTC_EDX_MCA;
1378 1378 cp->cp_edx &=
1379 1379 ~(mcamask |
1380 1380 CPUID_INTC_EDX_PSE |
1381 1381 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
1382 1382 CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR |
1383 1383 CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT |
1384 1384 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
1385 1385 CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT);
1386 1386 break;
1387 1387 }
1388 1388
1389 1389 case 0x80000001:
1390 1390 cp->cp_edx &=
1391 1391 ~(CPUID_AMD_EDX_PSE |
1392 1392 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
1393 1393 CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE |
1394 1394 CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 |
1395 1395 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
1396 1396 CPUID_AMD_EDX_TSCP);
1397 1397 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY;
1398 1398 break;
1399 1399 default:
1400 1400 break;
1401 1401 }
1402 1402
1403 1403 switch (vendor) {
1404 1404 case X86_VENDOR_Intel:
1405 1405 switch (eax) {
1406 1406 case 4:
1407 1407 /*
1408 1408 * Zero out the (ncores-per-chip - 1) field
1409 1409 */
1410 1410 cp->cp_eax &= 0x03fffffff;
1411 1411 break;
1412 1412 default:
1413 1413 break;
1414 1414 }
1415 1415 break;
1416 1416 case X86_VENDOR_AMD:
1417 1417 switch (eax) {
1418 1418
1419 1419 case 0x80000001:
1420 1420 cp->cp_ecx &= ~CPUID_AMD_ECX_CR8D;
1421 1421 break;
1422 1422
1423 1423 case CPUID_LEAF_EXT_8:
1424 1424 /*
1425 1425 * Zero out the (ncores-per-chip - 1) field
1426 1426 */
1427 1427 cp->cp_ecx &= 0xffffff00;
1428 1428 break;
1429 1429 default:
1430 1430 break;
1431 1431 }
1432 1432 break;
1433 1433 default:
1434 1434 break;
1435 1435 }
1436 1436 }
1437 1437 #else
1438 1438 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */
1439 1439 #endif
1440 1440
1441 1441 /*
1442 1442 * Some undocumented ways of patching the results of the cpuid
1443 1443 * instruction to permit running Solaris 10 on future cpus that
1444 1444 * we don't currently support. Could be set to non-zero values
1445 1445 * via settings in eeprom.
1446 1446 */
1447 1447
1448 1448 uint32_t cpuid_feature_ecx_include;
1449 1449 uint32_t cpuid_feature_ecx_exclude;
1450 1450 uint32_t cpuid_feature_edx_include;
1451 1451 uint32_t cpuid_feature_edx_exclude;
1452 1452
1453 1453 /*
1454 1454 * Allocate space for mcpu_cpi in the machcpu structure for all non-boot CPUs.
1455 1455 */
1456 1456 void
1457 1457 cpuid_alloc_space(cpu_t *cpu)
1458 1458 {
1459 1459 /*
1460 1460 * By convention, cpu0 is the boot cpu, which is set up
1461 1461 * before memory allocation is available. All other cpus get
1462 1462 * their cpuid_info struct allocated here.
1463 1463 */
1464 1464 ASSERT(cpu->cpu_id != 0);
1465 1465 ASSERT(cpu->cpu_m.mcpu_cpi == NULL);
1466 1466 cpu->cpu_m.mcpu_cpi =
1467 1467 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP);
1468 1468 }
1469 1469
1470 1470 void
1471 1471 cpuid_free_space(cpu_t *cpu)
1472 1472 {
1473 1473 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1474 1474 int i;
1475 1475
1476 1476 ASSERT(cpi != NULL);
1477 1477 ASSERT(cpi != &cpuid_info0);
1478 1478
1479 1479 /*
1480 1480 * Free up any cache leaf related dynamic storage. The first entry was
1481 1481 * cached from the standard cpuid storage, so we should not free it.
1482 1482 */
1483 1483 for (i = 1; i < cpi->cpi_cache_leaf_size; i++)
1484 1484 kmem_free(cpi->cpi_cache_leaves[i], sizeof (struct cpuid_regs));
1485 1485 if (cpi->cpi_cache_leaf_size > 0)
1486 1486 kmem_free(cpi->cpi_cache_leaves,
1487 1487 cpi->cpi_cache_leaf_size * sizeof (struct cpuid_regs *));
1488 1488
1489 1489 kmem_free(cpi, sizeof (*cpi));
1490 1490 cpu->cpu_m.mcpu_cpi = NULL;
1491 1491 }
1492 1492
1493 1493 #if !defined(__xpv)
1494 1494 /*
1495 1495 * Determine the type of the underlying platform. This is used to customize
1496 1496 * initialization of various subsystems (e.g. TSC). determine_platform() must
1497 1497 * only ever be called once to prevent two processors from seeing different
1498 1498 * values of platform_type. Must be called before cpuid_pass1(), the earliest
1499 1499 * consumer to execute (uses _cpuid_chiprev --> synth_amd_info --> get_hwenv).
1500 1500 */
1501 1501 void
1502 1502 determine_platform(void)
1503 1503 {
1504 1504 struct cpuid_regs cp;
1505 1505 uint32_t base;
1506 1506 uint32_t regs[4];
1507 1507 char *hvstr = (char *)regs;
1508 1508
1509 1509 ASSERT(platform_type == -1);
1510 1510
1511 1511 platform_type = HW_NATIVE;
1512 1512
1513 1513 if (!enable_platform_detection)
1514 1514 return;
1515 1515
1516 1516 /*
1517 1517 * If Hypervisor CPUID bit is set, try to determine hypervisor
1518 1518 * vendor signature, and set platform type accordingly.
1519 1519 *
1520 1520 * References:
1521 1521 * http://lkml.org/lkml/2008/10/1/246
1522 1522 * http://kb.vmware.com/kb/1009458
1523 1523 */
1524 1524 cp.cp_eax = 0x1;
1525 1525 (void) __cpuid_insn(&cp);
1526 1526 if ((cp.cp_ecx & CPUID_INTC_ECX_HV) != 0) {
1527 1527 cp.cp_eax = 0x40000000;
1528 1528 (void) __cpuid_insn(&cp);
1529 1529 regs[0] = cp.cp_ebx;
1530 1530 regs[1] = cp.cp_ecx;
1531 1531 regs[2] = cp.cp_edx;
1532 1532 regs[3] = 0;
1533 1533 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0) {
1534 1534 platform_type = HW_XEN_HVM;
1535 1535 return;
1536 1536 }
1537 1537 if (strcmp(hvstr, HVSIG_VMWARE) == 0) {
1538 1538 platform_type = HW_VMWARE;
1539 1539 return;
1540 1540 }
1541 1541 if (strcmp(hvstr, HVSIG_KVM) == 0) {
1542 1542 platform_type = HW_KVM;
1543 1543 return;
1544 1544 }
1545 1545 if (strcmp(hvstr, HVSIG_BHYVE) == 0) {
1546 1546 platform_type = HW_BHYVE;
1547 1547 return;
1548 1548 }
1549 1549 if (strcmp(hvstr, HVSIG_MICROSOFT) == 0)
1550 1550 platform_type = HW_MICROSOFT;
1551 1551 } else {
1552 1552 /*
1553 1553 		 * Check older VMware hardware versions. The VMware hypervisor
1554 1554 		 * is detected by performing an IN operation to the VMware
1555 1555 		 * hypervisor port and checking that the value returned in %ebx
1556 1556 		 * is the VMware hypervisor magic value.
1557 1557 *
1558 1558 * References: http://kb.vmware.com/kb/1009458
1559 1559 */
1560 1560 vmware_port(VMWARE_HVCMD_GETVERSION, regs);
1561 1561 if (regs[1] == VMWARE_HVMAGIC) {
1562 1562 platform_type = HW_VMWARE;
1563 1563 return;
1564 1564 }
1565 1565 }
1566 1566
1567 1567 /*
1568 1568 * Check Xen hypervisor. In a fully virtualized domain,
1569 1569 * Xen's pseudo-cpuid function returns a string representing the
1570 1570 * Xen signature in %ebx, %ecx, and %edx. %eax contains the maximum
1571 1571 * supported cpuid function. We need at least a (base + 2) leaf value
1572 1572 * to do what we want to do. Try different base values, since the
1573 1573 * hypervisor might use a different one depending on whether Hyper-V
1574 1574 * emulation is switched on by default or not.
1575 1575 */
1576 1576 for (base = 0x40000000; base < 0x40010000; base += 0x100) {
1577 1577 cp.cp_eax = base;
1578 1578 (void) __cpuid_insn(&cp);
1579 1579 regs[0] = cp.cp_ebx;
1580 1580 regs[1] = cp.cp_ecx;
1581 1581 regs[2] = cp.cp_edx;
1582 1582 regs[3] = 0;
1583 1583 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0 &&
1584 1584 cp.cp_eax >= (base + 2)) {
1585 1585 platform_type &= ~HW_NATIVE;
1586 1586 platform_type |= HW_XEN_HVM;
1587 1587 return;
1588 1588 }
1589 1589 }
1590 1590 }
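
As an illustrative aside (editorial, not part of the webrev): the same
hypervisor-signature probe can be reproduced from userland. The sketch below
assumes GCC/Clang's <cpuid.h> and its __cpuid() macro are available; the
kernel code above uses __cpuid_insn() and compares against the HVSIG_*
constants instead of printing.

	#include <cpuid.h>
	#include <stdio.h>
	#include <string.h>

	static void
	show_hv_signature(void)
	{
		unsigned int eax, ebx, ecx, edx, sig[3];
		char hvstr[13];

		/* CPUID.1:ECX bit 31 is the hypervisor-present bit. */
		__cpuid(1, eax, ebx, ecx, edx);
		if ((ecx & (1U << 31)) == 0)
			return;

		/* Leaf 0x40000000 packs a 12-byte vendor string into ebx:ecx:edx. */
		__cpuid(0x40000000, eax, sig[0], sig[1], sig[2]);
		(void) memcpy(hvstr, sig, sizeof (sig));
		hvstr[12] = '\0';
		(void) printf("hypervisor: \"%s\", max leaf 0x%x\n", hvstr, eax);
	}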
1591 1591
1592 1592 int
1593 1593 get_hwenv(void)
1594 1594 {
1595 1595 ASSERT(platform_type != -1);
1596 1596 return (platform_type);
1597 1597 }
1598 1598
1599 1599 int
1600 1600 is_controldom(void)
1601 1601 {
1602 1602 return (0);
1603 1603 }
1604 1604
1605 1605 #else
1606 1606
1607 1607 int
1608 1608 get_hwenv(void)
1609 1609 {
1610 1610 return (HW_XEN_PV);
1611 1611 }
1612 1612
1613 1613 int
1614 1614 is_controldom(void)
1615 1615 {
1616 1616 return (DOMAIN_IS_INITDOMAIN(xen_info));
1617 1617 }
1618 1618
1619 1619 #endif /* __xpv */
1620 1620
1621 1621 /*
1622 1622 * Make sure that we have gathered all of the CPUID leaves that we might need to
1623 1623 * determine topology. We assume that the standard leaf 1 has already been done
1624 1624 * and that xmaxeax has already been calculated.
1625 1625 */
1626 1626 static void
1627 1627 cpuid_gather_amd_topology_leaves(cpu_t *cpu)
1628 1628 {
1629 1629 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1630 1630
1631 1631 if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
1632 1632 struct cpuid_regs *cp;
1633 1633
1634 1634 cp = &cpi->cpi_extd[8];
1635 1635 cp->cp_eax = CPUID_LEAF_EXT_8;
1636 1636 (void) __cpuid_insn(cp);
1637 1637 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8, cp);
1638 1638 }
1639 1639
1640 1640 if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1641 1641 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1642 1642 struct cpuid_regs *cp;
1643 1643
1644 1644 cp = &cpi->cpi_extd[0x1e];
1645 1645 cp->cp_eax = CPUID_LEAF_EXT_1e;
1646 1646 (void) __cpuid_insn(cp);
1647 1647 }
1648 1648 }
1649 1649
1650 1650 /*
1651 1651 * Get the APIC ID for this processor. If Leaf B is present and valid, we prefer
1652 1652 * it to everything else. If not, and we're on an AMD system where 8000001e is
1653 1653  * valid, then we use that. Otherwise, we fall back to the default value for the
1654 1654 * APIC ID in leaf 1.
1655 1655 */
1656 1656 static uint32_t
1657 1657 cpuid_gather_apicid(struct cpuid_info *cpi)
1658 1658 {
1659 1659 /*
1660 1660 	 * Leaf B changes based on the arguments to it. Because we don't cache
1661 1661 * it, we need to gather it again.
1662 1662 */
1663 1663 if (cpi->cpi_maxeax >= 0xB) {
1664 1664 struct cpuid_regs regs;
1665 1665 struct cpuid_regs *cp;
1666 1666
1667 1667 	cp = &regs;
1668 1668 cp->cp_eax = 0xB;
1669 1669 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1670 1670 (void) __cpuid_insn(cp);
1671 1671
1672 1672 if (cp->cp_ebx != 0) {
1673 1673 return (cp->cp_edx);
1674 1674 }
1675 1675 }
1676 1676
1677 1677 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
1678 1678 is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1679 1679 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1680 1680 return (cpi->cpi_extd[0x1e].cp_eax);
1681 1681 }
1682 1682
1683 1683 return (CPI_APIC_ID(cpi));
1684 1684 }
1685 1685
1686 1686 /*
1687 1687 * For AMD processors, attempt to calculate the number of chips and cores that
1688 1688 * exist. The way that we do this varies based on the generation, because the
1689 1689 * generations themselves have changed dramatically.
1690 1690 *
1691 1691 * If cpuid leaf 0x80000008 exists, that generally tells us the number of cores.
1692 1692 * However, with the advent of family 17h (Zen) it actually tells us the number
1693 1693 * of threads, so we need to look at leaf 0x8000001e if available to determine
1694 1694 * its value. Otherwise, for all prior families, the number of enabled cores is
1695 1695 * the same as threads.
1696 1696 *
1697 1697 * If we do not have leaf 0x80000008, then we assume that this processor does
1698 1698 * not have anything. AMD's older CPUID specification says there's no reason to
1699 1699 * fall back to leaf 1.
1700 1700 *
1701 1701 * In some virtualization cases we will not have leaf 8000001e or it will be
1702 1702 * zero. When that happens we assume the number of threads is one.
1703 1703 */
1704 1704 static void
1705 1705 cpuid_amd_ncores(struct cpuid_info *cpi, uint_t *ncpus, uint_t *ncores)
1706 1706 {
1707 1707 uint_t nthreads, nthread_per_core;
1708 1708
1709 1709 nthreads = nthread_per_core = 1;
1710 1710
1711 1711 if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
1712 1712 nthreads = BITX(cpi->cpi_extd[8].cp_ecx, 7, 0) + 1;
1713 1713 } else if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) {
1714 1714 nthreads = CPI_CPU_COUNT(cpi);
1715 1715 }
1716 1716
1717 1717 /*
1718 1718 * For us to have threads, and know about it, we have to be at least at
1719 1719 * family 17h and have the cpuid bit that says we have extended
1720 1720 * topology.
1721 1721 */
1722 1722 if (cpi->cpi_family >= 0x17 &&
1723 1723 is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1724 1724 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1725 1725 nthread_per_core = BITX(cpi->cpi_extd[0x1e].cp_ebx, 15, 8) + 1;
1726 1726 }
1727 1727
1728 1728 *ncpus = nthreads;
1729 1729 *ncores = nthreads / nthread_per_core;
1730 1730 }
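
Worked example (editorial, hypothetical register values): on a Zen-style part
where leaf 0x80000008 reports %ecx[7:0] = 15 and leaf 0x8000001e reports
%ebx[15:8] = 1, the function above computes

	nthreads         = 15 + 1 = 16
	nthread_per_core = 1 + 1  = 2
	*ncpus  = 16
	*ncores = 16 / 2 = 8

i.e. an 8-core, 16-thread package. On pre-17h families the second leaf is not
consulted, so nthread_per_core stays 1 and cores equal threads.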
1731 1731
1732 1732 /*
1733 1733 * Seed the initial values for the cores and threads for an Intel based
1734 1734 * processor. These values will be overwritten if we detect that the processor
1735 1735 * supports CPUID leaf 0xb.
1736 1736 */
1737 1737 static void
1738 1738 cpuid_intel_ncores(struct cpuid_info *cpi, uint_t *ncpus, uint_t *ncores)
1739 1739 {
1740 1740 /*
1741 1741 * Only seed the number of physical cores from the first level leaf 4
1742 1742 	 * information. The number of threads there indicates how many share the
1743 1743 * L1 cache, which may or may not have anything to do with the number of
1744 1744 * logical CPUs per core.
1745 1745 */
1746 1746 if (cpi->cpi_maxeax >= 4) {
1747 1747 *ncores = BITX(cpi->cpi_std[4].cp_eax, 31, 26) + 1;
1748 1748 } else {
1749 1749 *ncores = 1;
1750 1750 }
1751 1751
1752 1752 if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) {
1753 1753 *ncpus = CPI_CPU_COUNT(cpi);
1754 1754 } else {
1755 1755 *ncpus = *ncores;
1756 1756 }
1757 1757 }
1758 1758
1759 1759 static boolean_t
1760 1760 cpuid_leafB_getids(cpu_t *cpu)
1761 1761 {
1762 1762 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1763 1763 struct cpuid_regs regs;
1764 1764 struct cpuid_regs *cp;
1765 1765
1766 1766 if (cpi->cpi_maxeax < 0xB)
1767 1767 return (B_FALSE);
1768 1768
1769 1769 	cp = &regs;
1770 1770 cp->cp_eax = 0xB;
1771 1771 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1772 1772
1773 1773 (void) __cpuid_insn(cp);
1774 1774
1775 1775 /*
1776 1776 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which
1777 1777 * indicates that the extended topology enumeration leaf is
1778 1778 * available.
1779 1779 */
1780 1780 if (cp->cp_ebx != 0) {
1781 1781 uint32_t x2apic_id = 0;
1782 1782 uint_t coreid_shift = 0;
1783 1783 uint_t ncpu_per_core = 1;
1784 1784 uint_t chipid_shift = 0;
1785 1785 uint_t ncpu_per_chip = 1;
1786 1786 uint_t i;
1787 1787 uint_t level;
1788 1788
1789 1789 for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
1790 1790 cp->cp_eax = 0xB;
1791 1791 cp->cp_ecx = i;
1792 1792
1793 1793 (void) __cpuid_insn(cp);
1794 1794 level = CPI_CPU_LEVEL_TYPE(cp);
1795 1795
1796 1796 if (level == 1) {
1797 1797 x2apic_id = cp->cp_edx;
1798 1798 coreid_shift = BITX(cp->cp_eax, 4, 0);
1799 1799 ncpu_per_core = BITX(cp->cp_ebx, 15, 0);
1800 1800 } else if (level == 2) {
1801 1801 x2apic_id = cp->cp_edx;
1802 1802 chipid_shift = BITX(cp->cp_eax, 4, 0);
1803 1803 ncpu_per_chip = BITX(cp->cp_ebx, 15, 0);
1804 1804 }
1805 1805 }
1806 1806
1807 1807 /*
1808 1808 * cpi_apicid is taken care of in cpuid_gather_apicid.
1809 1809 */
1810 1810 cpi->cpi_ncpu_per_chip = ncpu_per_chip;
1811 1811 cpi->cpi_ncore_per_chip = ncpu_per_chip /
1812 1812 ncpu_per_core;
1813 1813 cpi->cpi_chipid = x2apic_id >> chipid_shift;
1814 1814 cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1);
1815 1815 cpi->cpi_coreid = x2apic_id >> coreid_shift;
1816 1816 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
1817 1817 cpi->cpi_procnodeid = cpi->cpi_chipid;
1818 1818 cpi->cpi_compunitid = cpi->cpi_coreid;
1819 1819
1820 1820 if (coreid_shift > 0 && chipid_shift > coreid_shift) {
1821 1821 cpi->cpi_nthread_bits = coreid_shift;
1822 1822 cpi->cpi_ncore_bits = chipid_shift - coreid_shift;
1823 1823 }
1824 1824
1825 1825 return (B_TRUE);
1826 1826 } else {
1827 1827 return (B_FALSE);
1828 1828 }
1829 1829 }
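
Worked example (editorial, hypothetical values): if leaf B reports a
thread-level (level 1) shift of 1 and a core-level (level 2) shift of 4, an
x2APIC ID of 0x1b decomposes as

	cpi_chipid    = 0x1b >> 4             = 1
	cpi_clogid    = 0x1b & ((1 << 4) - 1) = 0xb
	cpi_coreid    = 0x1b >> 1             = 0xd
	cpi_pkgcoreid = 0xb >> 1              = 5

with cpi_nthread_bits = 1 and cpi_ncore_bits = 4 - 1 = 3.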
1830 1830
1831 1831 static void
1832 1832 cpuid_intel_getids(cpu_t *cpu, void *feature)
1833 1833 {
1834 1834 uint_t i;
1835 1835 uint_t chipid_shift = 0;
1836 1836 uint_t coreid_shift = 0;
1837 1837 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1838 1838
1839 1839 /*
1840 1840 * There are no compute units or processor nodes currently on Intel.
1841 1841 * Always set these to one.
1842 1842 */
1843 1843 cpi->cpi_procnodes_per_pkg = 1;
1844 1844 cpi->cpi_cores_per_compunit = 1;
1845 1845
1846 1846 /*
1847 1847 * If cpuid Leaf B is present, use that to try and get this information.
1848 1848 * It will be the most accurate for Intel CPUs.
1849 1849 */
1850 1850 if (cpuid_leafB_getids(cpu))
1851 1851 return;
1852 1852
1853 1853 /*
1854 1854 * In this case, we have the leaf 1 and leaf 4 values for ncpu_per_chip
1855 1855 * and ncore_per_chip. These represent the largest power of two values
1856 1856 * that we need to cover all of the IDs in the system. Therefore, we use
1857 1857 * those values to seed the number of bits needed to cover information
1858 1858 * in the case when leaf B is not available. These values will probably
1859 1859 * be larger than required, but that's OK.
1860 1860 */
1861 1861 cpi->cpi_nthread_bits = ddi_fls(cpi->cpi_ncpu_per_chip);
1862 1862 cpi->cpi_ncore_bits = ddi_fls(cpi->cpi_ncore_per_chip);
1863 1863
1864 1864 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1)
1865 1865 chipid_shift++;
1866 1866
1867 1867 cpi->cpi_chipid = cpi->cpi_apicid >> chipid_shift;
1868 1868 cpi->cpi_clogid = cpi->cpi_apicid & ((1 << chipid_shift) - 1);
1869 1869
1870 1870 if (is_x86_feature(feature, X86FSET_CMP)) {
1871 1871 /*
1872 1872 * Multi-core (and possibly multi-threaded)
1873 1873 * processors.
1874 1874 */
1875 1875 uint_t ncpu_per_core;
1876 1876 if (cpi->cpi_ncore_per_chip == 1)
1877 1877 ncpu_per_core = cpi->cpi_ncpu_per_chip;
1878 1878 else if (cpi->cpi_ncore_per_chip > 1)
1879 1879 ncpu_per_core = cpi->cpi_ncpu_per_chip /
1880 1880 cpi->cpi_ncore_per_chip;
1881 1881 /*
1882 1882 * 8bit APIC IDs on dual core Pentiums
1883 1883 * look like this:
1884 1884 *
1885 1885 * +-----------------------+------+------+
1886 1886 * | Physical Package ID | MC | HT |
1887 1887 * +-----------------------+------+------+
1888 1888 * <------- chipid -------->
1889 1889 * <------- coreid --------------->
1890 1890 * <--- clogid -->
1891 1891 * <------>
1892 1892 * pkgcoreid
1893 1893 *
1894 1894 * Where the number of bits necessary to
1895 1895 * represent MC and HT fields together equals
1896 1896 * to the minimum number of bits necessary to
1897 1897 * store the value of cpi->cpi_ncpu_per_chip.
1898 1898 * Of those bits, the MC part uses the number
1899 1899 * of bits necessary to store the value of
1900 1900 * cpi->cpi_ncore_per_chip.
1901 1901 */
1902 1902 for (i = 1; i < ncpu_per_core; i <<= 1)
1903 1903 coreid_shift++;
1904 1904 cpi->cpi_coreid = cpi->cpi_apicid >> coreid_shift;
1905 1905 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
1906 1906 } else if (is_x86_feature(feature, X86FSET_HTT)) {
1907 1907 /*
1908 1908 * Single-core multi-threaded processors.
1909 1909 */
1910 1910 cpi->cpi_coreid = cpi->cpi_chipid;
1911 1911 cpi->cpi_pkgcoreid = 0;
1912 1912 } else {
1913 1913 /*
1914 1914 * Single-core single-thread processors.
1915 1915 */
1916 1916 cpi->cpi_coreid = cpu->cpu_id;
1917 1917 cpi->cpi_pkgcoreid = 0;
1918 1918 }
1919 1919 cpi->cpi_procnodeid = cpi->cpi_chipid;
1920 1920 cpi->cpi_compunitid = cpi->cpi_coreid;
1921 1921 }
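
Worked example for the legacy (no leaf B) path, again with hypothetical
values: with cpi_ncpu_per_chip = 4 and cpi_ncore_per_chip = 2, chipid_shift
works out to 2, ncpu_per_core to 2, and coreid_shift to 1, so an 8-bit APIC ID
of 0x5 decomposes as

	cpi_chipid    = 0x5 >> 2  = 1
	cpi_clogid    = 0x5 & 0x3 = 1
	cpi_coreid    = 0x5 >> 1  = 2
	cpi_pkgcoreid = 0x1 >> 1  = 0

matching the package/MC/HT split sketched in the diagram above.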
1922 1922
1923 1923 /*
1924 1924 * Historically, AMD has had CMP chips with only a single thread per core.
1925 1925 * However, starting in family 17h (Zen), this has changed and they now have
1926 1926 * multiple threads. Our internal core id needs to be a unique value.
1927 1927 *
1928 1928 * To determine the core id of an AMD system, if we're from a family before 17h,
1929 1929 * then we just use the cpu id, as that gives us a good value that will be
1930 1930 * unique for each core. If instead, we're on family 17h or later, then we need
1931 1931 * to do something more complicated. CPUID leaf 0x8000001e can tell us
1932 1932 * how many threads are in the system. Based on that, we'll shift the APIC ID.
1933 1933 * We can't use the normal core id in that leaf as it's only unique within the
1934 1934  * socket, which is perfect for cpi_pkgcoreid, but not for us.
1935 1935 */
1936 1936 static id_t
1937 1937 cpuid_amd_get_coreid(cpu_t *cpu)
1938 1938 {
1939 1939 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1940 1940
1941 1941 if (cpi->cpi_family >= 0x17 &&
1942 1942 is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1943 1943 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1944 1944 uint_t nthreads = BITX(cpi->cpi_extd[0x1e].cp_ebx, 15, 8) + 1;
1945 1945 if (nthreads > 1) {
1946 1946 VERIFY3U(nthreads, ==, 2);
1947 1947 return (cpi->cpi_apicid >> 1);
1948 1948 }
1949 1949 }
1950 1950
1951 1951 return (cpu->cpu_id);
1952 1952 }
1953 1953
1954 1954 /*
1955 1955  * Deriving IDs on AMD is a more challenging task. This is notable because of the
1956 1956 * following two facts:
1957 1957 *
1958 1958 * 1. Before family 0x17 (Zen), there was no support for SMT and there was
1959 1959 * also no way to get an actual unique core id from the system. As such, we
1960 1960 * synthesize this case by using cpu->cpu_id. This scheme does not,
1961 1961 * however, guarantee that sibling cores of a chip will have sequential
1962 1962 * coreids starting at a multiple of the number of cores per chip - that is
1963 1963 * usually the case, but if the ACPI MADT table is presented in a different
1964 1964 * order then we need to perform a few more gymnastics for the pkgcoreid.
1965 1965 *
1966 1966  * 2. In families 0x15 and 0x16 (Bulldozer and co.) the cores came in groups
1967 1967 * called compute units. These compute units share the L1I cache, L2 cache,
1968 1968 * and the FPU. To deal with this, a new topology leaf was added in
1969 1969 * 0x8000001e. However, parts of this leaf have different meanings
1970 1970 * once we get to family 0x17.
1971 1971 */
1972 1972
1973 1973 static void
1974 1974 cpuid_amd_getids(cpu_t *cpu, uchar_t *features)
1975 1975 {
1976 1976 int i, first_half, coreidsz;
1977 1977 uint32_t nb_caps_reg;
1978 1978 uint_t node2_1;
1979 1979 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1980 1980 struct cpuid_regs *cp;
1981 1981
1982 1982 /*
1983 1983 * Calculate the core id (this comes from hardware in family 0x17 if it
1984 1984 * hasn't been stripped by virtualization). We always set the compute
1985 1985 * unit id to the same value. Also, initialize the default number of
1986 1986 * cores per compute unit and nodes per package. This will be
1987 1987 * overwritten when we know information about a particular family.
1988 1988 */
1989 1989 cpi->cpi_coreid = cpuid_amd_get_coreid(cpu);
1990 1990 cpi->cpi_compunitid = cpi->cpi_coreid;
1991 1991 cpi->cpi_cores_per_compunit = 1;
1992 1992 cpi->cpi_procnodes_per_pkg = 1;
1993 1993
1994 1994 /*
1995 1995 * To construct the logical ID, we need to determine how many APIC IDs
1996 1996 * are dedicated to the cores and threads. This is provided for us in
1997 1997 * 0x80000008. However, if it's not present (say due to virtualization),
1998 1998 * then we assume it's one. This should be present on all 64-bit AMD
1999 1999 * processors. It was added in family 0xf (Hammer).
2000 2000 */
2001 2001 if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
2002 2002 coreidsz = BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12);
2003 2003
2004 2004 /*
2005 2005 * In AMD parlance chip is really a node while illumos
2006 2006 * uses chip as equivalent to socket/package.
2007 2007 */
2008 2008 if (coreidsz == 0) {
2009 2009 /* Use legacy method */
2010 2010 for (i = 1; i < cpi->cpi_ncore_per_chip; i <<= 1)
2011 2011 coreidsz++;
2012 2012 if (coreidsz == 0)
2013 2013 coreidsz = 1;
2014 2014 }
2015 2015 } else {
2016 2016 /* Assume single-core part */
2017 2017 coreidsz = 1;
2018 2018 }
2019 2019 cpi->cpi_clogid = cpi->cpi_apicid & ((1 << coreidsz) - 1);
2020 2020
2021 2021 /*
2022 2022 * The package core ID varies depending on the family. For family 17h,
2023 2023 * we can get this directly from leaf CPUID_LEAF_EXT_1e. Otherwise, we
2024 2024 	 * can use the clogid as is. Even when family 17h is virtualized, the
2025 2025 	 * clogid should suffice: if we don't have valid data in the leaf, then
2026 2026 	 * we won't think we have SMT, and the cpi_clogid alone is enough to
2027 2027 	 * identify the core within the package.
2028 2028 */
2029 2029 if (cpi->cpi_family >= 0x17 &&
2030 2030 is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
2031 2031 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e &&
2032 2032 cpi->cpi_extd[0x1e].cp_ebx != 0) {
2033 2033 cpi->cpi_pkgcoreid = BITX(cpi->cpi_extd[0x1e].cp_ebx, 7, 0);
2034 2034 } else {
2035 2035 cpi->cpi_pkgcoreid = cpi->cpi_clogid;
2036 2036 }
2037 2037
2038 2038 /*
2039 2039 * Obtain the node ID and compute unit IDs. If we're on family 0x15
2040 2040 * (bulldozer) or newer, then we can derive all of this from leaf
2041 2041 * CPUID_LEAF_EXT_1e. Otherwise, the method varies by family.
2042 2042 */
2043 2043 if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
2044 2044 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
2045 2045 cp = &cpi->cpi_extd[0x1e];
2046 2046
2047 2047 cpi->cpi_procnodes_per_pkg = BITX(cp->cp_ecx, 10, 8) + 1;
2048 2048 cpi->cpi_procnodeid = BITX(cp->cp_ecx, 7, 0);
2049 2049
2050 2050 /*
2051 2051 * For Bulldozer-era CPUs, recalculate the compute unit
2052 2052 * information.
2053 2053 */
2054 2054 if (cpi->cpi_family >= 0x15 && cpi->cpi_family < 0x17) {
2055 2055 cpi->cpi_cores_per_compunit =
2056 2056 BITX(cp->cp_ebx, 15, 8) + 1;
2057 2057 cpi->cpi_compunitid = BITX(cp->cp_ebx, 7, 0) +
2058 2058 (cpi->cpi_ncore_per_chip /
2059 2059 cpi->cpi_cores_per_compunit) *
2060 2060 (cpi->cpi_procnodeid /
2061 2061 cpi->cpi_procnodes_per_pkg);
2062 2062 }
2063 2063 } else if (cpi->cpi_family == 0xf || cpi->cpi_family >= 0x11) {
2064 2064 cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7;
2065 2065 } else if (cpi->cpi_family == 0x10) {
2066 2066 /*
2067 2067 * See if we are a multi-node processor.
2068 2068 * All processors in the system have the same number of nodes
2069 2069 */
2070 2070 nb_caps_reg = pci_getl_func(0, 24, 3, 0xe8);
2071 2071 if ((cpi->cpi_model < 8) || BITX(nb_caps_reg, 29, 29) == 0) {
2072 2072 /* Single-node */
2073 2073 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5,
2074 2074 coreidsz);
2075 2075 } else {
2076 2076
2077 2077 /*
2078 2078 * Multi-node revision D (2 nodes per package
2079 2079 * are supported)
2080 2080 */
2081 2081 cpi->cpi_procnodes_per_pkg = 2;
2082 2082
2083 2083 first_half = (cpi->cpi_pkgcoreid <=
2084 2084 (cpi->cpi_ncore_per_chip/2 - 1));
2085 2085
2086 2086 if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) {
2087 2087 /* We are BSP */
2088 2088 cpi->cpi_procnodeid = (first_half ? 0 : 1);
2089 2089 } else {
2090 2090
2091 2091 /* We are AP */
2092 2092 /* NodeId[2:1] bits to use for reading F3xe8 */
2093 2093 node2_1 = BITX(cpi->cpi_apicid, 5, 4) << 1;
2094 2094
2095 2095 nb_caps_reg =
2096 2096 pci_getl_func(0, 24 + node2_1, 3, 0xe8);
2097 2097
2098 2098 /*
2099 2099 * Check IntNodeNum bit (31:30, but bit 31 is
2100 2100 * always 0 on dual-node processors)
2101 2101 */
2102 2102 if (BITX(nb_caps_reg, 30, 30) == 0)
2103 2103 cpi->cpi_procnodeid = node2_1 +
2104 2104 !first_half;
2105 2105 else
2106 2106 cpi->cpi_procnodeid = node2_1 +
2107 2107 first_half;
2108 2108 }
2109 2109 }
2110 2110 } else {
2111 2111 cpi->cpi_procnodeid = 0;
2112 2112 }
2113 2113
2114 2114 cpi->cpi_chipid =
2115 2115 cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg;
2116 2116
2117 2117 cpi->cpi_ncore_bits = coreidsz;
2118 2118 cpi->cpi_nthread_bits = ddi_fls(cpi->cpi_ncpu_per_chip /
2119 2119 cpi->cpi_ncore_per_chip);
2120 2120 }
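
Worked example (editorial, with hypothetical values) for the family 0x15
compute-unit math above: take cpi_ncore_per_chip = 8, leaf 0x8000001e
reporting two cores per compute unit (%ebx[15:8] = 1) and a unit id of 1
(%ebx[7:0] = 1), with cpi_procnodeid = 2 and cpi_procnodes_per_pkg = 2:

	cpi_cores_per_compunit = 1 + 1 = 2
	cpi_compunitid = 1 + (8 / 2) * (2 / 2) = 1 + 4 = 5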
2121 2121
2122 2122 static void
2123 +spec_l1d_flush_noop(void)
2124 +{
2125 +}
2126 +
2127 +static void
2128 +spec_l1d_flush_msr(void)
2129 +{
2130 + wrmsr(MSR_IA32_FLUSH_CMD, IA32_FLUSH_CMD_L1D);
2131 +}
2132 +
2133 +void (*spec_l1d_flush)(void) = spec_l1d_flush_noop;
2134 +
2135 +static void
2123 2136 cpuid_scan_security(cpu_t *cpu, uchar_t *featureset)
2124 2137 {
2125 2138 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2126 2139
2127 2140 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
2128 2141 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
2129 2142 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBPB)
2130 2143 add_x86_feature(featureset, X86FSET_IBPB);
2131 2144 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBRS)
2132 2145 add_x86_feature(featureset, X86FSET_IBRS);
2133 2146 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_STIBP)
2134 2147 add_x86_feature(featureset, X86FSET_STIBP);
2135 2148 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBRS_ALL)
2136 2149 add_x86_feature(featureset, X86FSET_IBRS_ALL);
2137 2150 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_STIBP_ALL)
2138 2151 add_x86_feature(featureset, X86FSET_STIBP_ALL);
2139 2152 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_PREFER_IBRS)
2140 2153 add_x86_feature(featureset, X86FSET_RSBA);
2141 2154 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_SSBD)
2142 2155 add_x86_feature(featureset, X86FSET_SSBD);
2143 2156 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_VIRT_SSBD)
2144 2157 add_x86_feature(featureset, X86FSET_SSBD_VIRT);
2145 2158 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_SSB_NO)
2146 2159 add_x86_feature(featureset, X86FSET_SSB_NO);
2147 2160 } else if (cpi->cpi_vendor == X86_VENDOR_Intel &&
2148 2161 cpi->cpi_maxeax >= 7) {
2149 2162 struct cpuid_regs *ecp;
2150 2163 ecp = &cpi->cpi_std[7];
2151 2164
2152 2165 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_SPEC_CTRL) {
2153 2166 add_x86_feature(featureset, X86FSET_IBRS);
2154 2167 add_x86_feature(featureset, X86FSET_IBPB);
2155 2168 }
2156 2169
2157 2170 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_STIBP) {
2158 2171 add_x86_feature(featureset, X86FSET_STIBP);
2159 2172 }
2160 2173
2161 2174 /*
2162 2175 * Don't read the arch caps MSR on xpv where we lack the
2163 2176 * on_trap().
2164 2177 */
2165 2178 #ifndef __xpv
2166 2179 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_ARCH_CAPS) {
2167 2180 on_trap_data_t otd;
2168 2181
2169 2182 /*
2170 2183 * Be paranoid and assume we'll get a #GP.
2171 2184 */
2172 2185 if (!on_trap(&otd, OT_DATA_ACCESS)) {
2173 2186 uint64_t reg;
2174 2187
2175 2188 reg = rdmsr(MSR_IA32_ARCH_CAPABILITIES);
2176 2189 if (reg & IA32_ARCH_CAP_RDCL_NO) {
2177 2190 add_x86_feature(featureset,
2178 2191 X86FSET_RDCL_NO);
2179 2192 }
2180 2193 if (reg & IA32_ARCH_CAP_IBRS_ALL) {
2181 2194 add_x86_feature(featureset,
2182 2195 X86FSET_IBRS_ALL);
2183 2196 }
2184 2197 if (reg & IA32_ARCH_CAP_RSBA) {
2185 2198 add_x86_feature(featureset,
2186 2199 X86FSET_RSBA);
2187 2200 }
2188 2201 if (reg & IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY) {
2189 2202 add_x86_feature(featureset,
2190 2203 X86FSET_L1D_VM_NO);
2191 2204 }
2192 2205 if (reg & IA32_ARCH_CAP_SSB_NO) {
2193 2206 add_x86_feature(featureset,
2194 2207 X86FSET_SSB_NO);
2195 2208 }
2196 2209 }
2197 2210 no_trap();
2198 2211 }
2199 2212 #endif /* !__xpv */
2200 2213
2201 2214 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_SSBD)
2202 2215 add_x86_feature(featureset, X86FSET_SSBD);
2203 2216
2204 2217 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_FLUSH_CMD)
2205 2218 add_x86_feature(featureset, X86FSET_FLUSH_CMD);
2206 2219 }
2220 +
2221 + if (cpu->cpu_id != 0)
2222 + return;
2223 +
2224 + /*
2225 + * We're the boot CPU, so let's figure out our L1TF status.
2226 + *
2227 + * First, if this is a RDCL_NO CPU, then we are not vulnerable: we don't
2228 + * need to exclude with ht_acquire(), and we don't need to flush.
2229 + */
2230 + if (is_x86_feature(featureset, X86FSET_RDCL_NO)) {
2231 + extern int ht_exclusion;
2232 + ht_exclusion = 0;
2233 + spec_l1d_flush = spec_l1d_flush_noop;
2234 + membar_producer();
2235 + return;
2236 + }
2237 +
2238 + /*
2239 + * If HT is enabled, we will need HT exclusion, as well as the flush on
2240 + * VM entry. If HT isn't enabled, we still need at least the flush for
2241 + * the L1TF sequential case.
2242 + *
2243 + * However, if X86FSET_L1D_VM_NO is set, we're most likely running
2244 + * inside a VM ourselves, and we don't need the flush.
2245 + *
2246 + * If we don't have the FLUSH_CMD available at all, we'd better just
2247 + * hope HT is disabled.
2248 + */
2249 + if (is_x86_feature(featureset, X86FSET_FLUSH_CMD) &&
2250 + !is_x86_feature(featureset, X86FSET_L1D_VM_NO)) {
2251 + spec_l1d_flush = spec_l1d_flush_msr;
2252 + } else {
2253 + spec_l1d_flush = spec_l1d_flush_noop;
2254 + }
2255 +
2256 + membar_producer();
2207 2257 }
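
A brief usage note (editorial, not part of the webrev): because the decision
is latched into the spec_l1d_flush function pointer on the boot CPU, a
hypothetical caller on the guest-entry path needs no feature checks of its
own:

	/* Flush the L1D cache if, and only if, the boot CPU decided we must. */
	spec_l1d_flush();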
2208 2258
2209 2259 /*
2210 2260 * Setup XFeature_Enabled_Mask register. Required by xsave feature.
2211 2261 */
2212 2262 void
2213 2263 setup_xfem(void)
2214 2264 {
2215 2265 uint64_t flags = XFEATURE_LEGACY_FP;
2216 2266
2217 2267 ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
2218 2268
2219 2269 if (is_x86_feature(x86_featureset, X86FSET_SSE))
2220 2270 flags |= XFEATURE_SSE;
2221 2271
2222 2272 if (is_x86_feature(x86_featureset, X86FSET_AVX))
2223 2273 flags |= XFEATURE_AVX;
2224 2274
2225 2275 if (is_x86_feature(x86_featureset, X86FSET_AVX512F))
2226 2276 flags |= XFEATURE_AVX512;
2227 2277
2228 2278 set_xcr(XFEATURE_ENABLED_MASK, flags);
2229 2279
2230 2280 xsave_bv_all = flags;
2231 2281 }
2232 2282
2233 2283 static void
2234 2284 cpuid_pass1_topology(cpu_t *cpu, uchar_t *featureset)
2235 2285 {
2236 2286 struct cpuid_info *cpi;
2237 2287
2238 2288 cpi = cpu->cpu_m.mcpu_cpi;
2239 2289
2240 2290 if (cpi->cpi_vendor == X86_VENDOR_AMD) {
2241 2291 cpuid_gather_amd_topology_leaves(cpu);
2242 2292 }
2243 2293
2244 2294 cpi->cpi_apicid = cpuid_gather_apicid(cpi);
2245 2295
2246 2296 /*
2247 2297 * Before we can calculate the IDs that we should assign to this
2248 2298 * processor, we need to understand how many cores and threads it has.
2249 2299 */
2250 2300 switch (cpi->cpi_vendor) {
2251 2301 case X86_VENDOR_Intel:
2252 2302 cpuid_intel_ncores(cpi, &cpi->cpi_ncpu_per_chip,
2253 2303 &cpi->cpi_ncore_per_chip);
2254 2304 break;
2255 2305 case X86_VENDOR_AMD:
2256 2306 cpuid_amd_ncores(cpi, &cpi->cpi_ncpu_per_chip,
2257 2307 &cpi->cpi_ncore_per_chip);
2258 2308 break;
2259 2309 default:
2260 2310 /*
2261 2311 		 * If we have some other x86 compatible chip, it's not clear how
2262 2312 		 * it would behave. The most common case is virtualization
2263 2313 * today, though there are also 64-bit VIA chips. Assume that
2264 2314 * all we can get is the basic Leaf 1 HTT information.
2265 2315 */
2266 2316 if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) {
2267 2317 cpi->cpi_ncore_per_chip = 1;
2268 2318 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi);
2269 2319 }
2270 2320 break;
2271 2321 }
2272 2322
2273 2323 /*
2274 2324 * Based on the calculated number of threads and cores, potentially
2275 2325 * assign the HTT and CMT features.
2276 2326 */
2277 2327 if (cpi->cpi_ncore_per_chip > 1) {
2278 2328 add_x86_feature(featureset, X86FSET_CMP);
2279 2329 }
2280 2330
2281 2331 if (cpi->cpi_ncpu_per_chip > 1 &&
2282 2332 cpi->cpi_ncpu_per_chip != cpi->cpi_ncore_per_chip) {
2283 2333 add_x86_feature(featureset, X86FSET_HTT);
2284 2334 }
2285 2335
2286 2336 /*
2287 2337 * Now that has been set up, we need to go through and calculate all of
2288 2338 * the rest of the parameters that exist. If we think the CPU doesn't
2289 2339 * have either SMT (HTT) or CMP, then we basically go through and fake
2290 2340 * up information in some way. The most likely case for this is
2291 2341 * virtualization where we have a lot of partial topology information.
2292 2342 */
2293 2343 if (!is_x86_feature(featureset, X86FSET_HTT) &&
2294 2344 !is_x86_feature(featureset, X86FSET_CMP)) {
2295 2345 /*
2296 2346 * This is a single core, single-threaded processor.
2297 2347 */
2298 2348 cpi->cpi_procnodes_per_pkg = 1;
2299 2349 cpi->cpi_cores_per_compunit = 1;
2300 2350 cpi->cpi_compunitid = 0;
2301 2351 cpi->cpi_chipid = -1;
2302 2352 cpi->cpi_clogid = 0;
2303 2353 cpi->cpi_coreid = cpu->cpu_id;
2304 2354 cpi->cpi_pkgcoreid = 0;
2305 2355 if (cpi->cpi_vendor == X86_VENDOR_AMD) {
2306 2356 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 3, 0);
2307 2357 } else {
2308 2358 cpi->cpi_procnodeid = cpi->cpi_chipid;
2309 2359 }
2310 2360 } else {
2311 2361 switch (cpi->cpi_vendor) {
2312 2362 case X86_VENDOR_Intel:
2313 2363 cpuid_intel_getids(cpu, featureset);
2314 2364 break;
2315 2365 case X86_VENDOR_AMD:
2316 2366 cpuid_amd_getids(cpu, featureset);
2317 2367 break;
2318 2368 default:
2319 2369 /*
2320 2370 * In this case, it's hard to say what we should do.
2321 2371 			 * We're going to model them to the OS as single-
2322 2372 			 * threaded cores. We don't have a good identifier for
2323 2373 			 * them, so we're just going to use the cpu id, placing
2324 2374 			 * them all on a single chip.
2325 2375 *
2326 2376 * This case has historically been different from the
2327 2377 * case above where we don't have HTT or CMP. While they
2328 2378 * could be combined, we've opted to keep it separate to
2329 2379 * minimize the risk of topology changes in weird cases.
2330 2380 */
2331 2381 cpi->cpi_procnodes_per_pkg = 1;
2332 2382 cpi->cpi_cores_per_compunit = 1;
2333 2383 cpi->cpi_chipid = 0;
2334 2384 cpi->cpi_coreid = cpu->cpu_id;
2335 2385 cpi->cpi_clogid = cpu->cpu_id;
2336 2386 cpi->cpi_pkgcoreid = cpu->cpu_id;
2337 2387 cpi->cpi_procnodeid = cpi->cpi_chipid;
2338 2388 cpi->cpi_compunitid = cpi->cpi_coreid;
2339 2389 break;
2340 2390 }
2341 2391 }
2342 2392 }
2343 2393
2344 2394 void
2345 2395 cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
2346 2396 {
2347 2397 uint32_t mask_ecx, mask_edx;
2348 2398 struct cpuid_info *cpi;
2349 2399 struct cpuid_regs *cp;
2350 2400 int xcpuid;
2351 2401 #if !defined(__xpv)
2352 2402 extern int idle_cpu_prefer_mwait;
2353 2403 #endif
2354 2404
2355 2405 /*
2356 2406 * Space statically allocated for BSP, ensure pointer is set
2357 2407 */
2358 2408 if (cpu->cpu_id == 0) {
2359 2409 if (cpu->cpu_m.mcpu_cpi == NULL)
2360 2410 cpu->cpu_m.mcpu_cpi = &cpuid_info0;
2361 2411 }
2362 2412
2363 2413 add_x86_feature(featureset, X86FSET_CPUID);
2364 2414
2365 2415 cpi = cpu->cpu_m.mcpu_cpi;
2366 2416 ASSERT(cpi != NULL);
2367 2417 cp = &cpi->cpi_std[0];
2368 2418 cp->cp_eax = 0;
2369 2419 cpi->cpi_maxeax = __cpuid_insn(cp);
2370 2420 {
2371 2421 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr;
2372 2422 *iptr++ = cp->cp_ebx;
2373 2423 *iptr++ = cp->cp_edx;
2374 2424 *iptr++ = cp->cp_ecx;
2375 2425 *(char *)&cpi->cpi_vendorstr[12] = '\0';
2376 2426 }
2377 2427
2378 2428 cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr);
2379 2429 x86_vendor = cpi->cpi_vendor; /* for compatibility */
2380 2430
2381 2431 /*
2382 2432 * Limit the range in case of weird hardware
2383 2433 */
2384 2434 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX)
2385 2435 cpi->cpi_maxeax = CPI_MAXEAX_MAX;
2386 2436 if (cpi->cpi_maxeax < 1)
2387 2437 goto pass1_done;
2388 2438
2389 2439 cp = &cpi->cpi_std[1];
2390 2440 cp->cp_eax = 1;
2391 2441 (void) __cpuid_insn(cp);
2392 2442
2393 2443 /*
2394 2444 * Extract identifying constants for easy access.
2395 2445 */
2396 2446 cpi->cpi_model = CPI_MODEL(cpi);
2397 2447 cpi->cpi_family = CPI_FAMILY(cpi);
2398 2448
2399 2449 if (cpi->cpi_family == 0xf)
2400 2450 cpi->cpi_family += CPI_FAMILY_XTD(cpi);
2401 2451
2402 2452 /*
2403 2453 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf.
2404 2454 * Intel, and presumably everyone else, uses model == 0xf, as
2405 2455 * one would expect (max value means possible overflow). Sigh.
2406 2456 */
2407 2457
2408 2458 switch (cpi->cpi_vendor) {
2409 2459 case X86_VENDOR_Intel:
2410 2460 if (IS_EXTENDED_MODEL_INTEL(cpi))
2411 2461 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
2412 2462 break;
2413 2463 case X86_VENDOR_AMD:
2414 2464 if (CPI_FAMILY(cpi) == 0xf)
2415 2465 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
2416 2466 break;
2417 2467 default:
2418 2468 if (cpi->cpi_model == 0xf)
2419 2469 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
2420 2470 break;
2421 2471 }
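
	/*
	 * Editorial worked example (not part of this change), using a
	 * hypothetical leaf 1 %eax value of 0x000506e3 for an Intel family
	 * 6 part: CPI_FAMILY = 0x6, CPI_MODEL = 0xe, CPI_MODEL_XTD = 0x5,
	 * CPI_STEP = 0x3. The base family is not 0xf, so cpi_family stays
	 * 0x6; since Intel folds in the extended model for family 6, the
	 * final model is
	 *
	 *	cpi_model = 0xe + (0x5 << 4) = 0x5e
	 */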
2422 2472
2423 2473 cpi->cpi_step = CPI_STEP(cpi);
2424 2474 cpi->cpi_brandid = CPI_BRANDID(cpi);
2425 2475
2426 2476 /*
2427 2477 * *default* assumptions:
2428 2478 * - believe %edx feature word
2429 2479 * - ignore %ecx feature word
2430 2480 * - 32-bit virtual and physical addressing
2431 2481 */
2432 2482 mask_edx = 0xffffffff;
2433 2483 mask_ecx = 0;
2434 2484
2435 2485 cpi->cpi_pabits = cpi->cpi_vabits = 32;
2436 2486
2437 2487 switch (cpi->cpi_vendor) {
2438 2488 case X86_VENDOR_Intel:
2439 2489 if (cpi->cpi_family == 5)
2440 2490 x86_type = X86_TYPE_P5;
2441 2491 else if (IS_LEGACY_P6(cpi)) {
2442 2492 x86_type = X86_TYPE_P6;
2443 2493 pentiumpro_bug4046376 = 1;
2444 2494 /*
2445 2495 * Clear the SEP bit when it was set erroneously
2446 2496 */
2447 2497 if (cpi->cpi_model < 3 && cpi->cpi_step < 3)
2448 2498 cp->cp_edx &= ~CPUID_INTC_EDX_SEP;
2449 2499 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) {
2450 2500 x86_type = X86_TYPE_P4;
2451 2501 /*
2452 2502 * We don't currently depend on any of the %ecx
2453 2503 * features until Prescott, so we'll only check
2454 2504 * this from P4 onwards. We might want to revisit
2455 2505 * that idea later.
2456 2506 */
2457 2507 mask_ecx = 0xffffffff;
2458 2508 } else if (cpi->cpi_family > 0xf)
2459 2509 mask_ecx = 0xffffffff;
2460 2510 /*
2461 2511 * We don't support MONITOR/MWAIT if leaf 5 is not available
2462 2512 * to obtain the monitor linesize.
2463 2513 */
2464 2514 if (cpi->cpi_maxeax < 5)
2465 2515 mask_ecx &= ~CPUID_INTC_ECX_MON;
2466 2516 break;
2467 2517 case X86_VENDOR_IntelClone:
2468 2518 default:
2469 2519 break;
2470 2520 case X86_VENDOR_AMD:
2471 2521 #if defined(OPTERON_ERRATUM_108)
2472 2522 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) {
2473 2523 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0;
2474 2524 cpi->cpi_model = 0xc;
2475 2525 } else
2476 2526 #endif
2477 2527 if (cpi->cpi_family == 5) {
2478 2528 /*
2479 2529 * AMD K5 and K6
2480 2530 *
2481 2531 * These CPUs have an incomplete implementation
2482 2532 * of MCA/MCE which we mask away.
2483 2533 */
2484 2534 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA);
2485 2535
2486 2536 /*
2487 2537 * Model 0 uses the wrong (APIC) bit
2488 2538 * to indicate PGE. Fix it here.
2489 2539 */
2490 2540 if (cpi->cpi_model == 0) {
2491 2541 if (cp->cp_edx & 0x200) {
2492 2542 cp->cp_edx &= ~0x200;
2493 2543 cp->cp_edx |= CPUID_INTC_EDX_PGE;
2494 2544 }
2495 2545 }
2496 2546
2497 2547 /*
2498 2548 * Early models had problems w/ MMX; disable.
2499 2549 */
2500 2550 if (cpi->cpi_model < 6)
2501 2551 mask_edx &= ~CPUID_INTC_EDX_MMX;
2502 2552 }
2503 2553
2504 2554 /*
2505 2555 * For newer families, SSE3 and CX16, at least, are valid;
2506 2556 * enable all
2507 2557 */
2508 2558 if (cpi->cpi_family >= 0xf)
2509 2559 mask_ecx = 0xffffffff;
2510 2560 /*
2511 2561 * We don't support MONITOR/MWAIT if leaf 5 is not available
2512 2562 * to obtain the monitor linesize.
2513 2563 */
2514 2564 if (cpi->cpi_maxeax < 5)
2515 2565 mask_ecx &= ~CPUID_INTC_ECX_MON;
2516 2566
2517 2567 #if !defined(__xpv)
2518 2568 /*
2519 2569 * AMD has not historically used MWAIT in the CPU's idle loop.
2520 2570 * Pre-family-10h Opterons do not have the MWAIT instruction. We
2521 2571 * know for certain that in at least family 17h, per AMD, mwait
2522 2572 * is preferred. Families in-between are less certain.
2523 2573 */
2524 2574 if (cpi->cpi_family < 0x17) {
2525 2575 idle_cpu_prefer_mwait = 0;
2526 2576 }
2527 2577 #endif
2528 2578
2529 2579 break;
2530 2580 case X86_VENDOR_TM:
2531 2581 /*
2532 2582 * workaround the NT workaround in CMS 4.1
2533 2583 */
2534 2584 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 &&
2535 2585 (cpi->cpi_step == 2 || cpi->cpi_step == 3))
2536 2586 cp->cp_edx |= CPUID_INTC_EDX_CX8;
2537 2587 break;
2538 2588 case X86_VENDOR_Centaur:
2539 2589 /*
2540 2590 * workaround the NT workarounds again
2541 2591 */
2542 2592 if (cpi->cpi_family == 6)
2543 2593 cp->cp_edx |= CPUID_INTC_EDX_CX8;
2544 2594 break;
2545 2595 case X86_VENDOR_Cyrix:
2546 2596 /*
2547 2597 * We rely heavily on the probing in locore
2548 2598 * to actually figure out what parts, if any,
2549 2599 * of the Cyrix cpuid instruction to believe.
2550 2600 */
2551 2601 switch (x86_type) {
2552 2602 case X86_TYPE_CYRIX_486:
2553 2603 mask_edx = 0;
2554 2604 break;
2555 2605 case X86_TYPE_CYRIX_6x86:
2556 2606 mask_edx = 0;
2557 2607 break;
2558 2608 case X86_TYPE_CYRIX_6x86L:
2559 2609 mask_edx =
2560 2610 CPUID_INTC_EDX_DE |
2561 2611 CPUID_INTC_EDX_CX8;
2562 2612 break;
2563 2613 case X86_TYPE_CYRIX_6x86MX:
2564 2614 mask_edx =
2565 2615 CPUID_INTC_EDX_DE |
2566 2616 CPUID_INTC_EDX_MSR |
2567 2617 CPUID_INTC_EDX_CX8 |
2568 2618 CPUID_INTC_EDX_PGE |
2569 2619 CPUID_INTC_EDX_CMOV |
2570 2620 CPUID_INTC_EDX_MMX;
2571 2621 break;
2572 2622 case X86_TYPE_CYRIX_GXm:
2573 2623 mask_edx =
2574 2624 CPUID_INTC_EDX_MSR |
2575 2625 CPUID_INTC_EDX_CX8 |
2576 2626 CPUID_INTC_EDX_CMOV |
2577 2627 CPUID_INTC_EDX_MMX;
2578 2628 break;
2579 2629 case X86_TYPE_CYRIX_MediaGX:
2580 2630 break;
2581 2631 case X86_TYPE_CYRIX_MII:
2582 2632 case X86_TYPE_VIA_CYRIX_III:
2583 2633 mask_edx =
2584 2634 CPUID_INTC_EDX_DE |
2585 2635 CPUID_INTC_EDX_TSC |
2586 2636 CPUID_INTC_EDX_MSR |
2587 2637 CPUID_INTC_EDX_CX8 |
2588 2638 CPUID_INTC_EDX_PGE |
2589 2639 CPUID_INTC_EDX_CMOV |
2590 2640 CPUID_INTC_EDX_MMX;
2591 2641 break;
2592 2642 default:
2593 2643 break;
2594 2644 }
2595 2645 break;
2596 2646 }
2597 2647
2598 2648 #if defined(__xpv)
2599 2649 /*
2600 2650 * Do not support MONITOR/MWAIT under a hypervisor
2601 2651 */
2602 2652 mask_ecx &= ~CPUID_INTC_ECX_MON;
2603 2653 /*
2604 2654 * Do not support XSAVE under a hypervisor for now
2605 2655 */
2606 2656 xsave_force_disable = B_TRUE;
2607 2657
2608 2658 #endif /* __xpv */
2609 2659
2610 2660 if (xsave_force_disable) {
2611 2661 mask_ecx &= ~CPUID_INTC_ECX_XSAVE;
2612 2662 mask_ecx &= ~CPUID_INTC_ECX_AVX;
2613 2663 mask_ecx &= ~CPUID_INTC_ECX_F16C;
2614 2664 mask_ecx &= ~CPUID_INTC_ECX_FMA;
2615 2665 }
2616 2666
2617 2667 /*
2618 2668 * Now we've figured out the masks that determine
2619 2669 * which bits we choose to believe, apply the masks
2620 2670 * to the feature words, then map the kernel's view
2621 2671 * of these feature words into its feature word.
2622 2672 */
2623 2673 cp->cp_edx &= mask_edx;
2624 2674 cp->cp_ecx &= mask_ecx;
2625 2675
2626 2676 /*
2627 2677 * apply any platform restrictions (we don't call this
2628 2678 * immediately after __cpuid_insn here, because we need the
2629 2679 * workarounds applied above first)
2630 2680 */
2631 2681 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp);
2632 2682
2633 2683 /*
2634 2684 * In addition to ecx and edx, Intel and AMD are storing a bunch of
2635 2685 * instruction set extensions in leaf 7's ebx, ecx, and edx.
2636 2686 */
2637 2687 if (cpi->cpi_maxeax >= 7) {
2638 2688 struct cpuid_regs *ecp;
2639 2689 ecp = &cpi->cpi_std[7];
2640 2690 ecp->cp_eax = 7;
2641 2691 ecp->cp_ecx = 0;
2642 2692 (void) __cpuid_insn(ecp);
2643 2693
2644 2694 /*
2645 2695 * If XSAVE has been disabled, just ignore all of the
2646 2696 * extended-save-area dependent flags here.
2647 2697 */
2648 2698 if (xsave_force_disable) {
2649 2699 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_BMI1;
2650 2700 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_BMI2;
2651 2701 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_AVX2;
2652 2702 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_MPX;
2653 2703 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_ALL_AVX512;
2654 2704 ecp->cp_ecx &= ~CPUID_INTC_ECX_7_0_ALL_AVX512;
2655 2705 ecp->cp_edx &= ~CPUID_INTC_EDX_7_0_ALL_AVX512;
2656 2706 }
2657 2707
2658 2708 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_SMEP)
2659 2709 add_x86_feature(featureset, X86FSET_SMEP);
2660 2710
2661 2711 /*
2662 2712 * We check disable_smap here in addition to in startup_smap()
2663 2713 * to ensure CPUs that aren't the boot CPU don't accidentally
2664 2714 * include it in the feature set and thus generate a mismatched
2665 2715 * x86 feature set across CPUs.
2666 2716 */
2667 2717 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_SMAP &&
2668 2718 disable_smap == 0)
2669 2719 add_x86_feature(featureset, X86FSET_SMAP);
2670 2720
2671 2721 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_RDSEED)
2672 2722 add_x86_feature(featureset, X86FSET_RDSEED);
2673 2723
2674 2724 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_ADX)
2675 2725 add_x86_feature(featureset, X86FSET_ADX);
2676 2726
2677 2727 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_FSGSBASE)
2678 2728 add_x86_feature(featureset, X86FSET_FSGSBASE);
2679 2729
2680 2730 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_CLFLUSHOPT)
2681 2731 add_x86_feature(featureset, X86FSET_CLFLUSHOPT);
2682 2732
2683 2733 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
2684 2734 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_INVPCID)
2685 2735 add_x86_feature(featureset, X86FSET_INVPCID);
2686 2736
2687 2737 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_MPX)
2688 2738 add_x86_feature(featureset, X86FSET_MPX);
2689 2739
2690 2740 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_CLWB)
2691 2741 add_x86_feature(featureset, X86FSET_CLWB);
2692 2742 }
2693 2743 }
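For readers following the leaf-7 handling above, here is a minimal user-level sketch of the same query using GCC/Clang's <cpuid.h> helper rather than the kernel's __cpuid_insn(); the SMEP (EBX bit 7) and SMAP (EBX bit 20) positions are taken from the Intel SDM and this snippet is an illustration only, not part of the patch.

/* Sketch: query CPUID leaf 7, subleaf 0, outside the kernel (GCC/Clang). */
#include <cpuid.h>
#include <stdio.h>

int
main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* __get_cpuid_count() returns 0 if the leaf is not supported. */
	if (__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx) == 0)
		return (1);

	/* EBX bit 7 = SMEP, EBX bit 20 = SMAP (per the Intel SDM). */
	printf("SMEP: %s\n", (ebx & (1u << 7)) ? "yes" : "no");
	printf("SMAP: %s\n", (ebx & (1u << 20)) ? "yes" : "no");
	return (0);
}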
2694 2744
2695 2745 /*
2696 2746 * fold in overrides from the "eeprom" mechanism
2697 2747 */
2698 2748 cp->cp_edx |= cpuid_feature_edx_include;
2699 2749 cp->cp_edx &= ~cpuid_feature_edx_exclude;
2700 2750
2701 2751 cp->cp_ecx |= cpuid_feature_ecx_include;
2702 2752 cp->cp_ecx &= ~cpuid_feature_ecx_exclude;
2703 2753
2704 2754 if (cp->cp_edx & CPUID_INTC_EDX_PSE) {
2705 2755 add_x86_feature(featureset, X86FSET_LARGEPAGE);
2706 2756 }
2707 2757 if (cp->cp_edx & CPUID_INTC_EDX_TSC) {
2708 2758 add_x86_feature(featureset, X86FSET_TSC);
2709 2759 }
2710 2760 if (cp->cp_edx & CPUID_INTC_EDX_MSR) {
2711 2761 add_x86_feature(featureset, X86FSET_MSR);
2712 2762 }
2713 2763 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) {
2714 2764 add_x86_feature(featureset, X86FSET_MTRR);
2715 2765 }
2716 2766 if (cp->cp_edx & CPUID_INTC_EDX_PGE) {
2717 2767 add_x86_feature(featureset, X86FSET_PGE);
2718 2768 }
2719 2769 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) {
2720 2770 add_x86_feature(featureset, X86FSET_CMOV);
2721 2771 }
2722 2772 if (cp->cp_edx & CPUID_INTC_EDX_MMX) {
2723 2773 add_x86_feature(featureset, X86FSET_MMX);
2724 2774 }
2725 2775 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 &&
2726 2776 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) {
2727 2777 add_x86_feature(featureset, X86FSET_MCA);
2728 2778 }
2729 2779 if (cp->cp_edx & CPUID_INTC_EDX_PAE) {
2730 2780 add_x86_feature(featureset, X86FSET_PAE);
2731 2781 }
2732 2782 if (cp->cp_edx & CPUID_INTC_EDX_CX8) {
2733 2783 add_x86_feature(featureset, X86FSET_CX8);
2734 2784 }
2735 2785 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) {
2736 2786 add_x86_feature(featureset, X86FSET_CX16);
2737 2787 }
2738 2788 if (cp->cp_edx & CPUID_INTC_EDX_PAT) {
2739 2789 add_x86_feature(featureset, X86FSET_PAT);
2740 2790 }
2741 2791 if (cp->cp_edx & CPUID_INTC_EDX_SEP) {
2742 2792 add_x86_feature(featureset, X86FSET_SEP);
2743 2793 }
2744 2794 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) {
2745 2795 /*
2746 2796 * In our implementation, fxsave/fxrstor
2747 2797 * are prerequisites before we'll even
2748 2798 * try and do SSE things.
2749 2799 */
2750 2800 if (cp->cp_edx & CPUID_INTC_EDX_SSE) {
2751 2801 add_x86_feature(featureset, X86FSET_SSE);
2752 2802 }
2753 2803 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) {
2754 2804 add_x86_feature(featureset, X86FSET_SSE2);
2755 2805 }
2756 2806 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) {
2757 2807 add_x86_feature(featureset, X86FSET_SSE3);
2758 2808 }
2759 2809 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) {
2760 2810 add_x86_feature(featureset, X86FSET_SSSE3);
2761 2811 }
2762 2812 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) {
2763 2813 add_x86_feature(featureset, X86FSET_SSE4_1);
2764 2814 }
2765 2815 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) {
2766 2816 add_x86_feature(featureset, X86FSET_SSE4_2);
2767 2817 }
2768 2818 if (cp->cp_ecx & CPUID_INTC_ECX_AES) {
2769 2819 add_x86_feature(featureset, X86FSET_AES);
2770 2820 }
2771 2821 if (cp->cp_ecx & CPUID_INTC_ECX_PCLMULQDQ) {
2772 2822 add_x86_feature(featureset, X86FSET_PCLMULQDQ);
2773 2823 }
2774 2824
2775 2825 if (cpi->cpi_std[7].cp_ebx & CPUID_INTC_EBX_7_0_SHA)
2776 2826 add_x86_feature(featureset, X86FSET_SHA);
2777 2827
2778 2828 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_UMIP)
2779 2829 add_x86_feature(featureset, X86FSET_UMIP);
2780 2830 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_PKU)
2781 2831 add_x86_feature(featureset, X86FSET_PKU);
2782 2832 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_OSPKE)
2783 2833 add_x86_feature(featureset, X86FSET_OSPKE);
2784 2834
2785 2835 if (cp->cp_ecx & CPUID_INTC_ECX_XSAVE) {
2786 2836 add_x86_feature(featureset, X86FSET_XSAVE);
2787 2837
2788 2838 /* We only test AVX & AVX512 when there is XSAVE */
2789 2839
2790 2840 if (cp->cp_ecx & CPUID_INTC_ECX_AVX) {
2791 2841 add_x86_feature(featureset,
2792 2842 X86FSET_AVX);
2793 2843
2794 2844 /*
2795 2845 * Intel says we can't check these without also
2796 2846 * checking AVX.
2797 2847 */
2798 2848 if (cp->cp_ecx & CPUID_INTC_ECX_F16C)
2799 2849 add_x86_feature(featureset,
2800 2850 X86FSET_F16C);
2801 2851
2802 2852 if (cp->cp_ecx & CPUID_INTC_ECX_FMA)
2803 2853 add_x86_feature(featureset,
2804 2854 X86FSET_FMA);
2805 2855
2806 2856 if (cpi->cpi_std[7].cp_ebx &
2807 2857 CPUID_INTC_EBX_7_0_BMI1)
2808 2858 add_x86_feature(featureset,
2809 2859 X86FSET_BMI1);
2810 2860
2811 2861 if (cpi->cpi_std[7].cp_ebx &
2812 2862 CPUID_INTC_EBX_7_0_BMI2)
2813 2863 add_x86_feature(featureset,
2814 2864 X86FSET_BMI2);
2815 2865
2816 2866 if (cpi->cpi_std[7].cp_ebx &
2817 2867 CPUID_INTC_EBX_7_0_AVX2)
2818 2868 add_x86_feature(featureset,
2819 2869 X86FSET_AVX2);
2820 2870 }
2821 2871
2822 2872 if (cpi->cpi_vendor == X86_VENDOR_Intel &&
2823 2873 (cpi->cpi_std[7].cp_ebx &
2824 2874 CPUID_INTC_EBX_7_0_AVX512F) != 0) {
2825 2875 add_x86_feature(featureset, X86FSET_AVX512F);
2826 2876
2827 2877 if (cpi->cpi_std[7].cp_ebx &
2828 2878 CPUID_INTC_EBX_7_0_AVX512DQ)
2829 2879 add_x86_feature(featureset,
2830 2880 X86FSET_AVX512DQ);
2831 2881 if (cpi->cpi_std[7].cp_ebx &
2832 2882 CPUID_INTC_EBX_7_0_AVX512IFMA)
2833 2883 add_x86_feature(featureset,
2834 2884 X86FSET_AVX512FMA);
2835 2885 if (cpi->cpi_std[7].cp_ebx &
2836 2886 CPUID_INTC_EBX_7_0_AVX512PF)
2837 2887 add_x86_feature(featureset,
2838 2888 X86FSET_AVX512PF);
2839 2889 if (cpi->cpi_std[7].cp_ebx &
2840 2890 CPUID_INTC_EBX_7_0_AVX512ER)
2841 2891 add_x86_feature(featureset,
2842 2892 X86FSET_AVX512ER);
2843 2893 if (cpi->cpi_std[7].cp_ebx &
2844 2894 CPUID_INTC_EBX_7_0_AVX512CD)
2845 2895 add_x86_feature(featureset,
2846 2896 X86FSET_AVX512CD);
2847 2897 if (cpi->cpi_std[7].cp_ebx &
2848 2898 CPUID_INTC_EBX_7_0_AVX512BW)
2849 2899 add_x86_feature(featureset,
2850 2900 X86FSET_AVX512BW);
2851 2901 if (cpi->cpi_std[7].cp_ebx &
2852 2902 CPUID_INTC_EBX_7_0_AVX512VL)
2853 2903 add_x86_feature(featureset,
2854 2904 X86FSET_AVX512VL);
2855 2905
2856 2906 if (cpi->cpi_std[7].cp_ecx &
2857 2907 CPUID_INTC_ECX_7_0_AVX512VBMI)
2858 2908 add_x86_feature(featureset,
2859 2909 X86FSET_AVX512VBMI);
2860 2910 if (cpi->cpi_std[7].cp_ecx &
2861 2911 CPUID_INTC_ECX_7_0_AVX512VNNI)
2862 2912 add_x86_feature(featureset,
2863 2913 X86FSET_AVX512VNNI);
2864 2914 if (cpi->cpi_std[7].cp_ecx &
2865 2915 CPUID_INTC_ECX_7_0_AVX512VPOPCDQ)
2866 2916 add_x86_feature(featureset,
2867 2917 X86FSET_AVX512VPOPCDQ);
2868 2918
2869 2919 if (cpi->cpi_std[7].cp_edx &
2870 2920 CPUID_INTC_EDX_7_0_AVX5124NNIW)
2871 2921 add_x86_feature(featureset,
2872 2922 X86FSET_AVX512NNIW);
2873 2923 if (cpi->cpi_std[7].cp_edx &
2874 2924 CPUID_INTC_EDX_7_0_AVX5124FMAPS)
2875 2925 add_x86_feature(featureset,
2876 2926 X86FSET_AVX512FMAPS);
2877 2927 }
2878 2928 }
2879 2929 }
2880 2930
2881 2931 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
2882 2932 if (cp->cp_ecx & CPUID_INTC_ECX_PCID) {
2883 2933 add_x86_feature(featureset, X86FSET_PCID);
2884 2934 }
2885 2935 }
2886 2936
2887 2937 if (cp->cp_ecx & CPUID_INTC_ECX_X2APIC) {
2888 2938 add_x86_feature(featureset, X86FSET_X2APIC);
2889 2939 }
2890 2940 if (cp->cp_edx & CPUID_INTC_EDX_DE) {
2891 2941 add_x86_feature(featureset, X86FSET_DE);
2892 2942 }
2893 2943 #if !defined(__xpv)
2894 2944 if (cp->cp_ecx & CPUID_INTC_ECX_MON) {
2895 2945
2896 2946 /*
2897 2947 * We require the CLFLUSH instruction for the erratum workaround
2898 2948 * to use MONITOR/MWAIT.
2899 2949 */
2900 2950 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
2901 2951 cpi->cpi_mwait.support |= MWAIT_SUPPORT;
2902 2952 add_x86_feature(featureset, X86FSET_MWAIT);
2903 2953 } else {
2904 2954 extern int idle_cpu_assert_cflush_monitor;
2905 2955
2906 2956 /*
2907 2957 * All processors we are aware of which have
2908 2958 * MONITOR/MWAIT also have CLFLUSH.
2909 2959 */
2910 2960 if (idle_cpu_assert_cflush_monitor) {
2911 2961 ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) &&
2912 2962 (cp->cp_edx & CPUID_INTC_EDX_CLFSH));
2913 2963 }
2914 2964 }
2915 2965 }
2916 2966 #endif /* __xpv */
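As the block above notes, MWAIT is only accepted when CLFLUSH is also present. A hedged user-level check of the same two CPUID.1 bits (ECX bit 3 = MONITOR, EDX bit 19 = CLFSH) might look like the sketch below; the bit positions come from the Intel SDM, not from this patch.

/* Sketch: require both MONITOR (ECX[3]) and CLFSH (EDX[19]) from CPUID.1. */
#include <cpuid.h>
#include <stdbool.h>

static bool
mwait_usable(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) == 0)
		return (false);
	return ((ecx & (1u << 3)) != 0 && (edx & (1u << 19)) != 0);
}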
2917 2967
2918 2968 if (cp->cp_ecx & CPUID_INTC_ECX_VMX) {
2919 2969 add_x86_feature(featureset, X86FSET_VMX);
2920 2970 }
2921 2971
2922 2972 if (cp->cp_ecx & CPUID_INTC_ECX_RDRAND)
2923 2973 add_x86_feature(featureset, X86FSET_RDRAND);
2924 2974
2925 2975 /*
2926 2976 * Only need this the first time; the rest of the CPUs will follow suit.
2927 2977 * We only capture this for the boot CPU.
2928 2978 */
2929 2979 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
2930 2980 add_x86_feature(featureset, X86FSET_CLFSH);
2931 2981 x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8);
2932 2982 }
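The x86_clflush_size computation above relies on CPUID.1 EBX[15:8], which reports the CLFLUSH line size in 8-byte units. A small sketch of the same arithmetic, with the BITX-style extraction written as plain shifts and masks:

/* Sketch: CLFLUSH line size in bytes = CPUID.1 EBX[15:8] * 8. */
#include <cpuid.h>
#include <stdio.h>

int
main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) != 0 &&
	    (edx & (1u << 19)) != 0) {	/* CLFSH present */
		unsigned int clflush_size = ((ebx >> 8) & 0xff) * 8;
		printf("clflush line size: %u bytes\n", clflush_size);
	}
	return (0);
}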
2933 2983 if (is_x86_feature(featureset, X86FSET_PAE))
2934 2984 cpi->cpi_pabits = 36;
2935 2985
2936 2986 if (cpi->cpi_maxeax >= 0xD && !xsave_force_disable) {
2937 2987 struct cpuid_regs r, *ecp;
2938 2988
2939 2989 ecp = &r;
2940 2990 ecp->cp_eax = 0xD;
2941 2991 ecp->cp_ecx = 1;
2942 2992 ecp->cp_edx = ecp->cp_ebx = 0;
2943 2993 (void) __cpuid_insn(ecp);
2944 2994
2945 2995 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVEOPT)
2946 2996 add_x86_feature(featureset, X86FSET_XSAVEOPT);
2947 2997 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVEC)
2948 2998 add_x86_feature(featureset, X86FSET_XSAVEC);
2949 2999 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVES)
2950 3000 add_x86_feature(featureset, X86FSET_XSAVES);
2951 3001 }
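Leaf 0xD, subleaf 1, reports the XSAVE management extensions tested above: EAX bit 0 is XSAVEOPT, bit 1 is XSAVEC, and bit 3 is XSAVES. A hedged user-level equivalent of the query (bit positions per the Intel SDM):

/* Sketch: CPUID leaf 0xD, subleaf 1 - XSAVE management features. */
#include <cpuid.h>
#include <stdio.h>

int
main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (__get_cpuid_count(0xD, 1, &eax, &ebx, &ecx, &edx) == 0)
		return (1);
	printf("XSAVEOPT: %u\n", (eax >> 0) & 1);
	printf("XSAVEC:   %u\n", (eax >> 1) & 1);
	printf("XSAVES:   %u\n", (eax >> 3) & 1);
	return (0);
}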
2952 3002
2953 3003 /*
2954 3004 * Work on the "extended" feature information, doing
2955 3005 * some basic initialization for cpuid_pass2()
2956 3006 */
2957 3007 xcpuid = 0;
2958 3008 switch (cpi->cpi_vendor) {
2959 3009 case X86_VENDOR_Intel:
2960 3010 /*
2961 3011 * On KVM we know we will have proper support for extended
2962 3012 * cpuid.
2963 3013 */
2964 3014 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf ||
2965 3015 (get_hwenv() == HW_KVM && cpi->cpi_family == 6 &&
2966 3016 (cpi->cpi_model == 6 || cpi->cpi_model == 2)))
2967 3017 xcpuid++;
2968 3018 break;
2969 3019 case X86_VENDOR_AMD:
2970 3020 if (cpi->cpi_family > 5 ||
2971 3021 (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
2972 3022 xcpuid++;
2973 3023 break;
2974 3024 case X86_VENDOR_Cyrix:
2975 3025 /*
2976 3026 * Only these Cyrix CPUs are -known- to support
2977 3027 * extended cpuid operations.
2978 3028 */
2979 3029 if (x86_type == X86_TYPE_VIA_CYRIX_III ||
2980 3030 x86_type == X86_TYPE_CYRIX_GXm)
2981 3031 xcpuid++;
2982 3032 break;
2983 3033 case X86_VENDOR_Centaur:
2984 3034 case X86_VENDOR_TM:
2985 3035 default:
2986 3036 xcpuid++;
2987 3037 break;
2988 3038 }
2989 3039
2990 3040 if (xcpuid) {
2991 3041 cp = &cpi->cpi_extd[0];
2992 3042 cp->cp_eax = CPUID_LEAF_EXT_0;
2993 3043 cpi->cpi_xmaxeax = __cpuid_insn(cp);
2994 3044 }
2995 3045
2996 3046 if (cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) {
2997 3047
2998 3048 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX)
2999 3049 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX;
3000 3050
3001 3051 switch (cpi->cpi_vendor) {
3002 3052 case X86_VENDOR_Intel:
3003 3053 case X86_VENDOR_AMD:
3004 3054 if (cpi->cpi_xmaxeax < 0x80000001)
3005 3055 break;
3006 3056 cp = &cpi->cpi_extd[1];
3007 3057 cp->cp_eax = 0x80000001;
3008 3058 (void) __cpuid_insn(cp);
3009 3059
3010 3060 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
3011 3061 cpi->cpi_family == 5 &&
3012 3062 cpi->cpi_model == 6 &&
3013 3063 cpi->cpi_step == 6) {
3014 3064 /*
3015 3065 * K6 model 6 uses bit 10 to indicate SYSC.
3016 3066 * Later models use bit 11. Fix it here.
3017 3067 */
3018 3068 if (cp->cp_edx & 0x400) {
3019 3069 cp->cp_edx &= ~0x400;
3020 3070 cp->cp_edx |= CPUID_AMD_EDX_SYSC;
3021 3071 }
3022 3072 }
3023 3073
3024 3074 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp);
3025 3075
3026 3076 /*
3027 3077 * Compute the additions to the kernel's feature word.
3028 3078 */
3029 3079 if (cp->cp_edx & CPUID_AMD_EDX_NX) {
3030 3080 add_x86_feature(featureset, X86FSET_NX);
3031 3081 }
3032 3082
3033 3083 /*
3034 3084 * Regardless of whether or not we boot 64-bit,
3035 3085 * we should have a way to identify whether
3036 3086 * the CPU is capable of running 64-bit.
3037 3087 */
3038 3088 if (cp->cp_edx & CPUID_AMD_EDX_LM) {
3039 3089 add_x86_feature(featureset, X86FSET_64);
3040 3090 }
3041 3091
3042 3092 /* 1 GB large page - enable only for 64 bit kernel */
3043 3093 if (cp->cp_edx & CPUID_AMD_EDX_1GPG) {
3044 3094 add_x86_feature(featureset, X86FSET_1GPG);
3045 3095 }
3046 3096
3047 3097 if ((cpi->cpi_vendor == X86_VENDOR_AMD) &&
3048 3098 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) &&
3049 3099 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) {
3050 3100 add_x86_feature(featureset, X86FSET_SSE4A);
3051 3101 }
3052 3102
3053 3103 /*
3054 3104 * It's really tricky to support syscall/sysret in
3055 3105 * the i386 kernel; we rely on sysenter/sysexit
3056 3106 * instead. In the amd64 kernel, things are -way-
3057 3107 * better.
3058 3108 */
3059 3109 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) {
3060 3110 add_x86_feature(featureset, X86FSET_ASYSC);
3061 3111 }
3062 3112
3063 3113 /*
3064 3114 * While we're thinking about system calls, note
3065 3115 * that AMD processors don't support sysenter
3066 3116 * in long mode at all, so don't try to program them.
3067 3117 */
3068 3118 if (x86_vendor == X86_VENDOR_AMD) {
3069 3119 remove_x86_feature(featureset, X86FSET_SEP);
3070 3120 }
3071 3121
3072 3122 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) {
3073 3123 add_x86_feature(featureset, X86FSET_TSCP);
3074 3124 }
3075 3125
3076 3126 if (cp->cp_ecx & CPUID_AMD_ECX_SVM) {
3077 3127 add_x86_feature(featureset, X86FSET_SVM);
3078 3128 }
3079 3129
3080 3130 if (cp->cp_ecx & CPUID_AMD_ECX_TOPOEXT) {
3081 3131 add_x86_feature(featureset, X86FSET_TOPOEXT);
3082 3132 }
3083 3133
3084 3134 if (cp->cp_ecx & CPUID_AMD_ECX_XOP) {
3085 3135 add_x86_feature(featureset, X86FSET_XOP);
3086 3136 }
3087 3137
3088 3138 if (cp->cp_ecx & CPUID_AMD_ECX_FMA4) {
3089 3139 add_x86_feature(featureset, X86FSET_FMA4);
3090 3140 }
3091 3141
3092 3142 if (cp->cp_ecx & CPUID_AMD_ECX_TBM) {
3093 3143 add_x86_feature(featureset, X86FSET_TBM);
3094 3144 }
3095 3145
3096 3146 if (cp->cp_ecx & CPUID_AMD_ECX_MONITORX) {
3097 3147 add_x86_feature(featureset, X86FSET_MONITORX);
3098 3148 }
3099 3149 break;
3100 3150 default:
3101 3151 break;
3102 3152 }
3103 3153
3104 3154 /*
3105 3155 * Get CPUID data about processor cores and hyperthreads.
3106 3156 */
3107 3157 switch (cpi->cpi_vendor) {
3108 3158 case X86_VENDOR_Intel:
3109 3159 if (cpi->cpi_maxeax >= 4) {
3110 3160 cp = &cpi->cpi_std[4];
3111 3161 cp->cp_eax = 4;
3112 3162 cp->cp_ecx = 0;
3113 3163 (void) __cpuid_insn(cp);
3114 3164 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp);
3115 3165 }
3116 3166 /*FALLTHROUGH*/
3117 3167 case X86_VENDOR_AMD:
3118 3168 if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8)
3119 3169 break;
3120 3170 cp = &cpi->cpi_extd[8];
3121 3171 cp->cp_eax = CPUID_LEAF_EXT_8;
3122 3172 (void) __cpuid_insn(cp);
3123 3173 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8,
3124 3174 cp);
3125 3175
3126 3176 /*
3127 3177 * AMD uses ebx for some extended functions.
3128 3178 */
3129 3179 if (cpi->cpi_vendor == X86_VENDOR_AMD) {
3130 3180 /*
3131 3181 * While we're here, check for the AMD "Error
3132 3182 * Pointer Zero/Restore" feature. This can be
3133 3183 * used to set up the FP save handlers
3134 3184 * appropriately.
3135 3185 */
3136 3186 if (cp->cp_ebx & CPUID_AMD_EBX_ERR_PTR_ZERO) {
3137 3187 cpi->cpi_fp_amd_save = 0;
3138 3188 } else {
3139 3189 cpi->cpi_fp_amd_save = 1;
3140 3190 }
3141 3191
3142 3192 if (cp->cp_ebx & CPUID_AMD_EBX_CLZERO) {
3143 3193 add_x86_feature(featureset,
3144 3194 X86FSET_CLZERO);
3145 3195 }
3146 3196 }
3147 3197
3148 3198 /*
3149 3199 * Virtual and physical address limits from
3150 3200 * cpuid override previously guessed values.
3151 3201 */
3152 3202 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0);
3153 3203 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8);
3154 3204 break;
3155 3205 default:
3156 3206 break;
3157 3207 }
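The pabits/vabits assignment above pulls the address widths from leaf 0x80000008: EAX[7:0] is the physical address size in bits and EAX[15:8] is the virtual (linear) address size. A small sketch of that extraction, again using the GCC/Clang helper purely for illustration:

/* Sketch: physical/virtual address widths from CPUID 0x80000008. */
#include <cpuid.h>
#include <stdio.h>

int
main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (__get_cpuid(0x80000008, &eax, &ebx, &ecx, &edx) == 0)
		return (1);
	printf("physical address bits: %u\n", eax & 0xff);
	printf("virtual address bits:  %u\n", (eax >> 8) & 0xff);
	return (0);
}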
3158 3208
3159 3209 /*
3160 3210 * Get CPUID data about TSC Invariance in Deep C-State.
3161 3211 */
3162 3212 switch (cpi->cpi_vendor) {
3163 3213 case X86_VENDOR_Intel:
3164 3214 case X86_VENDOR_AMD:
3165 3215 if (cpi->cpi_maxeax >= 7) {
3166 3216 cp = &cpi->cpi_extd[7];
3167 3217 cp->cp_eax = 0x80000007;
3168 3218 cp->cp_ecx = 0;
3169 3219 (void) __cpuid_insn(cp);
3170 3220 }
3171 3221 break;
3172 3222 default:
3173 3223 break;
3174 3224 }
3175 3225 }
3176 3226
3177 3227 cpuid_pass1_topology(cpu, featureset);
3178 3228
3179 3229 /*
3180 3230 * Synthesize chip "revision" and socket type
3181 3231 */
3182 3232 cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family,
3183 3233 cpi->cpi_model, cpi->cpi_step);
3184 3234 cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor,
3185 3235 cpi->cpi_family, cpi->cpi_model, cpi->cpi_step);
3186 3236 cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family,
3187 3237 cpi->cpi_model, cpi->cpi_step);
3188 3238
3189 3239 if (cpi->cpi_vendor == X86_VENDOR_AMD) {
3190 3240 if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8 &&
3191 3241 cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_ERR_PTR_ZERO) {
3192 3242 /* Special handling for AMD FP not necessary. */
3193 3243 cpi->cpi_fp_amd_save = 0;
3194 3244 } else {
3195 3245 cpi->cpi_fp_amd_save = 1;
3196 3246 }
3197 3247 }
3198 3248
3199 3249 /*
3200 3250 * Check the processor leaves that are used for security features.
3201 3251 */
3202 3252 cpuid_scan_security(cpu, featureset);
3203 3253
3204 3254 pass1_done:
3205 3255 cpi->cpi_pass = 1;
3206 3256 }
3207 3257
3208 3258 /*
3209 3259 * Make copies of the cpuid table entries we depend on, in
3210 3260 * part for ease of parsing now, in part so that we have only
3211 3261 * one place to correct any of it, in part for ease of
3212 3262 * later export to userland, and in part so we can look at
3213 3263 * this stuff in a crash dump.
3214 3264 */
3215 3265
3216 3266 /*ARGSUSED*/
3217 3267 void
3218 3268 cpuid_pass2(cpu_t *cpu)
3219 3269 {
3220 3270 uint_t n, nmax;
3221 3271 int i;
3222 3272 struct cpuid_regs *cp;
3223 3273 uint8_t *dp;
3224 3274 uint32_t *iptr;
3225 3275 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
3226 3276
3227 3277 ASSERT(cpi->cpi_pass == 1);
3228 3278
3229 3279 if (cpi->cpi_maxeax < 1)
3230 3280 goto pass2_done;
3231 3281
3232 3282 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD)
3233 3283 nmax = NMAX_CPI_STD;
3234 3284 /*
3235 3285 * (We already handled n == 0 and n == 1 in pass 1)
3236 3286 */
3237 3287 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) {
3238 3288 cp->cp_eax = n;
3239 3289
3240 3290 /*
3241 3291 * n == 7 was handled in pass 1
3242 3292 */
3243 3293 if (n == 7)
3244 3294 continue;
3245 3295
3246 3296 /*
3247 3297 * CPUID function 4 expects %ecx to be initialized
3248 3298 * with an index which indicates which cache to return
3249 3299 * information about. The OS is expected to call function 4
3250 3300 * with %ecx set to 0, 1, 2, ... until it returns with
3251 3301 * EAX[4:0] set to 0, which indicates there are no more
3252 3302 * caches.
3253 3303 *
3254 3304 * Here, populate cpi_std[4] with the information returned by
3255 3305 * function 4 when %ecx == 0, and do the rest in cpuid_pass3()
3256 3306 * when dynamic memory allocation becomes available.
3257 3307 *
3258 3308 * Note: we need to explicitly initialize %ecx here, since
3259 3309 * function 4 may have been previously invoked.
3260 3310 */
3261 3311 if (n == 4)
3262 3312 cp->cp_ecx = 0;
3263 3313
3264 3314 (void) __cpuid_insn(cp);
3265 3315 platform_cpuid_mangle(cpi->cpi_vendor, n, cp);
3266 3316 switch (n) {
3267 3317 case 2:
3268 3318 /*
3269 3319 * "the lower 8 bits of the %eax register
3270 3320 * contain a value that identifies the number
3271 3321 * of times the cpuid [instruction] has to be
3272 3322 * executed to obtain a complete image of the
3273 3323 * processor's caching systems."
3274 3324 *
3275 3325 * How *do* they make this stuff up?
3276 3326 */
3277 3327 cpi->cpi_ncache = sizeof (*cp) *
3278 3328 BITX(cp->cp_eax, 7, 0);
3279 3329 if (cpi->cpi_ncache == 0)
3280 3330 break;
3281 3331 cpi->cpi_ncache--; /* skip count byte */
3282 3332
3283 3333 /*
3284 3334 * Well, for now, rather than attempt to implement
3285 3335 * this slightly dubious algorithm, we just look
3286 3336 * at the first 15 ..
3287 3337 */
3288 3338 if (cpi->cpi_ncache > (sizeof (*cp) - 1))
3289 3339 cpi->cpi_ncache = sizeof (*cp) - 1;
3290 3340
3291 3341 dp = cpi->cpi_cacheinfo;
3292 3342 if (BITX(cp->cp_eax, 31, 31) == 0) {
3293 3343 uint8_t *p = (void *)&cp->cp_eax;
3294 3344 for (i = 1; i < 4; i++)
3295 3345 if (p[i] != 0)
3296 3346 *dp++ = p[i];
3297 3347 }
3298 3348 if (BITX(cp->cp_ebx, 31, 31) == 0) {
3299 3349 uint8_t *p = (void *)&cp->cp_ebx;
3300 3350 for (i = 0; i < 4; i++)
3301 3351 if (p[i] != 0)
3302 3352 *dp++ = p[i];
3303 3353 }
3304 3354 if (BITX(cp->cp_ecx, 31, 31) == 0) {
3305 3355 uint8_t *p = (void *)&cp->cp_ecx;
3306 3356 for (i = 0; i < 4; i++)
3307 3357 if (p[i] != 0)
3308 3358 *dp++ = p[i];
3309 3359 }
3310 3360 if (BITX(cp->cp_edx, 31, 31) == 0) {
3311 3361 uint8_t *p = (void *)&cp->cp_edx;
3312 3362 for (i = 0; i < 4; i++)
3313 3363 if (p[i] != 0)
3314 3364 *dp++ = p[i];
3315 3365 }
3316 3366 break;
3317 3367
3318 3368 case 3: /* Processor serial number, if PSN supported */
3319 3369 break;
3320 3370
3321 3371 case 4: /* Deterministic cache parameters */
3322 3372 break;
3323 3373
3324 3374 case 5: /* Monitor/Mwait parameters */
3325 3375 {
3326 3376 size_t mwait_size;
3327 3377
3328 3378 /*
3329 3379 * check cpi_mwait.support which was set in cpuid_pass1
3330 3380 */
3331 3381 if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT))
3332 3382 break;
3333 3383
3334 3384 /*
3335 3385 * Protect ourselves from an insane mwait line size.
3336 3386 * Workaround for incomplete hardware emulator(s).
3337 3387 */
3338 3388 mwait_size = (size_t)MWAIT_SIZE_MAX(cpi);
3339 3389 if (mwait_size < sizeof (uint32_t) ||
3340 3390 !ISP2(mwait_size)) {
3341 3391 #if DEBUG
3342 3392 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait "
3343 3393 "size %ld", cpu->cpu_id, (long)mwait_size);
3344 3394 #endif
3345 3395 break;
3346 3396 }
3347 3397
3348 3398 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi);
3349 3399 cpi->cpi_mwait.mon_max = mwait_size;
3350 3400 if (MWAIT_EXTENSION(cpi)) {
3351 3401 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS;
3352 3402 if (MWAIT_INT_ENABLE(cpi))
3353 3403 cpi->cpi_mwait.support |=
3354 3404 MWAIT_ECX_INT_ENABLE;
3355 3405 }
3356 3406 break;
3357 3407 }
3358 3408 default:
3359 3409 break;
3360 3410 }
3361 3411 }
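The MONITOR/MWAIT handling in case 5 above rejects a monitor line size that is smaller than a uint32_t or not a power of two, guarding against incomplete hardware emulators. The ISP2() test reduces to the usual single-bit-set trick; a minimal sketch of the same sanity check:

/* Sketch: the power-of-two sanity check applied to the mwait line size. */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

static bool
mwait_size_ok(size_t mwait_size)
{
	/* ISP2(x) is true when x has exactly one bit set. */
	bool ispow2 = mwait_size != 0 &&
	    (mwait_size & (mwait_size - 1)) == 0;

	return (mwait_size >= sizeof (uint32_t) && ispow2);
}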
3362 3412
3363 3413 /*
3364 3414 * XSAVE enumeration
3365 3415 */
3366 3416 if (cpi->cpi_maxeax >= 0xD) {
3367 3417 struct cpuid_regs regs;
3368 3418 boolean_t cpuid_d_valid = B_TRUE;
3369 3419
3370 3420 cp = &regs;
3371 3421 cp->cp_eax = 0xD;
3372 3422 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
3373 3423
3374 3424 (void) __cpuid_insn(cp);
3375 3425
3376 3426 /*
3377 3427 * Sanity checks for debug
3378 3428 */
3379 3429 if ((cp->cp_eax & XFEATURE_LEGACY_FP) == 0 ||
3380 3430 (cp->cp_eax & XFEATURE_SSE) == 0) {
3381 3431 cpuid_d_valid = B_FALSE;
3382 3432 }
3383 3433
3384 3434 cpi->cpi_xsave.xsav_hw_features_low = cp->cp_eax;
3385 3435 cpi->cpi_xsave.xsav_hw_features_high = cp->cp_edx;
3386 3436 cpi->cpi_xsave.xsav_max_size = cp->cp_ecx;
3387 3437
3388 3438 /*
3389 3439 * If the hw supports AVX, get the size and offset in the save
3390 3440 * area for the ymm state.
3391 3441 */
3392 3442 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX) {
3393 3443 cp->cp_eax = 0xD;
3394 3444 cp->cp_ecx = 2;
3395 3445 cp->cp_edx = cp->cp_ebx = 0;
3396 3446
3397 3447 (void) __cpuid_insn(cp);
3398 3448
3399 3449 if (cp->cp_ebx != CPUID_LEAFD_2_YMM_OFFSET ||
3400 3450 cp->cp_eax != CPUID_LEAFD_2_YMM_SIZE) {
3401 3451 cpuid_d_valid = B_FALSE;
3402 3452 }
3403 3453
3404 3454 cpi->cpi_xsave.ymm_size = cp->cp_eax;
3405 3455 cpi->cpi_xsave.ymm_offset = cp->cp_ebx;
3406 3456 }
3407 3457
3408 3458 /*
3409 3459 * If the hw supports MPX, get the size and offset in the
3410 3460 * save area for BNDREGS and BNDCSR.
3411 3461 */
3412 3462 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_MPX) {
3413 3463 cp->cp_eax = 0xD;
3414 3464 cp->cp_ecx = 3;
3415 3465 cp->cp_edx = cp->cp_ebx = 0;
3416 3466
3417 3467 (void) __cpuid_insn(cp);
3418 3468
3419 3469 cpi->cpi_xsave.bndregs_size = cp->cp_eax;
3420 3470 cpi->cpi_xsave.bndregs_offset = cp->cp_ebx;
3421 3471
3422 3472 cp->cp_eax = 0xD;
3423 3473 cp->cp_ecx = 4;
3424 3474 cp->cp_edx = cp->cp_ebx = 0;
3425 3475
3426 3476 (void) __cpuid_insn(cp);
3427 3477
3428 3478 cpi->cpi_xsave.bndcsr_size = cp->cp_eax;
3429 3479 cpi->cpi_xsave.bndcsr_offset = cp->cp_ebx;
3430 3480 }
3431 3481
3432 3482 /*
3433 3483 * If the hw supports AVX512, get the size and offset in the
3434 3484 * save area for the opmask registers and zmm state.
3435 3485 */
3436 3486 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX512) {
3437 3487 cp->cp_eax = 0xD;
3438 3488 cp->cp_ecx = 5;
3439 3489 cp->cp_edx = cp->cp_ebx = 0;
3440 3490
3441 3491 (void) __cpuid_insn(cp);
3442 3492
3443 3493 cpi->cpi_xsave.opmask_size = cp->cp_eax;
3444 3494 cpi->cpi_xsave.opmask_offset = cp->cp_ebx;
3445 3495
3446 3496 cp->cp_eax = 0xD;
3447 3497 cp->cp_ecx = 6;
3448 3498 cp->cp_edx = cp->cp_ebx = 0;
3449 3499
3450 3500 (void) __cpuid_insn(cp);
3451 3501
3452 3502 cpi->cpi_xsave.zmmlo_size = cp->cp_eax;
3453 3503 cpi->cpi_xsave.zmmlo_offset = cp->cp_ebx;
3454 3504
3455 3505 cp->cp_eax = 0xD;
3456 3506 cp->cp_ecx = 7;
3457 3507 cp->cp_edx = cp->cp_ebx = 0;
3458 3508
3459 3509 (void) __cpuid_insn(cp);
3460 3510
3461 3511 cpi->cpi_xsave.zmmhi_size = cp->cp_eax;
3462 3512 cpi->cpi_xsave.zmmhi_offset = cp->cp_ebx;
3463 3513 }
3464 3514
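The three blocks above walk CPUID leaf 0xD with %ecx set to the XSAVE state-component index (2 for YMM, 3 and 4 for the MPX BNDREGS/BNDCSR state, 5 through 7 for AVX-512), where EAX returns the component's size and EBX its offset within the save area. A hedged sketch of the generic per-component query:

/* Sketch: size/offset of one XSAVE state component via leaf 0xD. */
#include <cpuid.h>
#include <stdio.h>

static void
xsave_component(unsigned int idx)
{
	unsigned int eax, ebx, ecx, edx;

	if (__get_cpuid_count(0xD, idx, &eax, &ebx, &ecx, &edx) == 0)
		return;
	/* EAX = size in bytes, EBX = offset from the start of the XSAVE area. */
	printf("component %u: size %u, offset %u\n", idx, eax, ebx);
}

int
main(void)
{
	xsave_component(2);	/* YMM (AVX) */
	xsave_component(5);	/* AVX-512 opmask */
	return (0);
}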
3465 3515 if (is_x86_feature(x86_featureset, X86FSET_XSAVE)) {
3466 3516 xsave_state_size = 0;
3467 3517 } else if (cpuid_d_valid) {
3468 3518 xsave_state_size = cpi->cpi_xsave.xsav_max_size;
3469 3519 } else {
3470 3520 /* Broken CPUID 0xD, probably in HVM */
3471 3521 cmn_err(CE_WARN, "cpu%d: CPUID.0xD returns invalid "
3472 3522 "value: hw_low = %d, hw_high = %d, xsave_size = %d"
3473 3523 ", ymm_size = %d, ymm_offset = %d\n",
3474 3524 cpu->cpu_id, cpi->cpi_xsave.xsav_hw_features_low,
3475 3525 cpi->cpi_xsave.xsav_hw_features_high,
3476 3526 (int)cpi->cpi_xsave.xsav_max_size,
3477 3527 (int)cpi->cpi_xsave.ymm_size,
3478 3528 (int)cpi->cpi_xsave.ymm_offset);
3479 3529
3480 3530 if (xsave_state_size != 0) {
3481 3531 /*
3482 3532 * This must be a non-boot CPU. We cannot
3483 3533 * continue, because boot cpu has already
3484 3534 * enabled XSAVE.
3485 3535 */
3486 3536 ASSERT(cpu->cpu_id != 0);
3487 3537 cmn_err(CE_PANIC, "cpu%d: we have already "
3488 3538 "enabled XSAVE on boot cpu, cannot "
3489 3539 "continue.", cpu->cpu_id);
3490 3540 } else {
3491 3541 /*
3492 3542 * If we reached here on the boot CPU, it's also
3493 3543 * almost certain that we'll reach here on the
3494 3544 * non-boot CPUs. When we're here on the boot CPU,
3495 3545 * we should disable the feature; on a non-boot
3496 3546 * CPU we need to confirm that we have.
3497 3547 */
3498 3548 if (cpu->cpu_id == 0) {
3499 3549 remove_x86_feature(x86_featureset,
3500 3550 X86FSET_XSAVE);
3501 3551 remove_x86_feature(x86_featureset,
3502 3552 X86FSET_AVX);
3503 3553 remove_x86_feature(x86_featureset,
3504 3554 X86FSET_F16C);
3505 3555 remove_x86_feature(x86_featureset,
3506 3556 X86FSET_BMI1);
3507 3557 remove_x86_feature(x86_featureset,
3508 3558 X86FSET_BMI2);
3509 3559 remove_x86_feature(x86_featureset,
3510 3560 X86FSET_FMA);
3511 3561 remove_x86_feature(x86_featureset,
3512 3562 X86FSET_AVX2);
3513 3563 remove_x86_feature(x86_featureset,
3514 3564 X86FSET_MPX);
3515 3565 remove_x86_feature(x86_featureset,
3516 3566 X86FSET_AVX512F);
3517 3567 remove_x86_feature(x86_featureset,
3518 3568 X86FSET_AVX512DQ);
3519 3569 remove_x86_feature(x86_featureset,
3520 3570 X86FSET_AVX512PF);
3521 3571 remove_x86_feature(x86_featureset,
3522 3572 X86FSET_AVX512ER);
3523 3573 remove_x86_feature(x86_featureset,
3524 3574 X86FSET_AVX512CD);
3525 3575 remove_x86_feature(x86_featureset,
3526 3576 X86FSET_AVX512BW);
3527 3577 remove_x86_feature(x86_featureset,
3528 3578 X86FSET_AVX512VL);
3529 3579 remove_x86_feature(x86_featureset,
3530 3580 X86FSET_AVX512FMA);
3531 3581 remove_x86_feature(x86_featureset,
3532 3582 X86FSET_AVX512VBMI);
3533 3583 remove_x86_feature(x86_featureset,
3534 3584 X86FSET_AVX512VNNI);
3535 3585 remove_x86_feature(x86_featureset,
3536 3586 X86FSET_AVX512VPOPCDQ);
3537 3587 remove_x86_feature(x86_featureset,
3538 3588 X86FSET_AVX512NNIW);
3539 3589 remove_x86_feature(x86_featureset,
3540 3590 X86FSET_AVX512FMAPS);
3541 3591
3542 3592 CPI_FEATURES_ECX(cpi) &=
3543 3593 ~CPUID_INTC_ECX_XSAVE;
3544 3594 CPI_FEATURES_ECX(cpi) &=
3545 3595 ~CPUID_INTC_ECX_AVX;
3546 3596 CPI_FEATURES_ECX(cpi) &=
3547 3597 ~CPUID_INTC_ECX_F16C;
3548 3598 CPI_FEATURES_ECX(cpi) &=
3549 3599 ~CPUID_INTC_ECX_FMA;
3550 3600 CPI_FEATURES_7_0_EBX(cpi) &=
3551 3601 ~CPUID_INTC_EBX_7_0_BMI1;
3552 3602 CPI_FEATURES_7_0_EBX(cpi) &=
3553 3603 ~CPUID_INTC_EBX_7_0_BMI2;
3554 3604 CPI_FEATURES_7_0_EBX(cpi) &=
3555 3605 ~CPUID_INTC_EBX_7_0_AVX2;
3556 3606 CPI_FEATURES_7_0_EBX(cpi) &=
3557 3607 ~CPUID_INTC_EBX_7_0_MPX;
3558 3608 CPI_FEATURES_7_0_EBX(cpi) &=
3559 3609 ~CPUID_INTC_EBX_7_0_ALL_AVX512;
3560 3610
3561 3611 CPI_FEATURES_7_0_ECX(cpi) &=
3562 3612 ~CPUID_INTC_ECX_7_0_ALL_AVX512;
3563 3613
3564 3614 CPI_FEATURES_7_0_EDX(cpi) &=
3565 3615 ~CPUID_INTC_EDX_7_0_ALL_AVX512;
3566 3616
3567 3617 xsave_force_disable = B_TRUE;
3568 3618 } else {
3569 3619 VERIFY(is_x86_feature(x86_featureset,
3570 3620 X86FSET_XSAVE) == B_FALSE);
3571 3621 }
3572 3622 }
3573 3623 }
3574 3624 }
3575 3625
3576 3626
3577 3627 if ((cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) == 0)
3578 3628 goto pass2_done;
3579 3629
3580 3630 if ((nmax = cpi->cpi_xmaxeax - CPUID_LEAF_EXT_0 + 1) > NMAX_CPI_EXTD)
3581 3631 nmax = NMAX_CPI_EXTD;
3582 3632 /*
3583 3633 * Copy the extended properties, fixing them as we go.
3584 3634 * (We already handled n == 0 and n == 1 in pass 1)
3585 3635 */
3586 3636 iptr = (void *)cpi->cpi_brandstr;
3587 3637 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) {
3588 3638 cp->cp_eax = CPUID_LEAF_EXT_0 + n;
3589 3639 (void) __cpuid_insn(cp);
3590 3640 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_0 + n,
3591 3641 cp);
3592 3642 switch (n) {
3593 3643 case 2:
3594 3644 case 3:
3595 3645 case 4:
3596 3646 /*
3597 3647 * Extract the brand string
3598 3648 */
3599 3649 *iptr++ = cp->cp_eax;
3600 3650 *iptr++ = cp->cp_ebx;
3601 3651 *iptr++ = cp->cp_ecx;
3602 3652 *iptr++ = cp->cp_edx;
3603 3653 break;
3604 3654 case 5:
3605 3655 switch (cpi->cpi_vendor) {
3606 3656 case X86_VENDOR_AMD:
3607 3657 /*
3608 3658 * The Athlon and Duron were the first
3609 3659 * parts to report the sizes of the
3610 3660 * TLB for large pages. Before then,
3611 3661 * we don't trust the data.
3612 3662 */
3613 3663 if (cpi->cpi_family < 6 ||
3614 3664 (cpi->cpi_family == 6 &&
3615 3665 cpi->cpi_model < 1))
3616 3666 cp->cp_eax = 0;
3617 3667 break;
3618 3668 default:
3619 3669 break;
3620 3670 }
3621 3671 break;
3622 3672 case 6:
3623 3673 switch (cpi->cpi_vendor) {
3624 3674 case X86_VENDOR_AMD:
3625 3675 /*
3626 3676 * The Athlon and Duron were the first
3627 3677 * AMD parts with L2 TLB's.
3628 3678 * Before then, don't trust the data.
3629 3679 */
3630 3680 if (cpi->cpi_family < 6 ||
3631 3681 cpi->cpi_family == 6 &&
3632 3682 cpi->cpi_model < 1)
3633 3683 cp->cp_eax = cp->cp_ebx = 0;
3634 3684 /*
3635 3685 * AMD Duron rev A0 reports L2
3636 3686 * cache size incorrectly as 1K
3637 3687 * when it is really 64K
3638 3688 */
3639 3689 if (cpi->cpi_family == 6 &&
3640 3690 cpi->cpi_model == 3 &&
3641 3691 cpi->cpi_step == 0) {
3642 3692 cp->cp_ecx &= 0xffff;
3643 3693 cp->cp_ecx |= 0x400000;
3644 3694 }
3645 3695 break;
3646 3696 case X86_VENDOR_Cyrix: /* VIA C3 */
3647 3697 /*
3648 3698 * VIA C3 processors are a bit messed
3649 3699 * up w.r.t. encoding cache sizes in %ecx
3650 3700 */
3651 3701 if (cpi->cpi_family != 6)
3652 3702 break;
3653 3703 /*
3654 3704 * model 7 and 8 were incorrectly encoded
3655 3705 *
3656 3706 * xxx is model 8 really broken?
3657 3707 */
3658 3708 if (cpi->cpi_model == 7 ||
3659 3709 cpi->cpi_model == 8)
3660 3710 cp->cp_ecx =
3661 3711 BITX(cp->cp_ecx, 31, 24) << 16 |
3662 3712 BITX(cp->cp_ecx, 23, 16) << 12 |
3663 3713 BITX(cp->cp_ecx, 15, 8) << 8 |
3664 3714 BITX(cp->cp_ecx, 7, 0);
3665 3715 /*
3666 3716 * model 9 stepping 1 has wrong associativity
3667 3717 */
3668 3718 if (cpi->cpi_model == 9 && cpi->cpi_step == 1)
3669 3719 cp->cp_ecx |= 8 << 12;
3670 3720 break;
3671 3721 case X86_VENDOR_Intel:
3672 3722 /*
3673 3723 * Extended L2 Cache features function.
3674 3724 * First appeared on Prescott.
3675 3725 */
3676 3726 default:
3677 3727 break;
3678 3728 }
3679 3729 break;
3680 3730 default:
3681 3731 break;
3682 3732 }
3683 3733 }
3684 3734
3685 3735 pass2_done:
3686 3736 cpi->cpi_pass = 2;
3687 3737 }
3688 3738
3689 3739 static const char *
3690 3740 intel_cpubrand(const struct cpuid_info *cpi)
3691 3741 {
3692 3742 int i;
3693 3743
3694 3744 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
3695 3745 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
3696 3746 return ("i486");
3697 3747
3698 3748 switch (cpi->cpi_family) {
3699 3749 case 5:
3700 3750 return ("Intel Pentium(r)");
3701 3751 case 6:
3702 3752 switch (cpi->cpi_model) {
3703 3753 uint_t celeron, xeon;
3704 3754 const struct cpuid_regs *cp;
3705 3755 case 0:
3706 3756 case 1:
3707 3757 case 2:
3708 3758 return ("Intel Pentium(r) Pro");
3709 3759 case 3:
3710 3760 case 4:
3711 3761 return ("Intel Pentium(r) II");
3712 3762 case 6:
3713 3763 return ("Intel Celeron(r)");
3714 3764 case 5:
3715 3765 case 7:
3716 3766 celeron = xeon = 0;
3717 3767 cp = &cpi->cpi_std[2]; /* cache info */
3718 3768
3719 3769 for (i = 1; i < 4; i++) {
3720 3770 uint_t tmp;
3721 3771
3722 3772 tmp = (cp->cp_eax >> (8 * i)) & 0xff;
3723 3773 if (tmp == 0x40)
3724 3774 celeron++;
3725 3775 if (tmp >= 0x44 && tmp <= 0x45)
3726 3776 xeon++;
3727 3777 }
3728 3778
3729 3779 for (i = 0; i < 2; i++) {
3730 3780 uint_t tmp;
3731 3781
3732 3782 tmp = (cp->cp_ebx >> (8 * i)) & 0xff;
3733 3783 if (tmp == 0x40)
3734 3784 celeron++;
3735 3785 else if (tmp >= 0x44 && tmp <= 0x45)
3736 3786 xeon++;
3737 3787 }
3738 3788
3739 3789 for (i = 0; i < 4; i++) {
3740 3790 uint_t tmp;
3741 3791
3742 3792 tmp = (cp->cp_ecx >> (8 * i)) & 0xff;
3743 3793 if (tmp == 0x40)
3744 3794 celeron++;
3745 3795 else if (tmp >= 0x44 && tmp <= 0x45)
3746 3796 xeon++;
3747 3797 }
3748 3798
3749 3799 for (i = 0; i < 4; i++) {
3750 3800 uint_t tmp;
3751 3801
3752 3802 tmp = (cp->cp_edx >> (8 * i)) & 0xff;
3753 3803 if (tmp == 0x40)
3754 3804 celeron++;
3755 3805 else if (tmp >= 0x44 && tmp <= 0x45)
3756 3806 xeon++;
3757 3807 }
3758 3808
3759 3809 if (celeron)
3760 3810 return ("Intel Celeron(r)");
3761 3811 if (xeon)
3762 3812 return (cpi->cpi_model == 5 ?
3763 3813 "Intel Pentium(r) II Xeon(tm)" :
3764 3814 "Intel Pentium(r) III Xeon(tm)");
3765 3815 return (cpi->cpi_model == 5 ?
3766 3816 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" :
3767 3817 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)");
3768 3818 default:
3769 3819 break;
3770 3820 }
3771 3821 default:
3772 3822 break;
3773 3823 }
3774 3824
3775 3825 /* BrandID is present if the field is nonzero */
3776 3826 if (cpi->cpi_brandid != 0) {
3777 3827 static const struct {
3778 3828 uint_t bt_bid;
3779 3829 const char *bt_str;
3780 3830 } brand_tbl[] = {
3781 3831 { 0x1, "Intel(r) Celeron(r)" },
3782 3832 { 0x2, "Intel(r) Pentium(r) III" },
3783 3833 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" },
3784 3834 { 0x4, "Intel(r) Pentium(r) III" },
3785 3835 { 0x6, "Mobile Intel(r) Pentium(r) III" },
3786 3836 { 0x7, "Mobile Intel(r) Celeron(r)" },
3787 3837 { 0x8, "Intel(r) Pentium(r) 4" },
3788 3838 { 0x9, "Intel(r) Pentium(r) 4" },
3789 3839 { 0xa, "Intel(r) Celeron(r)" },
3790 3840 { 0xb, "Intel(r) Xeon(tm)" },
3791 3841 { 0xc, "Intel(r) Xeon(tm) MP" },
3792 3842 { 0xe, "Mobile Intel(r) Pentium(r) 4" },
3793 3843 { 0xf, "Mobile Intel(r) Celeron(r)" },
3794 3844 { 0x11, "Mobile Genuine Intel(r)" },
3795 3845 { 0x12, "Intel(r) Celeron(r) M" },
3796 3846 { 0x13, "Mobile Intel(r) Celeron(r)" },
3797 3847 { 0x14, "Intel(r) Celeron(r)" },
3798 3848 { 0x15, "Mobile Genuine Intel(r)" },
3799 3849 { 0x16, "Intel(r) Pentium(r) M" },
3800 3850 { 0x17, "Mobile Intel(r) Celeron(r)" }
3801 3851 };
3802 3852 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]);
3803 3853 uint_t sgn;
3804 3854
3805 3855 sgn = (cpi->cpi_family << 8) |
3806 3856 (cpi->cpi_model << 4) | cpi->cpi_step;
3807 3857
3808 3858 for (i = 0; i < btblmax; i++)
3809 3859 if (brand_tbl[i].bt_bid == cpi->cpi_brandid)
3810 3860 break;
3811 3861 if (i < btblmax) {
3812 3862 if (sgn == 0x6b1 && cpi->cpi_brandid == 3)
3813 3863 return ("Intel(r) Celeron(r)");
3814 3864 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb)
3815 3865 return ("Intel(r) Xeon(tm) MP");
3816 3866 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe)
3817 3867 return ("Intel(r) Xeon(tm)");
3818 3868 return (brand_tbl[i].bt_str);
3819 3869 }
3820 3870 }
3821 3871
3822 3872 return (NULL);
3823 3873 }
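The BrandID path above packs family, model, and stepping into a single signature, sgn = (family << 8) | (model << 4) | step, so comparisons like sgn < 0xf13 pick out older steppings. For example, family 0xf, model 1, stepping 3 packs to 0xf13. A tiny sketch of the packing:

/* Sketch: pack family/model/stepping the way intel_cpubrand() does. */
#include <stdio.h>

static unsigned int
cpu_sgn(unsigned int family, unsigned int model, unsigned int step)
{
	return ((family << 8) | (model << 4) | step);
}

int
main(void)
{
	/* Family 0xf, model 1, stepping 3 -> 0xf13. */
	printf("0x%x\n", cpu_sgn(0xf, 1, 3));
	return (0);
}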
3824 3874
3825 3875 static const char *
3826 3876 amd_cpubrand(const struct cpuid_info *cpi)
3827 3877 {
3828 3878 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
3829 3879 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
3830 3880 return ("i486 compatible");
3831 3881
3832 3882 switch (cpi->cpi_family) {
3833 3883 case 5:
3834 3884 switch (cpi->cpi_model) {
3835 3885 case 0:
3836 3886 case 1:
3837 3887 case 2:
3838 3888 case 3:
3839 3889 case 4:
3840 3890 case 5:
3841 3891 return ("AMD-K5(r)");
3842 3892 case 6:
3843 3893 case 7:
3844 3894 return ("AMD-K6(r)");
3845 3895 case 8:
3846 3896 return ("AMD-K6(r)-2");
3847 3897 case 9:
3848 3898 return ("AMD-K6(r)-III");
3849 3899 default:
3850 3900 return ("AMD (family 5)");
3851 3901 }
3852 3902 case 6:
3853 3903 switch (cpi->cpi_model) {
3854 3904 case 1:
3855 3905 return ("AMD-K7(tm)");
3856 3906 case 0:
3857 3907 case 2:
3858 3908 case 4:
3859 3909 return ("AMD Athlon(tm)");
3860 3910 case 3:
3861 3911 case 7:
3862 3912 return ("AMD Duron(tm)");
3863 3913 case 6:
3864 3914 case 8:
3865 3915 case 10:
3866 3916 /*
3867 3917 * Use the L2 cache size to distinguish
3868 3918 */
3869 3919 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ?
3870 3920 "AMD Athlon(tm)" : "AMD Duron(tm)");
3871 3921 default:
3872 3922 return ("AMD (family 6)");
3873 3923 }
3874 3924 default:
3875 3925 break;
3876 3926 }
3877 3927
3878 3928 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 &&
3879 3929 cpi->cpi_brandid != 0) {
3880 3930 switch (BITX(cpi->cpi_brandid, 7, 5)) {
3881 3931 case 3:
3882 3932 return ("AMD Opteron(tm) UP 1xx");
3883 3933 case 4:
3884 3934 return ("AMD Opteron(tm) DP 2xx");
3885 3935 case 5:
3886 3936 return ("AMD Opteron(tm) MP 8xx");
3887 3937 default:
3888 3938 return ("AMD Opteron(tm)");
3889 3939 }
3890 3940 }
3891 3941
3892 3942 return (NULL);
3893 3943 }
3894 3944
3895 3945 static const char *
3896 3946 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type)
3897 3947 {
3898 3948 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
3899 3949 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 ||
3900 3950 type == X86_TYPE_CYRIX_486)
3901 3951 return ("i486 compatible");
3902 3952
3903 3953 switch (type) {
3904 3954 case X86_TYPE_CYRIX_6x86:
3905 3955 return ("Cyrix 6x86");
3906 3956 case X86_TYPE_CYRIX_6x86L:
3907 3957 return ("Cyrix 6x86L");
3908 3958 case X86_TYPE_CYRIX_6x86MX:
3909 3959 return ("Cyrix 6x86MX");
3910 3960 case X86_TYPE_CYRIX_GXm:
3911 3961 return ("Cyrix GXm");
3912 3962 case X86_TYPE_CYRIX_MediaGX:
3913 3963 return ("Cyrix MediaGX");
3914 3964 case X86_TYPE_CYRIX_MII:
3915 3965 return ("Cyrix M2");
3916 3966 case X86_TYPE_VIA_CYRIX_III:
3917 3967 return ("VIA Cyrix M3");
3918 3968 default:
3919 3969 /*
3920 3970 * Have another wild guess ..
3921 3971 */
3922 3972 if (cpi->cpi_family == 4 && cpi->cpi_model == 9)
3923 3973 return ("Cyrix 5x86");
3924 3974 else if (cpi->cpi_family == 5) {
3925 3975 switch (cpi->cpi_model) {
3926 3976 case 2:
3927 3977 return ("Cyrix 6x86"); /* Cyrix M1 */
3928 3978 case 4:
3929 3979 return ("Cyrix MediaGX");
3930 3980 default:
3931 3981 break;
3932 3982 }
3933 3983 } else if (cpi->cpi_family == 6) {
3934 3984 switch (cpi->cpi_model) {
3935 3985 case 0:
3936 3986 return ("Cyrix 6x86MX"); /* Cyrix M2? */
3937 3987 case 5:
3938 3988 case 6:
3939 3989 case 7:
3940 3990 case 8:
3941 3991 case 9:
3942 3992 return ("VIA C3");
3943 3993 default:
3944 3994 break;
3945 3995 }
3946 3996 }
3947 3997 break;
3948 3998 }
3949 3999 return (NULL);
3950 4000 }
3951 4001
3952 4002 /*
3953 4003 * This only gets called in the case that the CPU extended
3954 4004 * feature brand strings (0x80000002, 0x80000003, 0x80000004)
3955 4005 * aren't available, or contain null bytes for some reason.
3956 4006 */
3957 4007 static void
3958 4008 fabricate_brandstr(struct cpuid_info *cpi)
3959 4009 {
3960 4010 const char *brand = NULL;
3961 4011
3962 4012 switch (cpi->cpi_vendor) {
3963 4013 case X86_VENDOR_Intel:
3964 4014 brand = intel_cpubrand(cpi);
3965 4015 break;
3966 4016 case X86_VENDOR_AMD:
3967 4017 brand = amd_cpubrand(cpi);
3968 4018 break;
3969 4019 case X86_VENDOR_Cyrix:
3970 4020 brand = cyrix_cpubrand(cpi, x86_type);
3971 4021 break;
3972 4022 case X86_VENDOR_NexGen:
3973 4023 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
3974 4024 brand = "NexGen Nx586";
3975 4025 break;
3976 4026 case X86_VENDOR_Centaur:
3977 4027 if (cpi->cpi_family == 5)
3978 4028 switch (cpi->cpi_model) {
3979 4029 case 4:
3980 4030 brand = "Centaur C6";
3981 4031 break;
3982 4032 case 8:
3983 4033 brand = "Centaur C2";
3984 4034 break;
3985 4035 case 9:
3986 4036 brand = "Centaur C3";
3987 4037 break;
3988 4038 default:
3989 4039 break;
3990 4040 }
3991 4041 break;
3992 4042 case X86_VENDOR_Rise:
3993 4043 if (cpi->cpi_family == 5 &&
3994 4044 (cpi->cpi_model == 0 || cpi->cpi_model == 2))
3995 4045 brand = "Rise mP6";
3996 4046 break;
3997 4047 case X86_VENDOR_SiS:
3998 4048 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
3999 4049 brand = "SiS 55x";
4000 4050 break;
4001 4051 case X86_VENDOR_TM:
4002 4052 if (cpi->cpi_family == 5 && cpi->cpi_model == 4)
4003 4053 brand = "Transmeta Crusoe TM3x00 or TM5x00";
4004 4054 break;
4005 4055 case X86_VENDOR_NSC:
4006 4056 case X86_VENDOR_UMC:
4007 4057 default:
4008 4058 break;
4009 4059 }
4010 4060 if (brand) {
4011 4061 (void) strcpy((char *)cpi->cpi_brandstr, brand);
4012 4062 return;
4013 4063 }
4014 4064
4015 4065 /*
4016 4066 * If all else fails ...
4017 4067 */
4018 4068 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr),
4019 4069 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family,
4020 4070 cpi->cpi_model, cpi->cpi_step);
4021 4071 }
4022 4072
4023 4073 /*
4024 4074 * This routine is called just after kernel memory allocation
4025 4075 * becomes available on cpu0, and as part of mp_startup() on
4026 4076 * the other cpus.
4027 4077 *
4028 4078 * Fixup the brand string, and collect any information from cpuid
4029 4079 * that requires dynamically allocated storage to represent.
4030 4080 */
4031 4081 /*ARGSUSED*/
4032 4082 void
4033 4083 cpuid_pass3(cpu_t *cpu)
4034 4084 {
4035 4085 int i, max, shft, level, size;
4036 4086 struct cpuid_regs regs;
4037 4087 struct cpuid_regs *cp;
4038 4088 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4039 4089
4040 4090 ASSERT(cpi->cpi_pass == 2);
4041 4091
4042 4092 /*
4043 4093 * Deterministic cache parameters
4044 4094 *
4045 4095 * Intel uses leaf 0x4 for this, while AMD uses leaf 0x8000001d. The
4046 4096 * values that are present are currently defined to be the same. This
4047 4097 * means we can use the same logic to parse it as long as we use the
4048 4098 * appropriate leaf to get the data. If you're updating this, make sure
4049 4099 * you're careful about which vendor supports which aspect.
4050 4100 *
4051 4101 * Take this opportunity to detect the number of threads sharing the
4052 4102 * last level cache, and construct a corresponding cache id. The
4053 4103 * respective cpuid_info members are initialized to the default case of
4054 4104 * "no last level cache sharing".
4055 4105 */
4056 4106 cpi->cpi_ncpu_shr_last_cache = 1;
4057 4107 cpi->cpi_last_lvl_cacheid = cpu->cpu_id;
4058 4108
4059 4109 if ((cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) ||
4060 4110 (cpi->cpi_vendor == X86_VENDOR_AMD &&
4061 4111 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1d &&
4062 4112 is_x86_feature(x86_featureset, X86FSET_TOPOEXT))) {
4063 4113 uint32_t leaf;
4064 4114
4065 4115 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
4066 4116 leaf = 4;
4067 4117 } else {
4068 4118 leaf = CPUID_LEAF_EXT_1d;
4069 4119 }
4070 4120
4071 4121 /*
4072 4122 * Find the # of elements (size) returned by the leaf and along
4073 4123 * the way detect last level cache sharing details.
4074 4124 */
4075 4125 bzero(&regs, sizeof (regs));
4076 4126 cp = &regs;
4077 4127 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) {
4078 4128 cp->cp_eax = leaf;
4079 4129 cp->cp_ecx = i;
4080 4130
4081 4131 (void) __cpuid_insn(cp);
4082 4132
4083 4133 if (CPI_CACHE_TYPE(cp) == 0)
4084 4134 break;
4085 4135 level = CPI_CACHE_LVL(cp);
4086 4136 if (level > max) {
4087 4137 max = level;
4088 4138 cpi->cpi_ncpu_shr_last_cache =
4089 4139 CPI_NTHR_SHR_CACHE(cp) + 1;
4090 4140 }
4091 4141 }
4092 4142 cpi->cpi_cache_leaf_size = size = i;
4093 4143
4094 4144 /*
4095 4145 * Allocate the cpi_cache_leaves array. The first element
4096 4146 * references the regs for the corresponding leaf with %ecx set
4097 4147 * to 0. This was gathered in cpuid_pass2().
4098 4148 */
4099 4149 if (size > 0) {
4100 4150 cpi->cpi_cache_leaves =
4101 4151 kmem_alloc(size * sizeof (cp), KM_SLEEP);
4102 4152 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
4103 4153 cpi->cpi_cache_leaves[0] = &cpi->cpi_std[4];
4104 4154 } else {
4105 4155 cpi->cpi_cache_leaves[0] = &cpi->cpi_extd[0x1d];
4106 4156 }
4107 4157
4108 4158 /*
4109 4159 * Allocate storage to hold the additional regs
4110 4160 * for the leaf, %ecx == 1 .. cpi_cache_leaf_size.
4111 4161 *
4112 4162 * The regs for the leaf, %ecx == 0 has already
4113 4163 * been allocated as indicated above.
4114 4164 */
4115 4165 for (i = 1; i < size; i++) {
4116 4166 cp = cpi->cpi_cache_leaves[i] =
4117 4167 kmem_zalloc(sizeof (regs), KM_SLEEP);
4118 4168 cp->cp_eax = leaf;
4119 4169 cp->cp_ecx = i;
4120 4170
4121 4171 (void) __cpuid_insn(cp);
4122 4172 }
4123 4173 }
4124 4174 /*
4125 4175 * Determine the number of bits needed to represent
4126 4176 * the number of CPUs sharing the last level cache.
4127 4177 *
4128 4178 * Shift off that number of bits from the APIC id to
4129 4179 * derive the cache id.
4130 4180 */
4131 4181 shft = 0;
4132 4182 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1)
4133 4183 shft++;
4134 4184 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft;
4135 4185 }
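The cache-id derivation above shifts the APIC id right by enough bits to cover the number of CPUs sharing the last-level cache (a ceiling log2). A minimal sketch of the same loop, with an illustrative apicid value:

/* Sketch: derive a last-level cache id from an APIC id and a share count. */
#include <stdio.h>

static unsigned int
last_lvl_cacheid(unsigned int apicid, unsigned int ncpu_shr_last_cache)
{
	unsigned int shft = 0;
	unsigned int i;

	/* Number of bits needed to represent the sharing count (ceil log2). */
	for (i = 1; i < ncpu_shr_last_cache; i <<= 1)
		shft++;
	return (apicid >> shft);
}

int
main(void)
{
	/* E.g. 12 threads sharing the LLC -> shift the APIC id by 4 bits. */
	printf("%u\n", last_lvl_cacheid(0x23, 12));
	return (0);
}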
4136 4186
4137 4187 /*
4138 4188 * Now fixup the brand string
4139 4189 */
4140 4190 if ((cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) == 0) {
4141 4191 fabricate_brandstr(cpi);
4142 4192 } else {
4143 4193
4144 4194 /*
4145 4195 * If we successfully extracted a brand string from the cpuid
4146 4196 * instruction, clean it up by removing leading spaces and
4147 4197 * similar junk.
4148 4198 */
4149 4199 if (cpi->cpi_brandstr[0]) {
4150 4200 size_t maxlen = sizeof (cpi->cpi_brandstr);
4151 4201 char *src, *dst;
4152 4202
4153 4203 dst = src = (char *)cpi->cpi_brandstr;
4154 4204 src[maxlen - 1] = '\0';
4155 4205 /*
4156 4206 * strip leading spaces
4157 4207 */
4158 4208 while (*src == ' ')
4159 4209 src++;
4160 4210 /*
4161 4211 * Remove any 'Genuine' or "Authentic" prefixes
4162 4212 */
4163 4213 if (strncmp(src, "Genuine ", 8) == 0)
4164 4214 src += 8;
4165 4215 if (strncmp(src, "Authentic ", 10) == 0)
4166 4216 src += 10;
4167 4217
4168 4218 /*
4169 4219 * Now do an in-place copy.
4170 4220 * Map (R) to (r) and (TM) to (tm).
4171 4221 * The era of teletypes is long gone, and there's
4172 4222 * -really- no need to shout.
4173 4223 */
4174 4224 while (*src != '\0') {
4175 4225 if (src[0] == '(') {
4176 4226 if (strncmp(src + 1, "R)", 2) == 0) {
4177 4227 (void) strncpy(dst, "(r)", 3);
4178 4228 src += 3;
4179 4229 dst += 3;
4180 4230 continue;
4181 4231 }
4182 4232 if (strncmp(src + 1, "TM)", 3) == 0) {
4183 4233 (void) strncpy(dst, "(tm)", 4);
4184 4234 src += 4;
4185 4235 dst += 4;
4186 4236 continue;
4187 4237 }
4188 4238 }
4189 4239 *dst++ = *src++;
4190 4240 }
4191 4241 *dst = '\0';
4192 4242
4193 4243 /*
4194 4244 * Finally, remove any trailing spaces
4195 4245 */
4196 4246 while (--dst > cpi->cpi_brandstr)
4197 4247 if (*dst == ' ')
4198 4248 *dst = '\0';
4199 4249 else
4200 4250 break;
4201 4251 } else
4202 4252 fabricate_brandstr(cpi);
4203 4253 }
4204 4254 cpi->cpi_pass = 3;
4205 4255 }
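As a rough illustration of the brand-string cleanup performed in cpuid_pass3() above, the following user-level sketch strips leading spaces, drops the "Genuine "/"Authentic " prefixes, lower-cases the (R)/(TM) marks, and trims trailing spaces; it is a simplification of the kernel's in-place copy, not a drop-in replacement.

/* Sketch: simplified version of the brand-string cleanup. */
#include <stdio.h>
#include <string.h>

static void
clean_brandstr(char *buf)
{
	char *src = buf, *dst = buf;

	while (*src == ' ')			/* strip leading spaces */
		src++;
	if (strncmp(src, "Genuine ", 8) == 0)
		src += 8;
	if (strncmp(src, "Authentic ", 10) == 0)
		src += 10;

	while (*src != '\0') {
		if (strncmp(src, "(R)", 3) == 0) {
			memcpy(dst, "(r)", 3);
			src += 3;
			dst += 3;
		} else if (strncmp(src, "(TM)", 4) == 0) {
			memcpy(dst, "(tm)", 4);
			src += 4;
			dst += 4;
		} else {
			*dst++ = *src++;
		}
	}
	*dst = '\0';
	while (dst > buf && *--dst == ' ')	/* trim trailing spaces */
		*dst = '\0';
}

int
main(void)
{
	char brand[] = "  Genuine Intel(R) CPU (TM)  ";

	clean_brandstr(brand);
	printf("[%s]\n", brand);
	return (0);
}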
4206 4256
4207 4257 /*
4208 4258 * This routine is called out of bind_hwcap() much later in the life
4209 4259 * of the kernel (post_startup()). The job of this routine is to resolve
4210 4260 * the hardware feature support and kernel support for those features into
4211 4261 * what we're actually going to tell applications via the aux vector.
4212 4262 */
4213 4263 void
4214 4264 cpuid_pass4(cpu_t *cpu, uint_t *hwcap_out)
4215 4265 {
4216 4266 struct cpuid_info *cpi;
4217 4267 uint_t hwcap_flags = 0, hwcap_flags_2 = 0;
4218 4268
4219 4269 if (cpu == NULL)
4220 4270 cpu = CPU;
4221 4271 cpi = cpu->cpu_m.mcpu_cpi;
4222 4272
4223 4273 ASSERT(cpi->cpi_pass == 3);
4224 4274
4225 4275 if (cpi->cpi_maxeax >= 1) {
4226 4276 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES];
4227 4277 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES];
4228 4278 uint32_t *ebx = &cpi->cpi_support[STD_EBX_FEATURES];
4229 4279
4230 4280 *edx = CPI_FEATURES_EDX(cpi);
4231 4281 *ecx = CPI_FEATURES_ECX(cpi);
4232 4282 *ebx = CPI_FEATURES_7_0_EBX(cpi);
4233 4283
4234 4284 /*
4235 4285 * [these require explicit kernel support]
4236 4286 */
4237 4287 if (!is_x86_feature(x86_featureset, X86FSET_SEP))
4238 4288 *edx &= ~CPUID_INTC_EDX_SEP;
4239 4289
4240 4290 if (!is_x86_feature(x86_featureset, X86FSET_SSE))
4241 4291 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE);
4242 4292 if (!is_x86_feature(x86_featureset, X86FSET_SSE2))
4243 4293 *edx &= ~CPUID_INTC_EDX_SSE2;
4244 4294
4245 4295 if (!is_x86_feature(x86_featureset, X86FSET_HTT))
4246 4296 *edx &= ~CPUID_INTC_EDX_HTT;
4247 4297
4248 4298 if (!is_x86_feature(x86_featureset, X86FSET_SSE3))
4249 4299 *ecx &= ~CPUID_INTC_ECX_SSE3;
4250 4300
4251 4301 if (!is_x86_feature(x86_featureset, X86FSET_SSSE3))
4252 4302 *ecx &= ~CPUID_INTC_ECX_SSSE3;
4253 4303 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_1))
4254 4304 *ecx &= ~CPUID_INTC_ECX_SSE4_1;
4255 4305 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_2))
4256 4306 *ecx &= ~CPUID_INTC_ECX_SSE4_2;
4257 4307 if (!is_x86_feature(x86_featureset, X86FSET_AES))
4258 4308 *ecx &= ~CPUID_INTC_ECX_AES;
4259 4309 if (!is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ))
4260 4310 *ecx &= ~CPUID_INTC_ECX_PCLMULQDQ;
4261 4311 if (!is_x86_feature(x86_featureset, X86FSET_XSAVE))
4262 4312 *ecx &= ~(CPUID_INTC_ECX_XSAVE |
4263 4313 CPUID_INTC_ECX_OSXSAVE);
4264 4314 if (!is_x86_feature(x86_featureset, X86FSET_AVX))
4265 4315 *ecx &= ~CPUID_INTC_ECX_AVX;
4266 4316 if (!is_x86_feature(x86_featureset, X86FSET_F16C))
4267 4317 *ecx &= ~CPUID_INTC_ECX_F16C;
4268 4318 if (!is_x86_feature(x86_featureset, X86FSET_FMA))
4269 4319 *ecx &= ~CPUID_INTC_ECX_FMA;
4270 4320 if (!is_x86_feature(x86_featureset, X86FSET_BMI1))
4271 4321 *ebx &= ~CPUID_INTC_EBX_7_0_BMI1;
4272 4322 if (!is_x86_feature(x86_featureset, X86FSET_BMI2))
4273 4323 *ebx &= ~CPUID_INTC_EBX_7_0_BMI2;
4274 4324 if (!is_x86_feature(x86_featureset, X86FSET_AVX2))
4275 4325 *ebx &= ~CPUID_INTC_EBX_7_0_AVX2;
4276 4326 if (!is_x86_feature(x86_featureset, X86FSET_RDSEED))
4277 4327 *ebx &= ~CPUID_INTC_EBX_7_0_RDSEED;
4278 4328 if (!is_x86_feature(x86_featureset, X86FSET_ADX))
4279 4329 *ebx &= ~CPUID_INTC_EBX_7_0_ADX;
4280 4330
4281 4331 /*
4282 4332 * [no explicit support required beyond x87 fp context]
4283 4333 */
4284 4334 if (!fpu_exists)
4285 4335 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX);
4286 4336
4287 4337 /*
4288 4338 * Now map the supported feature vector to things that we
4289 4339 * think userland will care about.
4290 4340 */
4291 4341 if (*edx & CPUID_INTC_EDX_SEP)
4292 4342 hwcap_flags |= AV_386_SEP;
4293 4343 if (*edx & CPUID_INTC_EDX_SSE)
4294 4344 hwcap_flags |= AV_386_FXSR | AV_386_SSE;
4295 4345 if (*edx & CPUID_INTC_EDX_SSE2)
4296 4346 hwcap_flags |= AV_386_SSE2;
4297 4347 if (*ecx & CPUID_INTC_ECX_SSE3)
4298 4348 hwcap_flags |= AV_386_SSE3;
4299 4349 if (*ecx & CPUID_INTC_ECX_SSSE3)
4300 4350 hwcap_flags |= AV_386_SSSE3;
4301 4351 if (*ecx & CPUID_INTC_ECX_SSE4_1)
4302 4352 hwcap_flags |= AV_386_SSE4_1;
4303 4353 if (*ecx & CPUID_INTC_ECX_SSE4_2)
4304 4354 hwcap_flags |= AV_386_SSE4_2;
4305 4355 if (*ecx & CPUID_INTC_ECX_MOVBE)
4306 4356 hwcap_flags |= AV_386_MOVBE;
4307 4357 if (*ecx & CPUID_INTC_ECX_AES)
4308 4358 hwcap_flags |= AV_386_AES;
4309 4359 if (*ecx & CPUID_INTC_ECX_PCLMULQDQ)
4310 4360 hwcap_flags |= AV_386_PCLMULQDQ;
4311 4361 if ((*ecx & CPUID_INTC_ECX_XSAVE) &&
4312 4362 (*ecx & CPUID_INTC_ECX_OSXSAVE)) {
4313 4363 hwcap_flags |= AV_386_XSAVE;
4314 4364
4315 4365 if (*ecx & CPUID_INTC_ECX_AVX) {
4316 4366 uint32_t *ecx_7 = &CPI_FEATURES_7_0_ECX(cpi);
4317 4367 uint32_t *edx_7 = &CPI_FEATURES_7_0_EDX(cpi);
4318 4368
4319 4369 hwcap_flags |= AV_386_AVX;
4320 4370 if (*ecx & CPUID_INTC_ECX_F16C)
4321 4371 hwcap_flags_2 |= AV_386_2_F16C;
4322 4372 if (*ecx & CPUID_INTC_ECX_FMA)
4323 4373 hwcap_flags_2 |= AV_386_2_FMA;
4324 4374
4325 4375 if (*ebx & CPUID_INTC_EBX_7_0_BMI1)
4326 4376 hwcap_flags_2 |= AV_386_2_BMI1;
4327 4377 if (*ebx & CPUID_INTC_EBX_7_0_BMI2)
4328 4378 hwcap_flags_2 |= AV_386_2_BMI2;
4329 4379 if (*ebx & CPUID_INTC_EBX_7_0_AVX2)
4330 4380 hwcap_flags_2 |= AV_386_2_AVX2;
4331 4381 if (*ebx & CPUID_INTC_EBX_7_0_AVX512F)
4332 4382 hwcap_flags_2 |= AV_386_2_AVX512F;
4333 4383 if (*ebx & CPUID_INTC_EBX_7_0_AVX512DQ)
4334 4384 hwcap_flags_2 |= AV_386_2_AVX512DQ;
4335 4385 if (*ebx & CPUID_INTC_EBX_7_0_AVX512IFMA)
4336 4386 hwcap_flags_2 |= AV_386_2_AVX512IFMA;
4337 4387 if (*ebx & CPUID_INTC_EBX_7_0_AVX512PF)
4338 4388 hwcap_flags_2 |= AV_386_2_AVX512PF;
4339 4389 if (*ebx & CPUID_INTC_EBX_7_0_AVX512ER)
4340 4390 hwcap_flags_2 |= AV_386_2_AVX512ER;
4341 4391 if (*ebx & CPUID_INTC_EBX_7_0_AVX512CD)
4342 4392 hwcap_flags_2 |= AV_386_2_AVX512CD;
4343 4393 if (*ebx & CPUID_INTC_EBX_7_0_AVX512BW)
4344 4394 hwcap_flags_2 |= AV_386_2_AVX512BW;
4345 4395 if (*ebx & CPUID_INTC_EBX_7_0_AVX512VL)
4346 4396 hwcap_flags_2 |= AV_386_2_AVX512VL;
4347 4397
4348 4398 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VBMI)
4349 4399 hwcap_flags_2 |= AV_386_2_AVX512VBMI;
4350 4400 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VNNI)
4351 4401 hwcap_flags_2 |= AV_386_2_AVX512_VNNI;
4352 4402 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VPOPCDQ)
4353 4403 hwcap_flags_2 |= AV_386_2_AVX512VPOPCDQ;
4354 4404
4355 4405 if (*edx_7 & CPUID_INTC_EDX_7_0_AVX5124NNIW)
4356 4406 hwcap_flags_2 |= AV_386_2_AVX512_4NNIW;
4357 4407 if (*edx_7 & CPUID_INTC_EDX_7_0_AVX5124FMAPS)
4358 4408 hwcap_flags_2 |= AV_386_2_AVX512_4FMAPS;
4359 4409 }
4360 4410 }
4361 4411 if (*ecx & CPUID_INTC_ECX_VMX)
4362 4412 hwcap_flags |= AV_386_VMX;
4363 4413 if (*ecx & CPUID_INTC_ECX_POPCNT)
4364 4414 hwcap_flags |= AV_386_POPCNT;
4365 4415 if (*edx & CPUID_INTC_EDX_FPU)
4366 4416 hwcap_flags |= AV_386_FPU;
4367 4417 if (*edx & CPUID_INTC_EDX_MMX)
4368 4418 hwcap_flags |= AV_386_MMX;
4369 4419
4370 4420 if (*edx & CPUID_INTC_EDX_TSC)
4371 4421 hwcap_flags |= AV_386_TSC;
4372 4422 if (*edx & CPUID_INTC_EDX_CX8)
4373 4423 hwcap_flags |= AV_386_CX8;
4374 4424 if (*edx & CPUID_INTC_EDX_CMOV)
4375 4425 hwcap_flags |= AV_386_CMOV;
4376 4426 if (*ecx & CPUID_INTC_ECX_CX16)
4377 4427 hwcap_flags |= AV_386_CX16;
4378 4428
4379 4429 if (*ecx & CPUID_INTC_ECX_RDRAND)
4380 4430 hwcap_flags_2 |= AV_386_2_RDRAND;
4381 4431 if (*ebx & CPUID_INTC_EBX_7_0_ADX)
4382 4432 hwcap_flags_2 |= AV_386_2_ADX;
4383 4433 if (*ebx & CPUID_INTC_EBX_7_0_RDSEED)
4384 4434 hwcap_flags_2 |= AV_386_2_RDSEED;
4385 4435 if (*ebx & CPUID_INTC_EBX_7_0_SHA)
4386 4436 hwcap_flags_2 |= AV_386_2_SHA;
4387 4437 if (*ebx & CPUID_INTC_EBX_7_0_FSGSBASE)
4388 4438 hwcap_flags_2 |= AV_386_2_FSGSBASE;
4389 4439 if (*ebx & CPUID_INTC_EBX_7_0_CLWB)
4390 4440 hwcap_flags_2 |= AV_386_2_CLWB;
4391 4441 if (*ebx & CPUID_INTC_EBX_7_0_CLFLUSHOPT)
4392 4442 hwcap_flags_2 |= AV_386_2_CLFLUSHOPT;
4393 4443
4394 4444 }
4395 4445 /*
4396 4446 	 * Check a few miscellaneous features.
4397 4447 */
4398 4448 if (is_x86_feature(x86_featureset, X86FSET_CLZERO))
4399 4449 hwcap_flags_2 |= AV_386_2_CLZERO;
4400 4450
4401 4451 if (cpi->cpi_xmaxeax < 0x80000001)
4402 4452 goto pass4_done;
4403 4453
4404 4454 switch (cpi->cpi_vendor) {
4405 4455 struct cpuid_regs cp;
4406 4456 uint32_t *edx, *ecx;
4407 4457
4408 4458 case X86_VENDOR_Intel:
4409 4459 /*
4410 4460 		 * Seems like Intel duplicated what was necessary
4411 4461 * here to make the initial crop of 64-bit OS's work.
4412 4462 * Hopefully, those are the only "extended" bits
4413 4463 * they'll add.
4414 4464 */
4415 4465 /*FALLTHROUGH*/
4416 4466
4417 4467 case X86_VENDOR_AMD:
4418 4468 edx = &cpi->cpi_support[AMD_EDX_FEATURES];
4419 4469 ecx = &cpi->cpi_support[AMD_ECX_FEATURES];
4420 4470
4421 4471 *edx = CPI_FEATURES_XTD_EDX(cpi);
4422 4472 *ecx = CPI_FEATURES_XTD_ECX(cpi);
4423 4473
4424 4474 /*
4425 4475 * [these features require explicit kernel support]
4426 4476 */
4427 4477 switch (cpi->cpi_vendor) {
4428 4478 case X86_VENDOR_Intel:
4429 4479 if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
4430 4480 *edx &= ~CPUID_AMD_EDX_TSCP;
4431 4481 break;
4432 4482
4433 4483 case X86_VENDOR_AMD:
4434 4484 if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
4435 4485 *edx &= ~CPUID_AMD_EDX_TSCP;
4436 4486 if (!is_x86_feature(x86_featureset, X86FSET_SSE4A))
4437 4487 *ecx &= ~CPUID_AMD_ECX_SSE4A;
4438 4488 break;
4439 4489
4440 4490 default:
4441 4491 break;
4442 4492 }
4443 4493
4444 4494 /*
4445 4495 * [no explicit support required beyond
4446 4496 * x87 fp context and exception handlers]
4447 4497 */
4448 4498 if (!fpu_exists)
4449 4499 *edx &= ~(CPUID_AMD_EDX_MMXamd |
4450 4500 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx);
4451 4501
4452 4502 if (!is_x86_feature(x86_featureset, X86FSET_NX))
4453 4503 *edx &= ~CPUID_AMD_EDX_NX;
4454 4504 #if !defined(__amd64)
4455 4505 *edx &= ~CPUID_AMD_EDX_LM;
4456 4506 #endif
4457 4507 /*
4458 4508 * Now map the supported feature vector to
4459 4509 * things that we think userland will care about.
4460 4510 */
4461 4511 #if defined(__amd64)
4462 4512 if (*edx & CPUID_AMD_EDX_SYSC)
4463 4513 hwcap_flags |= AV_386_AMD_SYSC;
4464 4514 #endif
4465 4515 if (*edx & CPUID_AMD_EDX_MMXamd)
4466 4516 hwcap_flags |= AV_386_AMD_MMX;
4467 4517 if (*edx & CPUID_AMD_EDX_3DNow)
4468 4518 hwcap_flags |= AV_386_AMD_3DNow;
4469 4519 if (*edx & CPUID_AMD_EDX_3DNowx)
4470 4520 hwcap_flags |= AV_386_AMD_3DNowx;
4471 4521 if (*ecx & CPUID_AMD_ECX_SVM)
4472 4522 hwcap_flags |= AV_386_AMD_SVM;
4473 4523
4474 4524 switch (cpi->cpi_vendor) {
4475 4525 case X86_VENDOR_AMD:
4476 4526 if (*edx & CPUID_AMD_EDX_TSCP)
4477 4527 hwcap_flags |= AV_386_TSCP;
4478 4528 if (*ecx & CPUID_AMD_ECX_AHF64)
4479 4529 hwcap_flags |= AV_386_AHF;
4480 4530 if (*ecx & CPUID_AMD_ECX_SSE4A)
4481 4531 hwcap_flags |= AV_386_AMD_SSE4A;
4482 4532 if (*ecx & CPUID_AMD_ECX_LZCNT)
4483 4533 hwcap_flags |= AV_386_AMD_LZCNT;
4484 4534 if (*ecx & CPUID_AMD_ECX_MONITORX)
4485 4535 hwcap_flags_2 |= AV_386_2_MONITORX;
4486 4536 break;
4487 4537
4488 4538 case X86_VENDOR_Intel:
4489 4539 if (*edx & CPUID_AMD_EDX_TSCP)
4490 4540 hwcap_flags |= AV_386_TSCP;
4491 4541 /*
4492 4542 * Aarrgh.
4493 4543 * Intel uses a different bit in the same word.
4494 4544 */
4495 4545 if (*ecx & CPUID_INTC_ECX_AHF64)
4496 4546 hwcap_flags |= AV_386_AHF;
4497 4547 break;
4498 4548
4499 4549 default:
4500 4550 break;
4501 4551 }
4502 4552 break;
4503 4553
4504 4554 case X86_VENDOR_TM:
4505 4555 cp.cp_eax = 0x80860001;
4506 4556 (void) __cpuid_insn(&cp);
4507 4557 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx;
4508 4558 break;
4509 4559
4510 4560 default:
4511 4561 break;
4512 4562 }
4513 4563
4514 4564 pass4_done:
4515 4565 cpi->cpi_pass = 4;
4516 4566 if (hwcap_out != NULL) {
4517 4567 hwcap_out[0] = hwcap_flags;
4518 4568 hwcap_out[1] = hwcap_flags_2;
4519 4569 }
4520 4570 }
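The two hwcap words assembled above are what userland ultimately observes through the auxiliary vector. A minimal sketch of how a user program might consult them via getisax(3C); the two-element array mirrors hwcap_out[] above, and the particular bits tested here are only illustrative:

#include <sys/auxv.h>
#include <sys/auxv_386.h>
#include <stdio.h>

int
main(void)
{
	uint32_t hw[2] = { 0, 0 };

	/* getisax() copies out up to the requested number of hwcap words. */
	(void) getisax(hw, 2);

	if (hw[0] & AV_386_SSE2)
		(void) printf("SSE2 available\n");
	if (hw[1] & AV_386_2_AVX2)
		(void) printf("AVX2 available\n");

	return (0);
}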
4521 4571
4522 4572
4523 4573 /*
4524 4574 * Simulate the cpuid instruction using the data we previously
4525 4575 * captured about this CPU. We try our best to return the truth
4526 4576 * about the hardware, independently of kernel support.
4527 4577 */
4528 4578 uint32_t
4529 4579 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp)
4530 4580 {
4531 4581 struct cpuid_info *cpi;
4532 4582 struct cpuid_regs *xcp;
4533 4583
4534 4584 if (cpu == NULL)
4535 4585 cpu = CPU;
4536 4586 cpi = cpu->cpu_m.mcpu_cpi;
4537 4587
4538 4588 ASSERT(cpuid_checkpass(cpu, 3));
4539 4589
4540 4590 /*
4541 4591 * CPUID data is cached in two separate places: cpi_std for standard
4542 4592 	 * CPUID leaves, and cpi_extd for extended CPUID leaves.
4543 4593 */
4544 4594 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD) {
4545 4595 xcp = &cpi->cpi_std[cp->cp_eax];
4546 4596 } else if (cp->cp_eax >= CPUID_LEAF_EXT_0 &&
4547 4597 cp->cp_eax <= cpi->cpi_xmaxeax &&
4548 4598 cp->cp_eax < CPUID_LEAF_EXT_0 + NMAX_CPI_EXTD) {
4549 4599 xcp = &cpi->cpi_extd[cp->cp_eax - CPUID_LEAF_EXT_0];
4550 4600 } else {
4551 4601 /*
4552 4602 * The caller is asking for data from an input parameter which
4553 4603 * the kernel has not cached. In this case we go fetch from
4554 4604 * the hardware and return the data directly to the user.
4555 4605 */
4556 4606 return (__cpuid_insn(cp));
4557 4607 }
4558 4608
4559 4609 cp->cp_eax = xcp->cp_eax;
4560 4610 cp->cp_ebx = xcp->cp_ebx;
4561 4611 cp->cp_ecx = xcp->cp_ecx;
4562 4612 cp->cp_edx = xcp->cp_edx;
4563 4613 return (cp->cp_eax);
4564 4614 }
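A brief usage sketch for cpuid_insn(): a NULL cpu argument means the current CPU, and leaves the kernel has cached (standard and extended ranges) are answered from cpi_std/cpi_extd rather than by re-executing the instruction. The leaf chosen here is only an example:

	struct cpuid_regs cp;

	cp.cp_eax = 1;		/* standard feature leaf */
	cp.cp_ecx = 0;
	(void) cpuid_insn(NULL, &cp);
	/* cp.cp_edx and cp.cp_ecx now hold the cached feature words */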
4565 4615
4566 4616 int
4567 4617 cpuid_checkpass(cpu_t *cpu, int pass)
4568 4618 {
4569 4619 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL &&
4570 4620 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass);
4571 4621 }
4572 4622
4573 4623 int
4574 4624 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n)
4575 4625 {
4576 4626 ASSERT(cpuid_checkpass(cpu, 3));
4577 4627
4578 4628 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr));
4579 4629 }
4580 4630
4581 4631 int
4582 4632 cpuid_is_cmt(cpu_t *cpu)
4583 4633 {
4584 4634 if (cpu == NULL)
4585 4635 cpu = CPU;
4586 4636
4587 4637 ASSERT(cpuid_checkpass(cpu, 1));
4588 4638
4589 4639 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0);
4590 4640 }
4591 4641
4592 4642 /*
4593 4643 * AMD and Intel both implement the 64-bit variant of the syscall
4594 4644 * instruction (syscallq), so if there's -any- support for syscall,
4595 4645 * cpuid currently says "yes, we support this".
4596 4646 *
4597 4647 * However, Intel decided to -not- implement the 32-bit variant of the
4598 4648 * syscall instruction, so we provide a predicate to allow our caller
4599 4649 * to test that subtlety here.
4600 4650 *
4601 4651 * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor,
4602 4652 * even in the case where the hardware would in fact support it.
4603 4653 */
4604 4654 /*ARGSUSED*/
4605 4655 int
4606 4656 cpuid_syscall32_insn(cpu_t *cpu)
4607 4657 {
4608 4658 ASSERT(cpuid_checkpass((cpu == NULL ? CPU : cpu), 1));
4609 4659
4610 4660 #if !defined(__xpv)
4611 4661 if (cpu == NULL)
4612 4662 cpu = CPU;
4613 4663
4614 4664 /*CSTYLED*/
4615 4665 {
4616 4666 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4617 4667
4618 4668 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
4619 4669 cpi->cpi_xmaxeax >= 0x80000001 &&
4620 4670 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC))
4621 4671 return (1);
4622 4672 }
4623 4673 #endif
4624 4674 return (0);
4625 4675 }
4626 4676
4627 4677 int
4628 4678 cpuid_getidstr(cpu_t *cpu, char *s, size_t n)
4629 4679 {
4630 4680 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4631 4681
4632 4682 static const char fmt[] =
4633 4683 "x86 (%s %X family %d model %d step %d clock %d MHz)";
4634 4684 static const char fmt_ht[] =
4635 4685 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)";
4636 4686
4637 4687 ASSERT(cpuid_checkpass(cpu, 1));
4638 4688
4639 4689 if (cpuid_is_cmt(cpu))
4640 4690 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid,
4641 4691 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
4642 4692 cpi->cpi_family, cpi->cpi_model,
4643 4693 cpi->cpi_step, cpu->cpu_type_info.pi_clock));
4644 4694 return (snprintf(s, n, fmt,
4645 4695 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
4646 4696 cpi->cpi_family, cpi->cpi_model,
4647 4697 cpi->cpi_step, cpu->cpu_type_info.pi_clock));
4648 4698 }
4649 4699
4650 4700 const char *
4651 4701 cpuid_getvendorstr(cpu_t *cpu)
4652 4702 {
4653 4703 ASSERT(cpuid_checkpass(cpu, 1));
4654 4704 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr);
4655 4705 }
4656 4706
4657 4707 uint_t
4658 4708 cpuid_getvendor(cpu_t *cpu)
4659 4709 {
4660 4710 ASSERT(cpuid_checkpass(cpu, 1));
4661 4711 return (cpu->cpu_m.mcpu_cpi->cpi_vendor);
4662 4712 }
4663 4713
4664 4714 uint_t
4665 4715 cpuid_getfamily(cpu_t *cpu)
4666 4716 {
4667 4717 ASSERT(cpuid_checkpass(cpu, 1));
4668 4718 return (cpu->cpu_m.mcpu_cpi->cpi_family);
4669 4719 }
4670 4720
4671 4721 uint_t
4672 4722 cpuid_getmodel(cpu_t *cpu)
4673 4723 {
4674 4724 ASSERT(cpuid_checkpass(cpu, 1));
4675 4725 return (cpu->cpu_m.mcpu_cpi->cpi_model);
4676 4726 }
4677 4727
4678 4728 uint_t
4679 4729 cpuid_get_ncpu_per_chip(cpu_t *cpu)
4680 4730 {
4681 4731 ASSERT(cpuid_checkpass(cpu, 1));
4682 4732 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip);
4683 4733 }
4684 4734
4685 4735 uint_t
4686 4736 cpuid_get_ncore_per_chip(cpu_t *cpu)
4687 4737 {
4688 4738 ASSERT(cpuid_checkpass(cpu, 1));
4689 4739 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip);
4690 4740 }
4691 4741
4692 4742 uint_t
4693 4743 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu)
4694 4744 {
4695 4745 ASSERT(cpuid_checkpass(cpu, 2));
4696 4746 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache);
4697 4747 }
4698 4748
4699 4749 id_t
4700 4750 cpuid_get_last_lvl_cacheid(cpu_t *cpu)
4701 4751 {
4702 4752 ASSERT(cpuid_checkpass(cpu, 2));
4703 4753 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
4704 4754 }
4705 4755
4706 4756 uint_t
4707 4757 cpuid_getstep(cpu_t *cpu)
4708 4758 {
4709 4759 ASSERT(cpuid_checkpass(cpu, 1));
4710 4760 return (cpu->cpu_m.mcpu_cpi->cpi_step);
4711 4761 }
4712 4762
4713 4763 uint_t
4714 4764 cpuid_getsig(struct cpu *cpu)
4715 4765 {
4716 4766 ASSERT(cpuid_checkpass(cpu, 1));
4717 4767 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax);
4718 4768 }
4719 4769
4720 4770 uint32_t
4721 4771 cpuid_getchiprev(struct cpu *cpu)
4722 4772 {
4723 4773 ASSERT(cpuid_checkpass(cpu, 1));
4724 4774 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev);
4725 4775 }
4726 4776
4727 4777 const char *
4728 4778 cpuid_getchiprevstr(struct cpu *cpu)
4729 4779 {
4730 4780 ASSERT(cpuid_checkpass(cpu, 1));
4731 4781 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr);
4732 4782 }
4733 4783
4734 4784 uint32_t
4735 4785 cpuid_getsockettype(struct cpu *cpu)
4736 4786 {
4737 4787 ASSERT(cpuid_checkpass(cpu, 1));
4738 4788 return (cpu->cpu_m.mcpu_cpi->cpi_socket);
4739 4789 }
4740 4790
4741 4791 const char *
4742 4792 cpuid_getsocketstr(cpu_t *cpu)
4743 4793 {
4744 4794 static const char *socketstr = NULL;
4745 4795 struct cpuid_info *cpi;
4746 4796
4747 4797 ASSERT(cpuid_checkpass(cpu, 1));
4748 4798 cpi = cpu->cpu_m.mcpu_cpi;
4749 4799
4750 4800 /* Assume that socket types are the same across the system */
4751 4801 if (socketstr == NULL)
4752 4802 socketstr = _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family,
4753 4803 cpi->cpi_model, cpi->cpi_step);
4754 4804
4755 4805
4756 4806 return (socketstr);
4757 4807 }
4758 4808
4759 4809 int
4760 4810 cpuid_get_chipid(cpu_t *cpu)
4761 4811 {
4762 4812 ASSERT(cpuid_checkpass(cpu, 1));
4763 4813
4764 4814 if (cpuid_is_cmt(cpu))
4765 4815 return (cpu->cpu_m.mcpu_cpi->cpi_chipid);
4766 4816 return (cpu->cpu_id);
4767 4817 }
4768 4818
4769 4819 id_t
4770 4820 cpuid_get_coreid(cpu_t *cpu)
4771 4821 {
4772 4822 ASSERT(cpuid_checkpass(cpu, 1));
4773 4823 return (cpu->cpu_m.mcpu_cpi->cpi_coreid);
4774 4824 }
4775 4825
4776 4826 int
4777 4827 cpuid_get_pkgcoreid(cpu_t *cpu)
4778 4828 {
4779 4829 ASSERT(cpuid_checkpass(cpu, 1));
4780 4830 return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid);
4781 4831 }
4782 4832
4783 4833 int
4784 4834 cpuid_get_clogid(cpu_t *cpu)
4785 4835 {
4786 4836 ASSERT(cpuid_checkpass(cpu, 1));
4787 4837 return (cpu->cpu_m.mcpu_cpi->cpi_clogid);
4788 4838 }
4789 4839
4790 4840 int
4791 4841 cpuid_get_cacheid(cpu_t *cpu)
4792 4842 {
4793 4843 ASSERT(cpuid_checkpass(cpu, 1));
4794 4844 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
4795 4845 }
4796 4846
4797 4847 uint_t
4798 4848 cpuid_get_procnodeid(cpu_t *cpu)
4799 4849 {
4800 4850 ASSERT(cpuid_checkpass(cpu, 1));
4801 4851 return (cpu->cpu_m.mcpu_cpi->cpi_procnodeid);
4802 4852 }
4803 4853
4804 4854 uint_t
4805 4855 cpuid_get_procnodes_per_pkg(cpu_t *cpu)
4806 4856 {
4807 4857 ASSERT(cpuid_checkpass(cpu, 1));
4808 4858 return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg);
4809 4859 }
4810 4860
4811 4861 uint_t
4812 4862 cpuid_get_compunitid(cpu_t *cpu)
4813 4863 {
4814 4864 ASSERT(cpuid_checkpass(cpu, 1));
4815 4865 return (cpu->cpu_m.mcpu_cpi->cpi_compunitid);
4816 4866 }
4817 4867
4818 4868 uint_t
4819 4869 cpuid_get_cores_per_compunit(cpu_t *cpu)
4820 4870 {
4821 4871 ASSERT(cpuid_checkpass(cpu, 1));
4822 4872 return (cpu->cpu_m.mcpu_cpi->cpi_cores_per_compunit);
4823 4873 }
4824 4874
4825 4875 /*ARGSUSED*/
4826 4876 int
4827 4877 cpuid_have_cr8access(cpu_t *cpu)
4828 4878 {
4829 4879 #if defined(__amd64)
4830 4880 return (1);
4831 4881 #else
4832 4882 struct cpuid_info *cpi;
4833 4883
4834 4884 ASSERT(cpu != NULL);
4835 4885 cpi = cpu->cpu_m.mcpu_cpi;
4836 4886 if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 &&
4837 4887 (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0)
4838 4888 return (1);
4839 4889 return (0);
4840 4890 #endif
4841 4891 }
4842 4892
4843 4893 uint32_t
4844 4894 cpuid_get_apicid(cpu_t *cpu)
4845 4895 {
4846 4896 ASSERT(cpuid_checkpass(cpu, 1));
4847 4897 if (cpu->cpu_m.mcpu_cpi->cpi_maxeax < 1) {
4848 4898 return (UINT32_MAX);
4849 4899 } else {
4850 4900 return (cpu->cpu_m.mcpu_cpi->cpi_apicid);
4851 4901 }
4852 4902 }
4853 4903
4854 4904 void
4855 4905 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits)
4856 4906 {
4857 4907 struct cpuid_info *cpi;
4858 4908
4859 4909 if (cpu == NULL)
4860 4910 cpu = CPU;
4861 4911 cpi = cpu->cpu_m.mcpu_cpi;
4862 4912
4863 4913 ASSERT(cpuid_checkpass(cpu, 1));
4864 4914
4865 4915 if (pabits)
4866 4916 *pabits = cpi->cpi_pabits;
4867 4917 if (vabits)
4868 4918 *vabits = cpi->cpi_vabits;
4869 4919 }
4870 4920
4871 4921 size_t
4872 4922 cpuid_get_xsave_size()
4873 4923 {
4874 4924 return (MAX(cpuid_info0.cpi_xsave.xsav_max_size,
4875 4925 sizeof (struct xsave_state)));
4876 4926 }
4877 4927
4878 4928 /*
4879 4929 * Return true if the CPUs on this system require 'pointer clearing' for the
4880 4930 * floating point error pointer exception handling. In the past, this has been
4881 4931 * true for all AMD K7 & K8 CPUs, although newer AMD CPUs have been changed to
4882 4932 * behave the same as Intel. This is checked via the CPUID_AMD_EBX_ERR_PTR_ZERO
4883 4933 * feature bit and is reflected in the cpi_fp_amd_save member.
4884 4934 */
4885 4935 boolean_t
4886 4936 cpuid_need_fp_excp_handling()
4887 4937 {
4888 4938 return (cpuid_info0.cpi_vendor == X86_VENDOR_AMD &&
4889 4939 cpuid_info0.cpi_fp_amd_save != 0);
4890 4940 }
4891 4941
4892 4942 /*
4893 4943 * Returns the number of data TLB entries for a corresponding
4894 4944 * pagesize. If it can't be computed, or isn't known, the
4895 4945 * routine returns zero. If you ask about an architecturally
4896 4946 * impossible pagesize, the routine will panic (so that the
4897 4947 * hat implementor knows that things are inconsistent.)
4898 4948 */
4899 4949 uint_t
4900 4950 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize)
4901 4951 {
4902 4952 struct cpuid_info *cpi;
4903 4953 uint_t dtlb_nent = 0;
4904 4954
4905 4955 if (cpu == NULL)
4906 4956 cpu = CPU;
4907 4957 cpi = cpu->cpu_m.mcpu_cpi;
4908 4958
4909 4959 ASSERT(cpuid_checkpass(cpu, 1));
4910 4960
4911 4961 /*
4912 4962 * Check the L2 TLB info
4913 4963 */
4914 4964 if (cpi->cpi_xmaxeax >= 0x80000006) {
4915 4965 struct cpuid_regs *cp = &cpi->cpi_extd[6];
4916 4966
4917 4967 switch (pagesize) {
4918 4968
4919 4969 case 4 * 1024:
4920 4970 /*
4921 4971 * All zero in the top 16 bits of the register
4922 4972 * indicates a unified TLB. Size is in low 16 bits.
4923 4973 */
4924 4974 if ((cp->cp_ebx & 0xffff0000) == 0)
4925 4975 dtlb_nent = cp->cp_ebx & 0x0000ffff;
4926 4976 else
4927 4977 dtlb_nent = BITX(cp->cp_ebx, 27, 16);
4928 4978 break;
4929 4979
4930 4980 case 2 * 1024 * 1024:
4931 4981 if ((cp->cp_eax & 0xffff0000) == 0)
4932 4982 dtlb_nent = cp->cp_eax & 0x0000ffff;
4933 4983 else
4934 4984 dtlb_nent = BITX(cp->cp_eax, 27, 16);
4935 4985 break;
4936 4986
4937 4987 default:
4938 4988 panic("unknown L2 pagesize");
4939 4989 /*NOTREACHED*/
4940 4990 }
4941 4991 }
4942 4992
4943 4993 if (dtlb_nent != 0)
4944 4994 return (dtlb_nent);
4945 4995
4946 4996 /*
4947 4997 * No L2 TLB support for this size, try L1.
4948 4998 */
4949 4999 if (cpi->cpi_xmaxeax >= 0x80000005) {
4950 5000 struct cpuid_regs *cp = &cpi->cpi_extd[5];
4951 5001
4952 5002 switch (pagesize) {
4953 5003 case 4 * 1024:
4954 5004 dtlb_nent = BITX(cp->cp_ebx, 23, 16);
4955 5005 break;
4956 5006 case 2 * 1024 * 1024:
4957 5007 dtlb_nent = BITX(cp->cp_eax, 23, 16);
4958 5008 break;
4959 5009 default:
4960 5010 panic("unknown L1 d-TLB pagesize");
4961 5011 /*NOTREACHED*/
4962 5012 }
4963 5013 }
4964 5014
4965 5015 return (dtlb_nent);
4966 5016 }
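For reference, a hypothetical helper that mirrors just the 4K-page L2 case handled above; the field positions restate the comments in the function and add no new information:

/* Hypothetical sketch of the 4K D-TLB decode from leaf 0x80000006 %ebx. */
static uint_t
amd_l2_dtlb4k_nent(uint32_t ebx)
{
	/* All zero in the top 16 bits indicates a unified L2 TLB. */
	if ((ebx & 0xffff0000) == 0)
		return (ebx & 0x0000ffff);

	/* Otherwise the D-TLB entry count lives in bits 27:16. */
	return (BITX(ebx, 27, 16));
}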
4967 5017
4968 5018 /*
4969 5019 * Return 0 if the erratum is not present or not applicable, positive
4970 5020 * if it is, and negative if the status of the erratum is unknown.
4971 5021 *
4972 5022 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm)
4973 5023 * Processors" #25759, Rev 3.57, August 2005
4974 5024 */
4975 5025 int
4976 5026 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum)
4977 5027 {
4978 5028 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4979 5029 uint_t eax;
4980 5030
4981 5031 /*
4982 5032 * Bail out if this CPU isn't an AMD CPU, or if it's
4983 5033 * a legacy (32-bit) AMD CPU.
4984 5034 */
4985 5035 if (cpi->cpi_vendor != X86_VENDOR_AMD ||
4986 5036 cpi->cpi_family == 4 || cpi->cpi_family == 5 ||
4987 5037 cpi->cpi_family == 6) {
4988 5038 return (0);
4989 5039 }
4990 5040
4991 5041 eax = cpi->cpi_std[1].cp_eax;
4992 5042
4993 5043 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50)
4994 5044 #define SH_B3(eax) (eax == 0xf51)
4995 5045 #define B(eax) (SH_B0(eax) || SH_B3(eax))
4996 5046
4997 5047 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58)
4998 5048
4999 5049 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a)
5000 5050 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0)
5001 5051 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2)
5002 5052 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax))
5003 5053
5004 5054 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70)
5005 5055 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0)
5006 5056 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0)
5007 5057 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax))
5008 5058
5009 5059 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70)
5010 5060 #define JH_E1(eax) (eax == 0x20f10) /* JH8_E0 had 0x20f30 */
5011 5061 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0)
5012 5062 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71)
5013 5063 #define BH_E4(eax) (eax == 0x20fb1)
5014 5064 #define SH_E5(eax) (eax == 0x20f42)
5015 5065 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2)
5016 5066 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32)
5017 5067 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \
5018 5068 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \
5019 5069 DH_E6(eax) || JH_E6(eax))
5020 5070
5021 5071 #define DR_AX(eax) (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02)
5022 5072 #define DR_B0(eax) (eax == 0x100f20)
5023 5073 #define DR_B1(eax) (eax == 0x100f21)
5024 5074 #define DR_BA(eax) (eax == 0x100f2a)
5025 5075 #define DR_B2(eax) (eax == 0x100f22)
5026 5076 #define DR_B3(eax) (eax == 0x100f23)
5027 5077 #define RB_C0(eax) (eax == 0x100f40)
5028 5078
5029 5079 switch (erratum) {
5030 5080 case 1:
5031 5081 return (cpi->cpi_family < 0x10);
5032 5082 case 51: /* what does the asterisk mean? */
5033 5083 return (B(eax) || SH_C0(eax) || CG(eax));
5034 5084 case 52:
5035 5085 return (B(eax));
5036 5086 case 57:
5037 5087 return (cpi->cpi_family <= 0x11);
5038 5088 case 58:
5039 5089 return (B(eax));
5040 5090 case 60:
5041 5091 return (cpi->cpi_family <= 0x11);
5042 5092 case 61:
5043 5093 case 62:
5044 5094 case 63:
5045 5095 case 64:
5046 5096 case 65:
5047 5097 case 66:
5048 5098 case 68:
5049 5099 case 69:
5050 5100 case 70:
5051 5101 case 71:
5052 5102 return (B(eax));
5053 5103 case 72:
5054 5104 return (SH_B0(eax));
5055 5105 case 74:
5056 5106 return (B(eax));
5057 5107 case 75:
5058 5108 return (cpi->cpi_family < 0x10);
5059 5109 case 76:
5060 5110 return (B(eax));
5061 5111 case 77:
5062 5112 return (cpi->cpi_family <= 0x11);
5063 5113 case 78:
5064 5114 return (B(eax) || SH_C0(eax));
5065 5115 case 79:
5066 5116 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
5067 5117 case 80:
5068 5118 case 81:
5069 5119 case 82:
5070 5120 return (B(eax));
5071 5121 case 83:
5072 5122 return (B(eax) || SH_C0(eax) || CG(eax));
5073 5123 case 85:
5074 5124 return (cpi->cpi_family < 0x10);
5075 5125 case 86:
5076 5126 return (SH_C0(eax) || CG(eax));
5077 5127 case 88:
5078 5128 #if !defined(__amd64)
5079 5129 return (0);
5080 5130 #else
5081 5131 return (B(eax) || SH_C0(eax));
5082 5132 #endif
5083 5133 case 89:
5084 5134 return (cpi->cpi_family < 0x10);
5085 5135 case 90:
5086 5136 return (B(eax) || SH_C0(eax) || CG(eax));
5087 5137 case 91:
5088 5138 case 92:
5089 5139 return (B(eax) || SH_C0(eax));
5090 5140 case 93:
5091 5141 return (SH_C0(eax));
5092 5142 case 94:
5093 5143 return (B(eax) || SH_C0(eax) || CG(eax));
5094 5144 case 95:
5095 5145 #if !defined(__amd64)
5096 5146 return (0);
5097 5147 #else
5098 5148 return (B(eax) || SH_C0(eax));
5099 5149 #endif
5100 5150 case 96:
5101 5151 return (B(eax) || SH_C0(eax) || CG(eax));
5102 5152 case 97:
5103 5153 case 98:
5104 5154 return (SH_C0(eax) || CG(eax));
5105 5155 case 99:
5106 5156 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5107 5157 case 100:
5108 5158 return (B(eax) || SH_C0(eax));
5109 5159 case 101:
5110 5160 case 103:
5111 5161 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5112 5162 case 104:
5113 5163 return (SH_C0(eax) || CG(eax) || D0(eax));
5114 5164 case 105:
5115 5165 case 106:
5116 5166 case 107:
5117 5167 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5118 5168 case 108:
5119 5169 return (DH_CG(eax));
5120 5170 case 109:
5121 5171 return (SH_C0(eax) || CG(eax) || D0(eax));
5122 5172 case 110:
5123 5173 return (D0(eax) || EX(eax));
5124 5174 case 111:
5125 5175 return (CG(eax));
5126 5176 case 112:
5127 5177 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
5128 5178 case 113:
5129 5179 return (eax == 0x20fc0);
5130 5180 case 114:
5131 5181 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
5132 5182 case 115:
5133 5183 return (SH_E0(eax) || JH_E1(eax));
5134 5184 case 116:
5135 5185 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
5136 5186 case 117:
5137 5187 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5138 5188 case 118:
5139 5189 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) ||
5140 5190 JH_E6(eax));
5141 5191 case 121:
5142 5192 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
5143 5193 case 122:
5144 5194 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11);
5145 5195 case 123:
5146 5196 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax));
5147 5197 case 131:
5148 5198 return (cpi->cpi_family < 0x10);
5149 5199 case 6336786:
5150 5200
5151 5201 /*
5152 5202 * Test for AdvPowerMgmtInfo.TscPStateInvariant
5153 5203 * if this is a K8 family or newer processor. We're testing for
5154 5204 * this 'erratum' to determine whether or not we have a constant
5155 5205 * TSC.
5156 5206 *
5157 5207 * Our current fix for this is to disable the C1-Clock ramping.
5158 5208 * However, this doesn't work on newer processor families nor
5159 5209 * does it work when virtualized as those devices don't exist.
5160 5210 */
5161 5211 if (cpi->cpi_family >= 0x12 || get_hwenv() != HW_NATIVE) {
5162 5212 return (0);
5163 5213 }
5164 5214
5165 5215 if (CPI_FAMILY(cpi) == 0xf) {
5166 5216 struct cpuid_regs regs;
5167 5217 regs.cp_eax = 0x80000007;
5168 5218 			(void) __cpuid_insn(&regs);
5169 5219 return (!(regs.cp_edx & 0x100));
5170 5220 }
5171 5221 return (0);
5172 5222 case 6323525:
5173 5223 /*
5174 5224 * This erratum (K8 #147) is not present on family 10 and newer.
5175 5225 */
5176 5226 if (cpi->cpi_family >= 0x10) {
5177 5227 return (0);
5178 5228 }
5179 5229 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) |
5180 5230 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40);
5181 5231
5182 5232 case 6671130:
5183 5233 /*
5184 5234 * check for processors (pre-Shanghai) that do not provide
5185 5235 		 * optimal management of 1gb ptes in their tlb.
5186 5236 */
5187 5237 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4);
5188 5238
5189 5239 case 298:
5190 5240 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) ||
5191 5241 DR_B2(eax) || RB_C0(eax));
5192 5242
5193 5243 case 721:
5194 5244 #if defined(__amd64)
5195 5245 return (cpi->cpi_family == 0x10 || cpi->cpi_family == 0x12);
5196 5246 #else
5197 5247 return (0);
5198 5248 #endif
5199 5249
5200 5250 default:
5201 5251 return (-1);
5202 5252
5203 5253 }
5204 5254 }
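A hedged sketch of how a caller might act on the tri-state return value; the erratum number is illustrative, and the real consumers live elsewhere in the kernel (e.g. the boot-time workaround logic):

	int err;

	err = cpuid_opteron_erratum(cpu, 121);	/* illustrative erratum */
	if (err > 0) {
		/* erratum applies to this part: engage the workaround */
	} else if (err < 0) {
		/* status unknown: be conservative */
	}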
5205 5255
5206 5256 /*
5207 5257 * Determine if specified erratum is present via OSVW (OS Visible Workaround).
5208 5258 * Return 1 if erratum is present, 0 if not present and -1 if indeterminate.
5209 5259 */
5210 5260 int
5211 5261 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum)
5212 5262 {
5213 5263 struct cpuid_info *cpi;
5214 5264 uint_t osvwid;
5215 5265 static int osvwfeature = -1;
5216 5266 uint64_t osvwlength;
5217 5267
5218 5268
5219 5269 cpi = cpu->cpu_m.mcpu_cpi;
5220 5270
5221 5271 /* confirm OSVW supported */
5222 5272 if (osvwfeature == -1) {
5223 5273 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW;
5224 5274 } else {
5225 5275 /* assert that osvw feature setting is consistent on all cpus */
5226 5276 ASSERT(osvwfeature ==
5227 5277 (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW));
5228 5278 }
5229 5279 if (!osvwfeature)
5230 5280 return (-1);
5231 5281
5232 5282 osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK;
5233 5283
5234 5284 switch (erratum) {
5235 5285 case 298: /* osvwid is 0 */
5236 5286 osvwid = 0;
5237 5287 if (osvwlength <= (uint64_t)osvwid) {
5238 5288 /* osvwid 0 is unknown */
5239 5289 return (-1);
5240 5290 }
5241 5291
5242 5292 /*
5243 5293 * Check the OSVW STATUS MSR to determine the state
5244 5294 * of the erratum where:
5245 5295 * 0 - fixed by HW
5246 5296 * 1 - BIOS has applied the workaround when BIOS
5247 5297 * workaround is available. (Or for other errata,
5248 5298 * OS workaround is required.)
5249 5299 * For a value of 1, caller will confirm that the
5250 5300 * erratum 298 workaround has indeed been applied by BIOS.
5251 5301 *
5252 5302 * A 1 may be set in cpus that have a HW fix
5253 5303 * in a mixed cpu system. Regarding erratum 298:
5254 5304 * In a multiprocessor platform, the workaround above
5255 5305 * should be applied to all processors regardless of
5256 5306 * silicon revision when an affected processor is
5257 5307 * present.
5258 5308 */
5259 5309
5260 5310 return (rdmsr(MSR_AMD_OSVW_STATUS +
5261 5311 (osvwid / OSVW_ID_CNT_PER_MSR)) &
5262 5312 (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR)));
5263 5313
5264 5314 default:
5265 5315 return (-1);
5266 5316 }
5267 5317 }
5268 5318
5269 5319 static const char assoc_str[] = "associativity";
5270 5320 static const char line_str[] = "line-size";
5271 5321 static const char size_str[] = "size";
5272 5322
5273 5323 static void
5274 5324 add_cache_prop(dev_info_t *devi, const char *label, const char *type,
5275 5325 uint32_t val)
5276 5326 {
5277 5327 char buf[128];
5278 5328
5279 5329 /*
5280 5330 * ndi_prop_update_int() is used because it is desirable for
5281 5331 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set.
5282 5332 */
5283 5333 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf))
5284 5334 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val);
5285 5335 }
5286 5336
5287 5337 /*
5288 5338 * Intel-style cache/tlb description
5289 5339 *
5290 5340 * Standard cpuid level 2 gives a randomly ordered
5291 5341 * selection of tags that index into a table that describes
5292 5342 * cache and tlb properties.
5293 5343 */
5294 5344
5295 5345 static const char l1_icache_str[] = "l1-icache";
5296 5346 static const char l1_dcache_str[] = "l1-dcache";
5297 5347 static const char l2_cache_str[] = "l2-cache";
5298 5348 static const char l3_cache_str[] = "l3-cache";
5299 5349 static const char itlb4k_str[] = "itlb-4K";
5300 5350 static const char dtlb4k_str[] = "dtlb-4K";
5301 5351 static const char itlb2M_str[] = "itlb-2M";
5302 5352 static const char itlb4M_str[] = "itlb-4M";
5303 5353 static const char dtlb4M_str[] = "dtlb-4M";
5304 5354 static const char dtlb24_str[] = "dtlb0-2M-4M";
5305 5355 static const char itlb424_str[] = "itlb-4K-2M-4M";
5306 5356 static const char itlb24_str[] = "itlb-2M-4M";
5307 5357 static const char dtlb44_str[] = "dtlb-4K-4M";
5308 5358 static const char sl1_dcache_str[] = "sectored-l1-dcache";
5309 5359 static const char sl2_cache_str[] = "sectored-l2-cache";
5310 5360 static const char itrace_str[] = "itrace-cache";
5311 5361 static const char sl3_cache_str[] = "sectored-l3-cache";
5312 5362 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k";
5313 5363
5314 5364 static const struct cachetab {
5315 5365 uint8_t ct_code;
5316 5366 uint8_t ct_assoc;
5317 5367 uint16_t ct_line_size;
5318 5368 size_t ct_size;
5319 5369 const char *ct_label;
5320 5370 } intel_ctab[] = {
5321 5371 /*
5322 5372 * maintain descending order!
5323 5373 *
5324 5374 * Codes ignored - Reason
5325 5375 * ----------------------
5326 5376 * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache
5327 5377 * f0H/f1H - Currently we do not interpret prefetch size by design
5328 5378 */
5329 5379 { 0xe4, 16, 64, 8*1024*1024, l3_cache_str},
5330 5380 { 0xe3, 16, 64, 4*1024*1024, l3_cache_str},
5331 5381 { 0xe2, 16, 64, 2*1024*1024, l3_cache_str},
5332 5382 { 0xde, 12, 64, 6*1024*1024, l3_cache_str},
5333 5383 { 0xdd, 12, 64, 3*1024*1024, l3_cache_str},
5334 5384 { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str},
5335 5385 { 0xd8, 8, 64, 4*1024*1024, l3_cache_str},
5336 5386 { 0xd7, 8, 64, 2*1024*1024, l3_cache_str},
5337 5387 { 0xd6, 8, 64, 1*1024*1024, l3_cache_str},
5338 5388 { 0xd2, 4, 64, 2*1024*1024, l3_cache_str},
5339 5389 { 0xd1, 4, 64, 1*1024*1024, l3_cache_str},
5340 5390 { 0xd0, 4, 64, 512*1024, l3_cache_str},
5341 5391 { 0xca, 4, 0, 512, sh_l2_tlb4k_str},
5342 5392 { 0xc0, 4, 0, 8, dtlb44_str },
5343 5393 { 0xba, 4, 0, 64, dtlb4k_str },
5344 5394 { 0xb4, 4, 0, 256, dtlb4k_str },
5345 5395 { 0xb3, 4, 0, 128, dtlb4k_str },
5346 5396 { 0xb2, 4, 0, 64, itlb4k_str },
5347 5397 { 0xb0, 4, 0, 128, itlb4k_str },
5348 5398 { 0x87, 8, 64, 1024*1024, l2_cache_str},
5349 5399 { 0x86, 4, 64, 512*1024, l2_cache_str},
5350 5400 { 0x85, 8, 32, 2*1024*1024, l2_cache_str},
5351 5401 { 0x84, 8, 32, 1024*1024, l2_cache_str},
5352 5402 { 0x83, 8, 32, 512*1024, l2_cache_str},
5353 5403 { 0x82, 8, 32, 256*1024, l2_cache_str},
5354 5404 { 0x80, 8, 64, 512*1024, l2_cache_str},
5355 5405 { 0x7f, 2, 64, 512*1024, l2_cache_str},
5356 5406 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str},
5357 5407 { 0x7c, 8, 64, 1024*1024, sl2_cache_str},
5358 5408 { 0x7b, 8, 64, 512*1024, sl2_cache_str},
5359 5409 { 0x7a, 8, 64, 256*1024, sl2_cache_str},
5360 5410 { 0x79, 8, 64, 128*1024, sl2_cache_str},
5361 5411 { 0x78, 8, 64, 1024*1024, l2_cache_str},
5362 5412 { 0x73, 8, 0, 64*1024, itrace_str},
5363 5413 { 0x72, 8, 0, 32*1024, itrace_str},
5364 5414 { 0x71, 8, 0, 16*1024, itrace_str},
5365 5415 { 0x70, 8, 0, 12*1024, itrace_str},
5366 5416 { 0x68, 4, 64, 32*1024, sl1_dcache_str},
5367 5417 { 0x67, 4, 64, 16*1024, sl1_dcache_str},
5368 5418 { 0x66, 4, 64, 8*1024, sl1_dcache_str},
5369 5419 { 0x60, 8, 64, 16*1024, sl1_dcache_str},
5370 5420 { 0x5d, 0, 0, 256, dtlb44_str},
5371 5421 { 0x5c, 0, 0, 128, dtlb44_str},
5372 5422 { 0x5b, 0, 0, 64, dtlb44_str},
5373 5423 { 0x5a, 4, 0, 32, dtlb24_str},
5374 5424 { 0x59, 0, 0, 16, dtlb4k_str},
5375 5425 { 0x57, 4, 0, 16, dtlb4k_str},
5376 5426 { 0x56, 4, 0, 16, dtlb4M_str},
5377 5427 { 0x55, 0, 0, 7, itlb24_str},
5378 5428 { 0x52, 0, 0, 256, itlb424_str},
5379 5429 { 0x51, 0, 0, 128, itlb424_str},
5380 5430 { 0x50, 0, 0, 64, itlb424_str},
5381 5431 { 0x4f, 0, 0, 32, itlb4k_str},
5382 5432 { 0x4e, 24, 64, 6*1024*1024, l2_cache_str},
5383 5433 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str},
5384 5434 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str},
5385 5435 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str},
5386 5436 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str},
5387 5437 { 0x49, 16, 64, 4*1024*1024, l3_cache_str},
5388 5438 { 0x48, 12, 64, 3*1024*1024, l2_cache_str},
5389 5439 { 0x47, 8, 64, 8*1024*1024, l3_cache_str},
5390 5440 { 0x46, 4, 64, 4*1024*1024, l3_cache_str},
5391 5441 { 0x45, 4, 32, 2*1024*1024, l2_cache_str},
5392 5442 { 0x44, 4, 32, 1024*1024, l2_cache_str},
5393 5443 { 0x43, 4, 32, 512*1024, l2_cache_str},
5394 5444 { 0x42, 4, 32, 256*1024, l2_cache_str},
5395 5445 { 0x41, 4, 32, 128*1024, l2_cache_str},
5396 5446 { 0x3e, 4, 64, 512*1024, sl2_cache_str},
5397 5447 { 0x3d, 6, 64, 384*1024, sl2_cache_str},
5398 5448 { 0x3c, 4, 64, 256*1024, sl2_cache_str},
5399 5449 { 0x3b, 2, 64, 128*1024, sl2_cache_str},
5400 5450 { 0x3a, 6, 64, 192*1024, sl2_cache_str},
5401 5451 { 0x39, 4, 64, 128*1024, sl2_cache_str},
5402 5452 { 0x30, 8, 64, 32*1024, l1_icache_str},
5403 5453 { 0x2c, 8, 64, 32*1024, l1_dcache_str},
5404 5454 { 0x29, 8, 64, 4096*1024, sl3_cache_str},
5405 5455 { 0x25, 8, 64, 2048*1024, sl3_cache_str},
5406 5456 { 0x23, 8, 64, 1024*1024, sl3_cache_str},
5407 5457 { 0x22, 4, 64, 512*1024, sl3_cache_str},
5408 5458 { 0x0e, 6, 64, 24*1024, l1_dcache_str},
5409 5459 { 0x0d, 4, 32, 16*1024, l1_dcache_str},
5410 5460 { 0x0c, 4, 32, 16*1024, l1_dcache_str},
5411 5461 { 0x0b, 4, 0, 4, itlb4M_str},
5412 5462 { 0x0a, 2, 32, 8*1024, l1_dcache_str},
5413 5463 { 0x08, 4, 32, 16*1024, l1_icache_str},
5414 5464 { 0x06, 4, 32, 8*1024, l1_icache_str},
5415 5465 { 0x05, 4, 0, 32, dtlb4M_str},
5416 5466 { 0x04, 4, 0, 8, dtlb4M_str},
5417 5467 { 0x03, 4, 0, 64, dtlb4k_str},
5418 5468 { 0x02, 4, 0, 2, itlb4M_str},
5419 5469 { 0x01, 4, 0, 32, itlb4k_str},
5420 5470 { 0 }
5421 5471 };
5422 5472
5423 5473 static const struct cachetab cyrix_ctab[] = {
5424 5474 { 0x70, 4, 0, 32, "tlb-4K" },
5425 5475 { 0x80, 4, 16, 16*1024, "l1-cache" },
5426 5476 { 0 }
5427 5477 };
5428 5478
5429 5479 /*
5430 5480 * Search a cache table for a matching entry
5431 5481 */
5432 5482 static const struct cachetab *
5433 5483 find_cacheent(const struct cachetab *ct, uint_t code)
5434 5484 {
5435 5485 if (code != 0) {
5436 5486 for (; ct->ct_code != 0; ct++)
5437 5487 if (ct->ct_code <= code)
5438 5488 break;
5439 5489 if (ct->ct_code == code)
5440 5490 return (ct);
5441 5491 }
5442 5492 return (NULL);
5443 5493 }
5444 5494
5445 5495 /*
5446 5496 * Populate cachetab entry with L2 or L3 cache-information using
5447 5497 * cpuid function 4. This function is called from intel_walk_cacheinfo()
5448 5498 * when descriptor 0x49 is encountered. It returns 0 if no such cache
5449 5499 * information is found.
5450 5500 */
5451 5501 static int
5452 5502 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi)
5453 5503 {
5454 5504 uint32_t level, i;
5455 5505 int ret = 0;
5456 5506
5457 5507 for (i = 0; i < cpi->cpi_cache_leaf_size; i++) {
5458 5508 level = CPI_CACHE_LVL(cpi->cpi_cache_leaves[i]);
5459 5509
5460 5510 if (level == 2 || level == 3) {
5461 5511 ct->ct_assoc =
5462 5512 CPI_CACHE_WAYS(cpi->cpi_cache_leaves[i]) + 1;
5463 5513 ct->ct_line_size =
5464 5514 CPI_CACHE_COH_LN_SZ(cpi->cpi_cache_leaves[i]) + 1;
5465 5515 ct->ct_size = ct->ct_assoc *
5466 5516 (CPI_CACHE_PARTS(cpi->cpi_cache_leaves[i]) + 1) *
5467 5517 ct->ct_line_size *
5468 5518 (cpi->cpi_cache_leaves[i]->cp_ecx + 1);
5469 5519
5470 5520 if (level == 2) {
5471 5521 ct->ct_label = l2_cache_str;
5472 5522 } else if (level == 3) {
5473 5523 ct->ct_label = l3_cache_str;
5474 5524 }
5475 5525 ret = 1;
5476 5526 }
5477 5527 }
5478 5528
5479 5529 return (ret);
5480 5530 }
5481 5531
5482 5532 /*
5483 5533 * Walk the cacheinfo descriptor, applying 'func' to every valid element
5484 5534 * The walk is terminated if the walker returns non-zero.
5485 5535 */
5486 5536 static void
5487 5537 intel_walk_cacheinfo(struct cpuid_info *cpi,
5488 5538 void *arg, int (*func)(void *, const struct cachetab *))
5489 5539 {
5490 5540 const struct cachetab *ct;
5491 5541 struct cachetab des_49_ct, des_b1_ct;
5492 5542 uint8_t *dp;
5493 5543 int i;
5494 5544
5495 5545 if ((dp = cpi->cpi_cacheinfo) == NULL)
5496 5546 return;
5497 5547 for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
5498 5548 /*
5499 5549 * For overloaded descriptor 0x49 we use cpuid function 4
5500 5550 * if supported by the current processor, to create
5501 5551 * cache information.
5502 5552 * For overloaded descriptor 0xb1 we use X86_PAE flag
5503 5553 * to disambiguate the cache information.
5504 5554 */
5505 5555 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 &&
5506 5556 intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) {
5507 5557 ct = &des_49_ct;
5508 5558 } else if (*dp == 0xb1) {
5509 5559 des_b1_ct.ct_code = 0xb1;
5510 5560 des_b1_ct.ct_assoc = 4;
5511 5561 des_b1_ct.ct_line_size = 0;
5512 5562 if (is_x86_feature(x86_featureset, X86FSET_PAE)) {
5513 5563 des_b1_ct.ct_size = 8;
5514 5564 des_b1_ct.ct_label = itlb2M_str;
5515 5565 } else {
5516 5566 des_b1_ct.ct_size = 4;
5517 5567 des_b1_ct.ct_label = itlb4M_str;
5518 5568 }
5519 5569 ct = &des_b1_ct;
5520 5570 } else {
5521 5571 if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) {
5522 5572 continue;
5523 5573 }
5524 5574 }
5525 5575
5526 5576 if (func(arg, ct) != 0) {
5527 5577 break;
5528 5578 }
5529 5579 }
5530 5580 }
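The walker above consumes cpi_cacheinfo, a flat array of descriptor bytes harvested from CPUID leaf 2 during an earlier pass. A minimal sketch of that harvesting, assuming the usual leaf-2 conventions (the low byte of %eax is an iteration count, and a register with bit 31 set carries no descriptors); the helper name is hypothetical and the real extraction is done elsewhere in this file:

static uint_t
gather_leaf2_descriptors(uint8_t *dst, uint_t max)
{
	struct cpuid_regs cp;
	uint32_t regs[4];
	uint_t i, j, n = 0;

	cp.cp_eax = 2;
	(void) __cpuid_insn(&cp);

	regs[0] = cp.cp_eax & ~0xffU;	/* low byte is the iteration count */
	regs[1] = cp.cp_ebx;
	regs[2] = cp.cp_ecx;
	regs[3] = cp.cp_edx;

	for (i = 0; i < 4; i++) {
		if (regs[i] & 0x80000000U)	/* bit 31: no descriptors */
			continue;
		for (j = 0; j < 4; j++) {
			uint8_t desc = (regs[i] >> (j * 8)) & 0xff;

			if (desc != 0 && n < max)
				dst[n++] = desc;
		}
	}
	return (n);
}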
5531 5581
5532 5582 /*
5533 5583 * (Like the Intel one, except for Cyrix CPUs)
5534 5584 */
5535 5585 static void
5536 5586 cyrix_walk_cacheinfo(struct cpuid_info *cpi,
5537 5587 void *arg, int (*func)(void *, const struct cachetab *))
5538 5588 {
5539 5589 const struct cachetab *ct;
5540 5590 uint8_t *dp;
5541 5591 int i;
5542 5592
5543 5593 if ((dp = cpi->cpi_cacheinfo) == NULL)
5544 5594 return;
5545 5595 for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
5546 5596 /*
5547 5597 * Search Cyrix-specific descriptor table first ..
5548 5598 */
5549 5599 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) {
5550 5600 if (func(arg, ct) != 0)
5551 5601 break;
5552 5602 continue;
5553 5603 }
5554 5604 /*
5555 5605 * .. else fall back to the Intel one
5556 5606 */
5557 5607 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) {
5558 5608 if (func(arg, ct) != 0)
5559 5609 break;
5560 5610 continue;
5561 5611 }
5562 5612 }
5563 5613 }
5564 5614
5565 5615 /*
5566 5616 * A cacheinfo walker that adds associativity, line-size, and size properties
5567 5617 * to the devinfo node it is passed as an argument.
5568 5618 */
5569 5619 static int
5570 5620 add_cacheent_props(void *arg, const struct cachetab *ct)
5571 5621 {
5572 5622 dev_info_t *devi = arg;
5573 5623
5574 5624 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc);
5575 5625 if (ct->ct_line_size != 0)
5576 5626 add_cache_prop(devi, ct->ct_label, line_str,
5577 5627 ct->ct_line_size);
5578 5628 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size);
5579 5629 return (0);
5580 5630 }
5581 5631
5582 5632
5583 5633 static const char fully_assoc[] = "fully-associative?";
5584 5634
5585 5635 /*
5586 5636 * AMD style cache/tlb description
5587 5637 *
5588 5638 * Extended functions 5 and 6 directly describe properties of
5589 5639 * tlbs and various cache levels.
5590 5640 */
5591 5641 static void
5592 5642 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc)
5593 5643 {
5594 5644 switch (assoc) {
5595 5645 case 0: /* reserved; ignore */
5596 5646 break;
5597 5647 default:
5598 5648 add_cache_prop(devi, label, assoc_str, assoc);
5599 5649 break;
5600 5650 case 0xff:
5601 5651 add_cache_prop(devi, label, fully_assoc, 1);
5602 5652 break;
5603 5653 }
5604 5654 }
5605 5655
5606 5656 static void
5607 5657 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
5608 5658 {
5609 5659 if (size == 0)
5610 5660 return;
5611 5661 add_cache_prop(devi, label, size_str, size);
5612 5662 add_amd_assoc(devi, label, assoc);
5613 5663 }
5614 5664
5615 5665 static void
5616 5666 add_amd_cache(dev_info_t *devi, const char *label,
5617 5667 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
5618 5668 {
5619 5669 if (size == 0 || line_size == 0)
5620 5670 return;
5621 5671 add_amd_assoc(devi, label, assoc);
5622 5672 /*
5623 5673 * Most AMD parts have a sectored cache. Multiple cache lines are
5624 5674 * associated with each tag. A sector consists of all cache lines
5625 5675 * associated with a tag. For example, the AMD K6-III has a sector
5626 5676 * size of 2 cache lines per tag.
5627 5677 */
5628 5678 if (lines_per_tag != 0)
5629 5679 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
5630 5680 add_cache_prop(devi, label, line_str, line_size);
5631 5681 add_cache_prop(devi, label, size_str, size * 1024);
5632 5682 }
5633 5683
5634 5684 static void
5635 5685 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc)
5636 5686 {
5637 5687 switch (assoc) {
5638 5688 case 0: /* off */
5639 5689 break;
5640 5690 case 1:
5641 5691 case 2:
5642 5692 case 4:
5643 5693 add_cache_prop(devi, label, assoc_str, assoc);
5644 5694 break;
5645 5695 case 6:
5646 5696 add_cache_prop(devi, label, assoc_str, 8);
5647 5697 break;
5648 5698 case 8:
5649 5699 add_cache_prop(devi, label, assoc_str, 16);
5650 5700 break;
5651 5701 case 0xf:
5652 5702 add_cache_prop(devi, label, fully_assoc, 1);
5653 5703 break;
5654 5704 default: /* reserved; ignore */
5655 5705 break;
5656 5706 }
5657 5707 }
5658 5708
5659 5709 static void
5660 5710 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
5661 5711 {
5662 5712 if (size == 0 || assoc == 0)
5663 5713 return;
5664 5714 add_amd_l2_assoc(devi, label, assoc);
5665 5715 add_cache_prop(devi, label, size_str, size);
5666 5716 }
5667 5717
5668 5718 static void
5669 5719 add_amd_l2_cache(dev_info_t *devi, const char *label,
5670 5720 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
5671 5721 {
5672 5722 if (size == 0 || assoc == 0 || line_size == 0)
5673 5723 return;
5674 5724 add_amd_l2_assoc(devi, label, assoc);
5675 5725 if (lines_per_tag != 0)
5676 5726 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
5677 5727 add_cache_prop(devi, label, line_str, line_size);
5678 5728 add_cache_prop(devi, label, size_str, size * 1024);
5679 5729 }
5680 5730
5681 5731 static void
5682 5732 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi)
5683 5733 {
5684 5734 struct cpuid_regs *cp;
5685 5735
5686 5736 if (cpi->cpi_xmaxeax < 0x80000005)
5687 5737 return;
5688 5738 cp = &cpi->cpi_extd[5];
5689 5739
5690 5740 /*
5691 5741 * 4M/2M L1 TLB configuration
5692 5742 *
5693 5743 * We report the size for 2M pages because AMD uses two
5694 5744 * TLB entries for one 4M page.
5695 5745 */
5696 5746 add_amd_tlb(devi, "dtlb-2M",
5697 5747 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16));
5698 5748 add_amd_tlb(devi, "itlb-2M",
5699 5749 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0));
5700 5750
5701 5751 /*
5702 5752 * 4K L1 TLB configuration
5703 5753 */
5704 5754
5705 5755 switch (cpi->cpi_vendor) {
5706 5756 uint_t nentries;
5707 5757 case X86_VENDOR_TM:
5708 5758 if (cpi->cpi_family >= 5) {
5709 5759 /*
5710 5760 * Crusoe processors have 256 TLB entries, but
5711 5761 * cpuid data format constrains them to only
5712 5762 * reporting 255 of them.
5713 5763 */
5714 5764 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255)
5715 5765 nentries = 256;
5716 5766 /*
5717 5767 * Crusoe processors also have a unified TLB
5718 5768 */
5719 5769 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24),
5720 5770 nentries);
5721 5771 break;
5722 5772 }
5723 5773 /*FALLTHROUGH*/
5724 5774 default:
5725 5775 add_amd_tlb(devi, itlb4k_str,
5726 5776 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16));
5727 5777 add_amd_tlb(devi, dtlb4k_str,
5728 5778 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0));
5729 5779 break;
5730 5780 }
5731 5781
5732 5782 /*
5733 5783 * data L1 cache configuration
5734 5784 */
5735 5785
5736 5786 add_amd_cache(devi, l1_dcache_str,
5737 5787 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16),
5738 5788 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0));
5739 5789
5740 5790 /*
5741 5791 * code L1 cache configuration
5742 5792 */
5743 5793
5744 5794 add_amd_cache(devi, l1_icache_str,
5745 5795 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16),
5746 5796 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0));
5747 5797
5748 5798 if (cpi->cpi_xmaxeax < 0x80000006)
5749 5799 return;
5750 5800 cp = &cpi->cpi_extd[6];
5751 5801
5752 5802 /* Check for a unified L2 TLB for large pages */
5753 5803
5754 5804 if (BITX(cp->cp_eax, 31, 16) == 0)
5755 5805 add_amd_l2_tlb(devi, "l2-tlb-2M",
5756 5806 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
5757 5807 else {
5758 5808 add_amd_l2_tlb(devi, "l2-dtlb-2M",
5759 5809 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
5760 5810 add_amd_l2_tlb(devi, "l2-itlb-2M",
5761 5811 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
5762 5812 }
5763 5813
5764 5814 /* Check for a unified L2 TLB for 4K pages */
5765 5815
5766 5816 if (BITX(cp->cp_ebx, 31, 16) == 0) {
5767 5817 add_amd_l2_tlb(devi, "l2-tlb-4K",
5768 5818 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
5769 5819 } else {
5770 5820 add_amd_l2_tlb(devi, "l2-dtlb-4K",
5771 5821 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
5772 5822 add_amd_l2_tlb(devi, "l2-itlb-4K",
5773 5823 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
5774 5824 }
5775 5825
5776 5826 add_amd_l2_cache(devi, l2_cache_str,
5777 5827 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12),
5778 5828 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0));
5779 5829 }
5780 5830
5781 5831 /*
5782 5832  * There are two basic ways that the x86 world describes its cache
5783 5833 * and tlb architecture - Intel's way and AMD's way.
5784 5834 *
5785 5835 * Return which flavor of cache architecture we should use
5786 5836 */
5787 5837 static int
5788 5838 x86_which_cacheinfo(struct cpuid_info *cpi)
5789 5839 {
5790 5840 switch (cpi->cpi_vendor) {
5791 5841 case X86_VENDOR_Intel:
5792 5842 if (cpi->cpi_maxeax >= 2)
5793 5843 return (X86_VENDOR_Intel);
5794 5844 break;
5795 5845 case X86_VENDOR_AMD:
5796 5846 /*
5797 5847 * The K5 model 1 was the first part from AMD that reported
5798 5848 * cache sizes via extended cpuid functions.
5799 5849 */
5800 5850 if (cpi->cpi_family > 5 ||
5801 5851 (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
5802 5852 return (X86_VENDOR_AMD);
5803 5853 break;
5804 5854 case X86_VENDOR_TM:
5805 5855 if (cpi->cpi_family >= 5)
5806 5856 return (X86_VENDOR_AMD);
5807 5857 /*FALLTHROUGH*/
5808 5858 default:
5809 5859 /*
5810 5860 * If they have extended CPU data for 0x80000005
5811 5861 * then we assume they have AMD-format cache
5812 5862 * information.
5813 5863 *
5814 5864 * If not, and the vendor happens to be Cyrix,
5815 5865 		 * then try our Cyrix-specific handler.
5816 5866 *
5817 5867 * If we're not Cyrix, then assume we're using Intel's
5818 5868 * table-driven format instead.
5819 5869 */
5820 5870 if (cpi->cpi_xmaxeax >= 0x80000005)
5821 5871 return (X86_VENDOR_AMD);
5822 5872 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix)
5823 5873 return (X86_VENDOR_Cyrix);
5824 5874 else if (cpi->cpi_maxeax >= 2)
5825 5875 return (X86_VENDOR_Intel);
5826 5876 break;
5827 5877 }
5828 5878 return (-1);
5829 5879 }
5830 5880
5831 5881 void
5832 5882 cpuid_set_cpu_properties(void *dip, processorid_t cpu_id,
5833 5883 struct cpuid_info *cpi)
5834 5884 {
5835 5885 dev_info_t *cpu_devi;
5836 5886 int create;
5837 5887
5838 5888 cpu_devi = (dev_info_t *)dip;
5839 5889
5840 5890 /* device_type */
5841 5891 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
5842 5892 "device_type", "cpu");
5843 5893
5844 5894 /* reg */
5845 5895 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5846 5896 "reg", cpu_id);
5847 5897
5848 5898 /* cpu-mhz, and clock-frequency */
5849 5899 if (cpu_freq > 0) {
5850 5900 long long mul;
5851 5901
5852 5902 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5853 5903 "cpu-mhz", cpu_freq);
5854 5904 if ((mul = cpu_freq * 1000000LL) <= INT_MAX)
5855 5905 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5856 5906 "clock-frequency", (int)mul);
5857 5907 }
5858 5908
5859 5909 if (!is_x86_feature(x86_featureset, X86FSET_CPUID)) {
5860 5910 return;
5861 5911 }
5862 5912
5863 5913 /* vendor-id */
5864 5914 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
5865 5915 "vendor-id", cpi->cpi_vendorstr);
5866 5916
5867 5917 if (cpi->cpi_maxeax == 0) {
5868 5918 return;
5869 5919 }
5870 5920
5871 5921 /*
5872 5922 * family, model, and step
5873 5923 */
5874 5924 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5875 5925 "family", CPI_FAMILY(cpi));
5876 5926 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5877 5927 "cpu-model", CPI_MODEL(cpi));
5878 5928 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5879 5929 "stepping-id", CPI_STEP(cpi));
5880 5930
5881 5931 /* type */
5882 5932 switch (cpi->cpi_vendor) {
5883 5933 case X86_VENDOR_Intel:
5884 5934 create = 1;
5885 5935 break;
5886 5936 default:
5887 5937 create = 0;
5888 5938 break;
5889 5939 }
5890 5940 if (create)
5891 5941 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5892 5942 "type", CPI_TYPE(cpi));
5893 5943
5894 5944 /* ext-family */
5895 5945 switch (cpi->cpi_vendor) {
5896 5946 case X86_VENDOR_Intel:
5897 5947 case X86_VENDOR_AMD:
5898 5948 create = cpi->cpi_family >= 0xf;
5899 5949 break;
5900 5950 default:
5901 5951 create = 0;
5902 5952 break;
5903 5953 }
5904 5954 if (create)
5905 5955 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5906 5956 "ext-family", CPI_FAMILY_XTD(cpi));
5907 5957
5908 5958 /* ext-model */
5909 5959 switch (cpi->cpi_vendor) {
5910 5960 case X86_VENDOR_Intel:
5911 5961 create = IS_EXTENDED_MODEL_INTEL(cpi);
5912 5962 break;
5913 5963 case X86_VENDOR_AMD:
5914 5964 create = CPI_FAMILY(cpi) == 0xf;
5915 5965 break;
5916 5966 default:
5917 5967 create = 0;
5918 5968 break;
5919 5969 }
5920 5970 if (create)
5921 5971 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5922 5972 "ext-model", CPI_MODEL_XTD(cpi));
5923 5973
5924 5974 /* generation */
5925 5975 switch (cpi->cpi_vendor) {
5926 5976 case X86_VENDOR_AMD:
5927 5977 /*
5928 5978 * AMD K5 model 1 was the first part to support this
5929 5979 */
5930 5980 create = cpi->cpi_xmaxeax >= 0x80000001;
5931 5981 break;
5932 5982 default:
5933 5983 create = 0;
5934 5984 break;
5935 5985 }
5936 5986 if (create)
5937 5987 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5938 5988 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8));
5939 5989
5940 5990 /* brand-id */
5941 5991 switch (cpi->cpi_vendor) {
5942 5992 case X86_VENDOR_Intel:
5943 5993 /*
5944 5994 * brand id first appeared on Pentium III Xeon model 8,
5945 5995 * and Celeron model 8 processors and Opteron
5946 5996 */
5947 5997 create = cpi->cpi_family > 6 ||
5948 5998 (cpi->cpi_family == 6 && cpi->cpi_model >= 8);
5949 5999 break;
5950 6000 case X86_VENDOR_AMD:
5951 6001 create = cpi->cpi_family >= 0xf;
5952 6002 break;
5953 6003 default:
5954 6004 create = 0;
5955 6005 break;
5956 6006 }
5957 6007 if (create && cpi->cpi_brandid != 0) {
5958 6008 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5959 6009 "brand-id", cpi->cpi_brandid);
5960 6010 }
5961 6011
5962 6012 /* chunks, and apic-id */
5963 6013 switch (cpi->cpi_vendor) {
5964 6014 /*
5965 6015 * first available on Pentium IV and Opteron (K8)
5966 6016 */
5967 6017 case X86_VENDOR_Intel:
5968 6018 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
5969 6019 break;
5970 6020 case X86_VENDOR_AMD:
5971 6021 create = cpi->cpi_family >= 0xf;
5972 6022 break;
5973 6023 default:
5974 6024 create = 0;
5975 6025 break;
5976 6026 }
5977 6027 if (create) {
5978 6028 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5979 6029 "chunks", CPI_CHUNKS(cpi));
5980 6030 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5981 6031 "apic-id", cpi->cpi_apicid);
5982 6032 if (cpi->cpi_chipid >= 0) {
5983 6033 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5984 6034 "chip#", cpi->cpi_chipid);
5985 6035 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5986 6036 "clog#", cpi->cpi_clogid);
5987 6037 }
5988 6038 }
5989 6039
5990 6040 /* cpuid-features */
5991 6041 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5992 6042 "cpuid-features", CPI_FEATURES_EDX(cpi));
5993 6043
5994 6044
5995 6045 /* cpuid-features-ecx */
5996 6046 switch (cpi->cpi_vendor) {
5997 6047 case X86_VENDOR_Intel:
5998 6048 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
5999 6049 break;
6000 6050 case X86_VENDOR_AMD:
6001 6051 create = cpi->cpi_family >= 0xf;
6002 6052 break;
6003 6053 default:
6004 6054 create = 0;
6005 6055 break;
6006 6056 }
6007 6057 if (create)
6008 6058 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6009 6059 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi));
6010 6060
6011 6061 /* ext-cpuid-features */
6012 6062 switch (cpi->cpi_vendor) {
6013 6063 case X86_VENDOR_Intel:
6014 6064 case X86_VENDOR_AMD:
6015 6065 case X86_VENDOR_Cyrix:
6016 6066 case X86_VENDOR_TM:
6017 6067 case X86_VENDOR_Centaur:
6018 6068 create = cpi->cpi_xmaxeax >= 0x80000001;
6019 6069 break;
6020 6070 default:
6021 6071 create = 0;
6022 6072 break;
6023 6073 }
6024 6074 if (create) {
6025 6075 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6026 6076 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi));
6027 6077 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6028 6078 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi));
6029 6079 }
6030 6080
6031 6081 /*
6032 6082 * Brand String first appeared in Intel Pentium IV, AMD K5
6033 6083 * model 1, and Cyrix GXm. On earlier models we try and
6034 6084 * simulate something similar .. so this string should always
6035 6085 	 * say -something- about the processor, however lame.
6036 6086 */
6037 6087 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
6038 6088 "brand-string", cpi->cpi_brandstr);
6039 6089
6040 6090 /*
6041 6091 * Finally, cache and tlb information
6042 6092 */
6043 6093 switch (x86_which_cacheinfo(cpi)) {
6044 6094 case X86_VENDOR_Intel:
6045 6095 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
6046 6096 break;
6047 6097 case X86_VENDOR_Cyrix:
6048 6098 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
6049 6099 break;
6050 6100 case X86_VENDOR_AMD:
6051 6101 amd_cache_info(cpi, cpu_devi);
6052 6102 break;
6053 6103 default:
6054 6104 break;
6055 6105 }
6056 6106 }
6057 6107
6058 6108 struct l2info {
6059 6109 int *l2i_csz;
6060 6110 int *l2i_lsz;
6061 6111 int *l2i_assoc;
6062 6112 int l2i_ret;
6063 6113 };
6064 6114
6065 6115 /*
6066 6116 * A cacheinfo walker that fetches the size, line-size and associativity
6067 6117 * of the L2 cache
6068 6118 */
6069 6119 static int
6070 6120 intel_l2cinfo(void *arg, const struct cachetab *ct)
6071 6121 {
6072 6122 struct l2info *l2i = arg;
6073 6123 int *ip;
6074 6124
6075 6125 if (ct->ct_label != l2_cache_str &&
6076 6126 ct->ct_label != sl2_cache_str)
6077 6127 return (0); /* not an L2 -- keep walking */
6078 6128
6079 6129 if ((ip = l2i->l2i_csz) != NULL)
6080 6130 *ip = ct->ct_size;
6081 6131 if ((ip = l2i->l2i_lsz) != NULL)
6082 6132 *ip = ct->ct_line_size;
6083 6133 if ((ip = l2i->l2i_assoc) != NULL)
6084 6134 *ip = ct->ct_assoc;
6085 6135 l2i->l2i_ret = ct->ct_size;
6086 6136 return (1); /* was an L2 -- terminate walk */
6087 6137 }
6088 6138
6089 6139 /*
6090 6140 * AMD L2/L3 Cache and TLB Associativity Field Definition:
6091 6141 *
6092 6142 * Unlike the associativity for the L1 cache and tlb where the 8 bit
6093 6143 * value is the associativity, the associativity for the L2 cache and
6094 6144 * tlb is encoded in the following table. The 4 bit L2 value serves as
6095 6145 * an index into the amd_afd[] array to determine the associativity.
6096 6146 * -1 is undefined. 0 is fully associative.
6097 6147 */
6098 6148
6099 6149 static int amd_afd[] =
6100 6150 {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0};
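
Editorial note: to make the encoding above concrete, here is a minimal sketch (not part of this change; the helper name is hypothetical) of how the 4-bit field from CPUID 0x80000006 %ecx indexes amd_afd[]:

	/*
	 * Illustration only: decode the L2/L3 associativity field.
	 * e.g. field 0x4 -> 4-way, 0x6 -> 8-way, 0xa -> 32-way,
	 * 0xf -> 0 (fully associative); 0x0 and the other -1 entries are
	 * undefined, and amd_l2cacheinfo() below skips field 0 entirely.
	 */
	static int
	amd_l2_assoc_example(uint32_t ecx)
	{
		uint_t field = BITX(ecx, 15, 12);

		return (amd_afd[field]);
	}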
6101 6151
6102 6152 static void
6103 6153 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i)
6104 6154 {
6105 6155 struct cpuid_regs *cp;
6106 6156 uint_t size, assoc;
6107 6157 int i;
6108 6158 int *ip;
6109 6159
6110 6160 if (cpi->cpi_xmaxeax < 0x80000006)
6111 6161 return;
6112 6162 cp = &cpi->cpi_extd[6];
6113 6163
6114 6164 if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 &&
6115 6165 (size = BITX(cp->cp_ecx, 31, 16)) != 0) {
6116 6166 uint_t cachesz = size * 1024;
6117 6167 assoc = amd_afd[i];
6118 6168
6119 6169 ASSERT(assoc != -1);
6120 6170
6121 6171 if ((ip = l2i->l2i_csz) != NULL)
6122 6172 *ip = cachesz;
6123 6173 if ((ip = l2i->l2i_lsz) != NULL)
6124 6174 *ip = BITX(cp->cp_ecx, 7, 0);
6125 6175 if ((ip = l2i->l2i_assoc) != NULL)
6126 6176 *ip = assoc;
6127 6177 l2i->l2i_ret = cachesz;
6128 6178 }
6129 6179 }
6130 6180
6131 6181 int
6132 6182 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc)
6133 6183 {
6134 6184 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
6135 6185 struct l2info __l2info, *l2i = &__l2info;
6136 6186
6137 6187 l2i->l2i_csz = csz;
6138 6188 l2i->l2i_lsz = lsz;
6139 6189 l2i->l2i_assoc = assoc;
6140 6190 l2i->l2i_ret = -1;
6141 6191
6142 6192 switch (x86_which_cacheinfo(cpi)) {
6143 6193 case X86_VENDOR_Intel:
6144 6194 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
6145 6195 break;
6146 6196 case X86_VENDOR_Cyrix:
6147 6197 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
6148 6198 break;
6149 6199 case X86_VENDOR_AMD:
6150 6200 amd_l2cacheinfo(cpi, l2i);
6151 6201 break;
6152 6202 default:
6153 6203 break;
6154 6204 }
6155 6205 return (l2i->l2i_ret);
6156 6206 }
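
As a usage illustration (not from this change), a caller that wants the L2 geometry can invoke getl2cacheinfo() like this; a negative return means no L2 information was found, and any of the three pointers may be NULL if that field is not needed:

	int l2_size, l2_line, l2_assoc;

	if (getl2cacheinfo(CPU, &l2_size, &l2_line, &l2_assoc) < 0) {
		/* No L2 cache information was reported for this CPU. */
		l2_size = l2_line = l2_assoc = 0;
	}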
6157 6207
6158 6208 #if !defined(__xpv)
6159 6209
6160 6210 uint32_t *
6161 6211 cpuid_mwait_alloc(cpu_t *cpu)
6162 6212 {
6163 6213 uint32_t *ret;
6164 6214 size_t mwait_size;
6165 6215
6166 6216 ASSERT(cpuid_checkpass(CPU, 2));
6167 6217
6168 6218 mwait_size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max;
6169 6219 if (mwait_size == 0)
6170 6220 return (NULL);
6171 6221
6172 6222 /*
6173 6223	 * kmem_alloc() returns cache line size aligned data for mwait_size
6174 6224	 * allocations. mwait_size is currently cache line sized. Neither
6175 6225	 * of these implementation details is guaranteed to be true in the
6176 6226	 * future.
6177 6227	 *
6178 6228	 * First try allocating mwait_size, as kmem_alloc() currently returns
6179 6229	 * correctly aligned memory. If kmem_alloc() does not return
6180 6230	 * mwait_size-aligned memory, allocate twice mwait_size and round up.
6181 6231 *
6182 6232 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we
6183 6233 * decide to free this memory.
6184 6234 */
6185 6235 ret = kmem_zalloc(mwait_size, KM_SLEEP);
6186 6236 if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) {
6187 6237 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
6188 6238 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size;
6189 6239 *ret = MWAIT_RUNNING;
6190 6240 return (ret);
6191 6241 } else {
6192 6242 kmem_free(ret, mwait_size);
6193 6243 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP);
6194 6244 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
6195 6245 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2;
6196 6246 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size);
6197 6247 *ret = MWAIT_RUNNING;
6198 6248 return (ret);
6199 6249 }
6200 6250 }
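
For clarity, a worked example of the alignment fallback above (editorial illustration; the addresses are invented and P2ROUNDUP is the usual power-of-two round-up macro from sys/sysmacros.h):

	/*
	 * Assume mwait_size == 64.
	 *
	 * kmem_zalloc(64) returns 0x...f040: P2ROUNDUP(0x...f040, 64) ==
	 * 0x...f040, so the buffer is already aligned and used directly.
	 *
	 * kmem_zalloc(64) returns 0x...f048: not 64-byte aligned, so it is
	 * freed and kmem_zalloc(128) is used instead.  If that returns
	 * 0x...f048, P2ROUNDUP(0x...f048, 64) == 0x...f080, and the 64-byte
	 * monitor line at 0x...f080 still fits inside the 128-byte buffer.
	 */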
6201 6251
6202 6252 void
6203 6253 cpuid_mwait_free(cpu_t *cpu)
6204 6254 {
6205 6255 if (cpu->cpu_m.mcpu_cpi == NULL) {
6206 6256 return;
6207 6257 }
6208 6258
6209 6259 if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL &&
6210 6260 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) {
6211 6261 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual,
6212 6262 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual);
6213 6263 }
6214 6264
6215 6265 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL;
6216 6266 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0;
6217 6267 }
6218 6268
6219 6269 void
6220 6270 patch_tsc_read(int flag)
6221 6271 {
6222 6272 size_t cnt;
6223 6273
6224 6274 switch (flag) {
6225 6275 case TSC_NONE:
6226 6276 cnt = &_no_rdtsc_end - &_no_rdtsc_start;
6227 6277 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt);
6228 6278 break;
6229 6279 case TSC_RDTSC_MFENCE:
6230 6280 cnt = &_tsc_mfence_end - &_tsc_mfence_start;
6231 6281 (void) memcpy((void *)tsc_read,
6232 6282 (void *)&_tsc_mfence_start, cnt);
6233 6283 break;
6234 6284 case TSC_RDTSC_LFENCE:
6235 6285 cnt = &_tsc_lfence_end - &_tsc_lfence_start;
6236 6286 (void) memcpy((void *)tsc_read,
6237 6287 (void *)&_tsc_lfence_start, cnt);
6238 6288 break;
6239 6289 case TSC_TSCP:
6240 6290 cnt = &_tscp_end - &_tscp_start;
6241 6291 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt);
6242 6292 break;
6243 6293 default:
6244 6294 /* Bail for unexpected TSC types. (TSC_NONE covers 0) */
6245 6295		cmn_err(CE_PANIC, "Unrecognized TSC type: %d", flag);
6246 6296 break;
6247 6297 }
6248 6298 tsc_type = flag;
6249 6299 }
6250 6300
6251 6301 int
6252 6302 cpuid_deep_cstates_supported(void)
6253 6303 {
6254 6304 struct cpuid_info *cpi;
6255 6305 struct cpuid_regs regs;
6256 6306
6257 6307 ASSERT(cpuid_checkpass(CPU, 1));
6258 6308
6259 6309 cpi = CPU->cpu_m.mcpu_cpi;
6260 6310
6261 6311 if (!is_x86_feature(x86_featureset, X86FSET_CPUID))
6262 6312 return (0);
6263 6313
6264 6314 switch (cpi->cpi_vendor) {
6265 6315 case X86_VENDOR_Intel:
6266 6316 if (cpi->cpi_xmaxeax < 0x80000007)
6267 6317 return (0);
6268 6318
6269 6319 /*
6270 6320		 * Does the TSC run at a constant rate in all ACPI C-states?
6271 6321 */
6272 6322 regs.cp_eax = 0x80000007;
6273 6323		(void) __cpuid_insn(&regs);
6274 6324 return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE);
6275 6325
6276 6326 default:
6277 6327 return (0);
6278 6328 }
6279 6329 }
6280 6330
6281 6331 #endif /* !__xpv */
6282 6332
6283 6333 void
6284 6334 post_startup_cpu_fixups(void)
6285 6335 {
6286 6336 #ifndef __xpv
6287 6337 /*
6288 6338 * Some AMD processors support C1E state. Entering this state will
6289 6339 * cause the local APIC timer to stop, which we can't deal with at
6290 6340 * this time.
6291 6341 */
6292 6342 if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) {
6293 6343 on_trap_data_t otd;
6294 6344 uint64_t reg;
6295 6345
6296 6346 if (!on_trap(&otd, OT_DATA_ACCESS)) {
6297 6347 reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT);
6298 6348 /* Disable C1E state if it is enabled by BIOS */
6299 6349 if ((reg >> AMD_ACTONCMPHALT_SHIFT) &
6300 6350 AMD_ACTONCMPHALT_MASK) {
6301 6351 reg &= ~(AMD_ACTONCMPHALT_MASK <<
6302 6352 AMD_ACTONCMPHALT_SHIFT);
6303 6353 wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg);
6304 6354 }
6305 6355 }
6306 6356 no_trap();
6307 6357 }
6308 6358 #endif /* !__xpv */
6309 6359 }
6310 6360
6311 6361 void
6312 6362 enable_pcid(void)
6313 6363 {
6314 6364 if (x86_use_pcid == -1)
6315 6365 x86_use_pcid = is_x86_feature(x86_featureset, X86FSET_PCID);
6316 6366
6317 6367 if (x86_use_invpcid == -1) {
6318 6368 x86_use_invpcid = is_x86_feature(x86_featureset,
6319 6369 X86FSET_INVPCID);
6320 6370 }
6321 6371
6322 6372 if (!x86_use_pcid)
6323 6373 return;
6324 6374
6325 6375 /*
6326 6376	 * Intel says that on setting PCIDE, the CPU immediately starts using the PCID
6327 6377 * bits; better make sure there's nothing there.
6328 6378 */
6329 6379 ASSERT((getcr3() & MMU_PAGEOFFSET) == PCID_NONE);
6330 6380
6331 6381 setcr4(getcr4() | CR4_PCIDE);
6332 6382 }
6333 6383
6334 6384 /*
6335 6385 * Set up the necessary registers to enable the XSAVE feature on this processor.
6336 6386 * This function needs to be called early enough, so that no xsave/xrstor
6337 6387 * ops will execute on the processor before the MSRs are properly set up.
6338 6388 *
6339 6389 * Current implementation has the following assumption:
6340 6390 * - cpuid_pass1() is done, so that X86 features are known.
6341 6391 * - fpu_probe() is done, so that fp_save_mech is chosen.
6342 6392 */
6343 6393 void
6344 6394 xsave_setup_msr(cpu_t *cpu)
6345 6395 {
6346 6396 ASSERT(fp_save_mech == FP_XSAVE);
6347 6397 ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
6348 6398
6349 6399 /* Enable OSXSAVE in CR4. */
6350 6400 setcr4(getcr4() | CR4_OSXSAVE);
6351 6401 /*
6352 6402 * Update SW copy of ECX, so that /dev/cpu/self/cpuid will report
6353 6403 * correct value.
6354 6404 */
6355 6405 cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_ecx |= CPUID_INTC_ECX_OSXSAVE;
6356 6406 setup_xfem();
6357 6407 }
6358 6408
6359 6409 /*
6360 6410 * Starting with the Westmere processor, the local
6361 6411 * APIC timer will continue running in all C-states,
6362 6412 * including the deepest C-states.
6363 6413 */
6364 6414 int
6365 6415 cpuid_arat_supported(void)
6366 6416 {
6367 6417 struct cpuid_info *cpi;
6368 6418 struct cpuid_regs regs;
6369 6419
6370 6420 ASSERT(cpuid_checkpass(CPU, 1));
6371 6421 ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
6372 6422
6373 6423 cpi = CPU->cpu_m.mcpu_cpi;
6374 6424
6375 6425 switch (cpi->cpi_vendor) {
6376 6426 case X86_VENDOR_Intel:
6377 6427 /*
6378 6428 * Always-running Local APIC Timer is
6379 6429 * indicated by CPUID.6.EAX[2].
6380 6430 */
6381 6431 if (cpi->cpi_maxeax >= 6) {
6382 6432 regs.cp_eax = 6;
6383 6433			(void) cpuid_insn(NULL, &regs);
6384 6434 return (regs.cp_eax & CPUID_CSTATE_ARAT);
6385 6435 } else {
6386 6436 return (0);
6387 6437 }
6388 6438 default:
6389 6439 return (0);
6390 6440 }
6391 6441 }
6392 6442
6393 6443 /*
6394 6444 * Check support for Intel ENERGY_PERF_BIAS feature
6395 6445 */
6396 6446 int
6397 6447 cpuid_iepb_supported(struct cpu *cp)
6398 6448 {
6399 6449 struct cpuid_info *cpi = cp->cpu_m.mcpu_cpi;
6400 6450 struct cpuid_regs regs;
6401 6451
6402 6452 ASSERT(cpuid_checkpass(cp, 1));
6403 6453
6404 6454 if (!(is_x86_feature(x86_featureset, X86FSET_CPUID)) ||
6405 6455 !(is_x86_feature(x86_featureset, X86FSET_MSR))) {
6406 6456 return (0);
6407 6457 }
6408 6458
6409 6459 /*
6410 6460 * Intel ENERGY_PERF_BIAS MSR is indicated by
6411 6461	 * capability bit CPUID.6.ECX[3]
6412 6462 */
6413 6463 if ((cpi->cpi_vendor != X86_VENDOR_Intel) || (cpi->cpi_maxeax < 6))
6414 6464 return (0);
6415 6465
6416 6466 regs.cp_eax = 0x6;
6417 6467	(void) cpuid_insn(NULL, &regs);
6418 6468 return (regs.cp_ecx & CPUID_EPB_SUPPORT);
6419 6469 }
6420 6470
6421 6471 /*
6422 6472 * Check support for TSC deadline timer
6423 6473 *
6424 6474 * The TSC deadline timer provides a superior software programming
6425 6475 * model over the local APIC timer, one that eliminates "time drift".
6426 6476 * Instead of specifying a relative time, software specifies an
6427 6477 * absolute time as the target at which the processor should
6428 6478 * generate a timer event.
6429 6479 */
6430 6480 int
6431 6481 cpuid_deadline_tsc_supported(void)
6432 6482 {
6433 6483 struct cpuid_info *cpi = CPU->cpu_m.mcpu_cpi;
6434 6484 struct cpuid_regs regs;
6435 6485
6436 6486 ASSERT(cpuid_checkpass(CPU, 1));
6437 6487 ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
6438 6488
6439 6489 switch (cpi->cpi_vendor) {
6440 6490 case X86_VENDOR_Intel:
6441 6491 if (cpi->cpi_maxeax >= 1) {
6442 6492 regs.cp_eax = 1;
6443 6493			(void) cpuid_insn(NULL, &regs);
6444 6494 return (regs.cp_ecx & CPUID_DEADLINE_TSC);
6445 6495 } else {
6446 6496 return (0);
6447 6497 }
6448 6498 default:
6449 6499 return (0);
6450 6500 }
6451 6501 }
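
To illustrate the absolute-time programming model described in the comment above (a minimal sketch, not part of this change: the MSR_TSC_DEADLINE name is hypothetical, the MSR is IA32_TSC_DEADLINE at 0x6e0, and the local APIC timer LVT must already be configured for TSC-deadline mode):

	#define	MSR_TSC_DEADLINE	0x6e0	/* IA32_TSC_DEADLINE (illustrative name) */

	static void
	arm_tsc_deadline(uint64_t delta_ticks)
	{
		/*
		 * Arm an absolute expiry: current TSC plus the desired delta.
		 * Writing 0 disarms the timer.
		 */
		wrmsr(MSR_TSC_DEADLINE, tsc_read() + delta_ticks);
	}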
6452 6502
6453 6503 #if defined(__amd64) && !defined(__xpv)
6454 6504 /*
6455 6505 * Patch in versions of bcopy for high-performance Intel Nehalem (Nhm) processors
6456 6506 * and later...
6457 6507 */
6458 6508 void
6459 6509 patch_memops(uint_t vendor)
6460 6510 {
6461 6511 size_t cnt, i;
6462 6512 caddr_t to, from;
6463 6513
6464 6514 if ((vendor == X86_VENDOR_Intel) &&
6465 6515 is_x86_feature(x86_featureset, X86FSET_SSE4_2)) {
6466 6516 cnt = &bcopy_patch_end - &bcopy_patch_start;
6467 6517 to = &bcopy_ck_size;
6468 6518 from = &bcopy_patch_start;
6469 6519 for (i = 0; i < cnt; i++) {
6470 6520 *to++ = *from++;
6471 6521 }
6472 6522 }
6473 6523 }
6474 6524 #endif /* __amd64 && !__xpv */
6475 6525
6476 6526 /*
6477 6527 * We're being asked to tell the system how many bits are required to represent
6478 6528 * the various core and strand IDs. While it's tempting to derive this based
6479 6529 * on the values in cpi_ncore_per_chip and cpi_ncpu_per_chip, that isn't quite
6480 6530 * correct. Instead, this needs to be based on the number of bits that the APIC
6481 6531 * allows for these different configurations. We only update these to a larger
6482 6532 * value if we find one.
6483 6533 */
6484 6534 void
6485 6535 cpuid_get_ext_topo(cpu_t *cpu, uint_t *core_nbits, uint_t *strand_nbits)
6486 6536 {
6487 6537 struct cpuid_info *cpi;
6488 6538
6489 6539 VERIFY(cpuid_checkpass(CPU, 1));
6490 6540 cpi = cpu->cpu_m.mcpu_cpi;
6491 6541
6492 6542 if (cpi->cpi_ncore_bits > *core_nbits) {
6493 6543 *core_nbits = cpi->cpi_ncore_bits;
6494 6544 }
6495 6545
6496 6546 if (cpi->cpi_nthread_bits > *strand_nbits) {
6497 6547 *strand_nbits = cpi->cpi_nthread_bits;
6498 6548 }
6499 6549 }
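
To make the bit accounting concrete (editorial illustration, not from this change), with core_nbits == 3 and strand_nbits == 1 an APIC ID decomposes into package, core, and strand fields like this:

	uint32_t apicid = 0x2d;				/* 0b101101 */
	uint_t strand = apicid & ((1 << 1) - 1);	/* 1 */
	uint_t core = (apicid >> 1) & ((1 << 3) - 1);	/* 6 */
	uint_t pkg = apicid >> (1 + 3);			/* 2 */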
6500 6550
6501 6551 void
6502 6552 cpuid_pass_ucode(cpu_t *cpu, uchar_t *fset)
6503 6553 {
6504 6554 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
6505 6555 struct cpuid_regs cp;
6506 6556
6507 6557 /*
6508 6558 * Reread the CPUID portions that we need for various security
6509 6559 * information.
6510 6560 */
6511 6561 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
6512 6562 /*
6513 6563 * Check if we now have leaf 7 available to us.
6514 6564 */
6515 6565 if (cpi->cpi_maxeax < 7) {
6516 6566 bzero(&cp, sizeof (cp));
6517 6567 cp.cp_eax = 0;
6518 6568 cpi->cpi_maxeax = __cpuid_insn(&cp);
6519 6569 if (cpi->cpi_maxeax < 7)
6520 6570 return;
6521 6571 }
6522 6572
6523 6573 bzero(&cp, sizeof (cp));
6524 6574 cp.cp_eax = 7;
6525 6575 cp.cp_ecx = 0;
6526 6576 (void) __cpuid_insn(&cp);
6527 6577 cpi->cpi_std[7] = cp;
6528 6578 } else if (cpi->cpi_vendor == X86_VENDOR_AMD) {
6529 6579 /* No xcpuid support */
6530 6580 if (cpi->cpi_family < 5 ||
6531 6581 (cpi->cpi_family == 5 && cpi->cpi_model < 1))
6532 6582 return;
6533 6583
6534 6584 if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8) {
6535 6585 bzero(&cp, sizeof (cp));
6536 6586 cp.cp_eax = CPUID_LEAF_EXT_0;
6537 6587 cpi->cpi_xmaxeax = __cpuid_insn(&cp);
6538 6588 if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8) {
6539 6589 return;
6540 6590 }
6541 6591 }
6542 6592
6543 6593 bzero(&cp, sizeof (cp));
6544 6594 cp.cp_eax = CPUID_LEAF_EXT_8;
6545 6595 (void) __cpuid_insn(&cp);
6546 6596 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8, &cp);
6547 6597 cpi->cpi_extd[8] = cp;
6548 6598 } else {
6549 6599 /*
6550 6600 * Nothing to do here. Return an empty set which has already
6551 6601 * been zeroed for us.
6552 6602 */
6553 6603 return;
6554 6604 }
6555 6605 cpuid_scan_security(cpu, fset);
6556 6606 }
6557 6607
6558 6608 /* ARGSUSED */
6559 6609 static int
6560 6610 cpuid_post_ucodeadm_xc(xc_arg_t arg0, xc_arg_t arg1, xc_arg_t arg2)
6561 6611 {
6562 6612 uchar_t *fset;
6563 6613
6564 6614 fset = (uchar_t *)(arg0 + sizeof (x86_featureset) * CPU->cpu_id);
6565 6615 cpuid_pass_ucode(CPU, fset);
6566 6616
6567 6617 return (0);
6568 6618 }
6569 6619
6570 6620 /*
6571 6621 * After a microcode update where the version has changed, we need to
6572 6622 * rescan CPUID. To do this we check every CPU to make sure that they have the
6573 6623 * same microcode. Then we perform a cross call to all such CPUs. It's the
6574 6624 * caller's job to make sure that no one else can end up doing an update while
6575 6625 * this is going on.
6576 6626 *
6577 6627 * We assume that the system is microcode capable if we're called.
6578 6628 */
6579 6629 void
6580 6630 cpuid_post_ucodeadm(void)
6581 6631 {
6582 6632 uint32_t rev;
6583 6633 int i;
6584 6634 struct cpu *cpu;
6585 6635 cpuset_t cpuset;
6586 6636 void *argdata;
6587 6637 uchar_t *f0;
6588 6638
6589 6639 argdata = kmem_zalloc(sizeof (x86_featureset) * NCPU, KM_SLEEP);
6590 6640
6591 6641 mutex_enter(&cpu_lock);
6592 6642 cpu = cpu_get(0);
6593 6643 rev = cpu->cpu_m.mcpu_ucode_info->cui_rev;
6594 6644 CPUSET_ONLY(cpuset, 0);
6595 6645 for (i = 1; i < max_ncpus; i++) {
6596 6646 if ((cpu = cpu_get(i)) == NULL)
6597 6647 continue;
6598 6648
6599 6649 if (cpu->cpu_m.mcpu_ucode_info->cui_rev != rev) {
6600 6650 panic("post microcode update CPU %d has differing "
6601 6651 "microcode revision (%u) from CPU 0 (%u)",
6602 6652 i, cpu->cpu_m.mcpu_ucode_info->cui_rev, rev);
6603 6653 }
6604 6654 CPUSET_ADD(cpuset, i);
6605 6655 }
6606 6656
6607 6657 kpreempt_disable();
6608 6658 xc_sync((xc_arg_t)argdata, 0, 0, CPUSET2BV(cpuset),
6609 6659 cpuid_post_ucodeadm_xc);
6610 6660 kpreempt_enable();
6611 6661
6612 6662 /*
6613 6663 * OK, now look at each CPU and see if their feature sets are equal.
6614 6664 */
6615 6665 f0 = argdata;
6616 6666 for (i = 1; i < max_ncpus; i++) {
6617 6667 uchar_t *fset;
6618 6668 if (!CPU_IN_SET(cpuset, i))
6619 6669 continue;
6620 6670
6621 6671 fset = (uchar_t *)((uintptr_t)argdata +
6622 6672 sizeof (x86_featureset) * i);
6623 6673
6624 6674 if (!compare_x86_featureset(f0, fset)) {
6625 6675 panic("Post microcode update CPU %d has "
6626 6676			    "differing security feature set (%p) from CPU 0 "
6627 6677 "(%p), not appending to feature set", i,
6628 6678 (void *)fset, (void *)f0);
6629 6679 }
6630 6680 }
6631 6681
6632 6682 mutex_exit(&cpu_lock);
6633 6683
6634 6684 for (i = 0; i < NUM_X86_FEATURES; i++) {
6635 6685 cmn_err(CE_CONT, "?post-ucode x86_feature: %s\n",
6636 6686 x86_feature_names[i]);
6637 6687 if (is_x86_feature(f0, i)) {
6638 6688 add_x86_feature(x86_featureset, i);
6639 6689 }
6640 6690 }
6641 6691 kmem_free(argdata, sizeof (x86_featureset) * NCPU);
6642 6692 }
4426 lines elided