10924 Need mitigation of L1TF (CVE-2018-3646)
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Peter Tribble <peter.tribble@gmail.com>
--- old/usr/src/uts/i86pc/os/cpuid.c
+++ new/usr/src/uts/i86pc/os/cpuid.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
24 24 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
25 25 * Copyright 2014 Josef "Jeff" Sipek <jeffpc@josefsipek.net>
26 26 */
27 27 /*
28 28 * Copyright (c) 2010, Intel Corporation.
29 29 * All rights reserved.
30 30 */
31 31 /*
32 32 * Portions Copyright 2009 Advanced Micro Devices, Inc.
33 33 */
34 34 /*
35 35 * Copyright 2019, Joyent, Inc.
36 36 */
37 37
38 38 /*
39 39 * CPU Identification logic
40 40 *
41 41 * The purpose of this file and its companion, cpuid_subr.c, is to help deal
42 42 * with the identification of CPUs, their features, and their topologies. More
43 43 * specifically, this file helps drive the following:
44 44 *
45 45 * 1. Enumeration of features of the processor which are used by the kernel to
46 46 * determine what features to enable or disable. These may be instruction set
47 47 * enhancements or features that we use.
48 48 *
49 49 * 2. Enumeration of instruction set architecture (ISA) additions that userland
50 50 * will be told about through the auxiliary vector.
51 51 *
52 52 * 3. Understanding the physical topology of the CPU such as the number of
 53 53 * caches, how many cores it has, whether or not it supports simultaneous
 54 54 * multi-threading (SMT), etc.
55 55 *
56 56 * ------------------------
57 57 * CPUID History and Basics
58 58 * ------------------------
59 59 *
60 60 * The cpuid instruction was added by Intel roughly around the time that the
61 61 * original Pentium was introduced. The purpose of cpuid was to tell in a
62 62 * programmatic fashion information about the CPU that previously was guessed
63 63 * at. For example, an important part of cpuid is that we can know what
64 64 * extensions to the ISA exist. If you use an invalid opcode you would get a
65 65 * #UD, so this method allows a program (whether a user program or the kernel)
66 66 * to determine what exists without crashing or getting a SIGILL. Of course,
67 67 * this was also during the era of the clones and the AMD Am5x86. The vendor
68 68 * name shows up first in cpuid for a reason.
69 69 *
70 70 * cpuid information is broken down into ranges called a 'leaf'. Each leaf puts
71 71 * unique values into the registers %eax, %ebx, %ecx, and %edx and each leaf has
72 72 * its own meaning. The different leaves are broken down into different regions:
73 73 *
74 74 * [ 0, 7fffffff ] This region is called the 'basic'
75 75 * region. This region is generally defined
76 76 * by Intel, though some of the original
77 77 * portions have different meanings based
78 78 * on the manufacturer. These days, Intel
79 79 * adds most new features to this region.
80 80 * AMD adds non-Intel compatible
81 81 * information in the third, extended
82 82 * region. Intel uses this for everything
83 83 * including ISA extensions, CPU
84 84 * features, cache information, topology,
85 85 * and more.
86 86 *
87 87 * There is a hole carved out of this
88 88 * region which is reserved for
89 89 * hypervisors.
90 90 *
91 91 * [ 40000000, 4fffffff ] This region, which is found in the
92 92 * middle of the previous region, is
93 93 * explicitly promised to never be used by
94 94 * CPUs. Instead, it is used by hypervisors
95 95 * to communicate information about
96 96 * themselves to the operating system. The
97 97 * values and details are unique for each
98 98 * hypervisor.
99 99 *
100 100 * [ 80000000, ffffffff ] This region is called the 'extended'
101 101 * region. Some of the low leaves mirror
102 102 * parts of the basic leaves. This region
103 103 * has generally been used by AMD for
104 104 * various extensions. For example, AMD-
105 105 * specific information about caches,
106 106 * features, and topology are found in this
107 107 * region.
108 108 *
 109 109 * To query a leaf, you place its number into %eax, zero %ebx, %ecx,
110 110 * and %edx, and then issue the cpuid instruction. At the first leaf in each of
111 111 * the ranges, one of the primary things returned is the maximum valid leaf in
112 112 * that range. This allows for discovery of what range of CPUID is valid.
113 113 *
114 114 * The CPUs have potentially surprising behavior when using an invalid leaf or
115 115 * unimplemented leaf. If the requested leaf is within the valid basic or
116 116 * extended range, but is unimplemented, then %eax, %ebx, %ecx, and %edx will be
117 117 * set to zero. However, if you specify a leaf that is outside of a valid range,
118 118 * then instead it will be filled with the last valid _basic_ leaf. For example,
119 119 * if the maximum basic value is on leaf 0x3, then issuing a cpuid for leaf 4 or
120 120 * an invalid extended leaf will return the information for leaf 3.
121 121 *
122 122 * Some leaves are broken down into sub-leaves. This means that the value
123 123 * depends on both the leaf asked for in %eax and a secondary register. For
124 124 * example, Intel uses the value in %ecx on leaf 7 to indicate a sub-leaf to get
125 125 * additional information. Or when getting topology information in leaf 0xb, the
126 126 * initial value in %ecx changes which level of the topology that you are
127 127 * getting information about.
128 128 *
129 129 * cpuid values are always kept to 32 bits regardless of whether or not the
130 130 * program is in 64-bit mode. When executing in 64-bit mode, the upper
 131 131 * 32 bits of the register are always set to zero so that the values are the
132 132 * same regardless of execution mode.
133 133 *
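 * As a brief illustration (not part of the original comment), querying a
 * leaf with the struct cpuid_regs type used elsewhere in this file might
 * look like the following; __cpuid_insn() is assumed here to be the usual
 * kernel helper that issues the instruction on the caller's behalf:
 *
 *	struct cpuid_regs regs = { 0 };
 *	uint32_t max_basic, max_extd;
 *
 *	regs.cp_eax = 0;		basic leaf 0
 *	(void) __cpuid_insn(&regs);
 *	max_basic = regs.cp_eax;	maximum valid basic leaf
 *
 *	regs.cp_eax = 0x80000000;	first extended leaf
 *	regs.cp_ebx = regs.cp_ecx = regs.cp_edx = 0;
 *	(void) __cpuid_insn(&regs);
 *	max_extd = regs.cp_eax;		maximum valid extended leaf
 *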
134 134 * ----------------------
135 135 * Identifying Processors
136 136 * ----------------------
137 137 *
138 138 * We can identify a processor in two steps. The first step looks at cpuid leaf
139 139 * 0. Leaf 0 contains the processor's vendor information. This is done by
 140 140 * putting a 12 character string across %ebx, %edx, and %ecx (in that order). On AMD, it is
141 141 * 'AuthenticAMD' and on Intel it is 'GenuineIntel'.
142 142 *
143 143 * From there, a processor is identified by a combination of three different
144 144 * values:
145 145 *
146 146 * 1. Family
147 147 * 2. Model
148 148 * 3. Stepping
149 149 *
150 150 * Each vendor uses the family and model to uniquely identify a processor. The
151 151 * way that family and model are changed depends on the vendor. For example,
 152 152 * Intel has been using family 0x6 for almost all of their processors since the
153 153 * Pentium Pro/Pentium II era, often called the P6. The model is used to
154 154 * identify the exact processor. Different models are often used for the client
155 155 * (consumer) and server parts. Even though each processor often has major
156 156 * architectural differences, they still are considered the same family by
157 157 * Intel.
158 158 *
159 159 * On the other hand, each major AMD architecture generally has its own family.
 160 160 * For example, the K8 is family 0xf, Bulldozer 0x15, and Zen 0x17. Within a
 161 161 * family, the model number is used to help identify specific processors.
162 162 *
163 163 * The stepping is used to refer to a revision of a specific microprocessor. The
164 164 * term comes from equipment used to produce masks that are used to create
165 165 * integrated circuits.
166 166 *
167 167 * The information is present in leaf 1, %eax. In technical documentation you
168 168 * will see the terms extended model and extended family. The original family,
 169 169 * model, and stepping fields were each 4 bits wide. If the base family is
 170 170 * 0xf (or, for the model, 0x6 on Intel), then one must also consult the
 171 171 * extended family and extended model fields, which take previously reserved
 172 172 * bits: the former is added to the base family, the latter extends the model.
173 173 *
174 174 * When we process this information, we store the full family, model, and
175 175 * stepping in the struct cpuid_info members cpi_family, cpi_model, and
176 176 * cpi_step, respectively. Whenever you are performing comparisons with the
177 177 * family, model, and stepping, you should use these members and not the raw
178 178 * values from cpuid. If you must use the raw values from cpuid directly, you
179 179 * must make sure that you add the extended model and family to the base model
180 180 * and family.
181 181 *
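 * To make the above concrete, here is a hedged sketch (not the actual code
 * in this file) of the conversion from the raw leaf 1 %eax value to
 * cpi_family, cpi_model, and cpi_step, assuming the 4-bit base fields, the
 * 8-bit extended family, and the 4-bit extended model described in the
 * vendor documentation; is_intel below is shorthand for a vendor check:
 *
 *	base_family = (eax >> 8) & 0xf;
 *	base_model = (eax >> 4) & 0xf;
 *	ext_family = (eax >> 20) & 0xff;
 *	ext_model = (eax >> 16) & 0xf;
 *
 *	cpi_family = base_family;
 *	if (base_family == 0xf)
 *		cpi_family += ext_family;
 *
 *	cpi_model = base_model;
 *	if (base_family == 0xf || (is_intel && base_family == 0x6))
 *		cpi_model |= ext_model << 4;
 *
 *	cpi_step = eax & 0xf;
 *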
182 182 * In general, we do not use information about the family, model, and stepping
183 183 * to determine whether or not a feature is present; that is generally driven by
184 184 * specific leaves. However, when something we care about on the processor is
185 185 * not considered 'architectural' meaning that it is specific to a set of
186 186 * processors and not promised in the architecture model to be consistent from
187 187 * generation to generation, then we will fall back on this information. The
188 188 * most common cases where this comes up is when we have to workaround errata in
189 189 * the processor, are dealing with processor-specific features such as CPU
190 190 * performance counters, or we want to provide additional information for things
191 191 * such as fault management.
192 192 *
193 193 * While processors also do have a brand string, which is the name that people
 194 194 * are familiar with when buying the processor, it is not meant for
195 195 * programmatic consumption. That is what the family, model, and stepping are
196 196 * for.
197 197 *
198 198 * ------------
199 199 * CPUID Passes
200 200 * ------------
201 201 *
202 202 * As part of performing feature detection, we break this into several different
203 203 * passes. The passes are as follows:
204 204 *
205 205 * Pass 0 This is a primordial pass done in locore.s to deal with
206 206 * Cyrix CPUs that don't support cpuid. The reality is that
207 207 * we likely don't run on them any more, but there is still
208 208 * logic for handling them.
209 209 *
210 210 * Pass 1 This is the primary pass and is responsible for doing a
211 211 * large number of different things:
212 212 *
 213 213 * 1. Determining which vendor manufactured the CPU and
 214 214 * the family, model, and stepping information.
215 215 *
216 216 * 2. Gathering a large number of feature flags to
 217 217 * determine which features the CPU supports and which
218 218 * indicate things that we need to do other work in the OS
219 219 * to enable. Features detected this way are added to the
220 220 * x86_featureset which can be queried to
221 221 * determine what we should do. This includes processing
222 222 * all of the basic and extended CPU features that we care
223 223 * about.
224 224 *
225 225 * 3. Determining the CPU's topology. This includes
226 226 * information about how many cores and threads are present
227 227 * in the package. It also is responsible for figuring out
228 228 * which logical CPUs are potentially part of the same core
229 229 * and what other resources they might share. For more
230 230 * information see the 'Topology' section.
231 231 *
232 232 * 4. Determining the set of CPU security-specific features
 233 233 * that we need to worry about and determining the
234 234 * appropriate set of workarounds.
235 235 *
236 236 * Pass 1 on the boot CPU occurs before KMDB is started.
237 237 *
238 238 * Pass 2 The second pass is done after startup(). Here, we check
239 239 * other miscellaneous features. Most of this is gathering
240 240 * additional basic and extended features that we'll use in
241 241 * later passes or for debugging support.
242 242 *
243 243 * Pass 3 The third pass occurs after the kernel memory allocator
244 244 * has been fully initialized. This gathers information
245 245 * where we might need dynamic memory available for our
246 246 * uses. This includes several varying width leaves that
247 247 * have cache information and the processor's brand string.
248 248 *
249 249 * Pass 4 The fourth and final normal pass is performed after the
250 250 * kernel has brought most everything online. This is
251 251 * invoked from post_startup(). In this pass, we go through
252 252 * the set of features that we have enabled and turn that
253 253 * into the hardware auxiliary vector features that
254 254 * userland receives. This is used by userland, primarily
255 255 * by the run-time link-editor (RTLD), though userland
256 256 * software could also refer to it directly.
257 257 *
258 258 * Microcode After a microcode update, we do a selective rescan of
259 259 * the cpuid leaves to determine what features have
260 260 * changed. Microcode updates can provide more details
261 261 * about security related features to deal with issues like
262 262 * Spectre and L1TF. On occasion, vendors have violated
263 263 * their contract and removed bits. However, we don't try
264 264 * to detect that because that puts us in a situation that
 265 265 * we really can't deal with. As such, the only things we
 266 266 * rescan today are security related features. See
267 267 * cpuid_pass_ucode().
268 268 *
269 269 * All of the passes (except pass 0) are run on all CPUs. However, for the most
270 270 * part we only care about what the boot CPU says about this information and use
271 271 * the other CPUs as a rough guide to sanity check that we have the same feature
272 272 * set.
273 273 *
274 274 * We do not support running multiple logical CPUs with disjoint, let alone
275 275 * different, feature sets.
276 276 *
277 277 * ------------------
278 278 * Processor Topology
279 279 * ------------------
280 280 *
281 281 * One of the important things that we need to do is to understand the topology
282 282 * of the underlying processor. When we say topology in this case, we're trying
283 283 * to understand the relationship between the logical CPUs that the operating
284 284 * system sees and the underlying physical layout. Different logical CPUs may
285 285 * share different resources which can have important consequences for the
286 286 * performance of the system. For example, they may share caches, execution
287 287 * units, and more.
288 288 *
289 289 * The topology of the processor changes from generation to generation and
290 290 * vendor to vendor. Along with that, different vendors use different
291 291 * terminology, and the operating system itself uses occasionally overlapping
292 292 * terminology. It's important to understand what this topology looks like so
293 293 * one can understand the different things that we try to calculate and
294 294 * determine.
295 295 *
296 296 * To get started, let's talk about a little bit of terminology that we've used
297 297 * so far, is used throughout this file, and is fairly generic across multiple
298 298 * vendors:
299 299 *
300 300 * CPU
301 301 * A central processing unit (CPU) refers to a logical and/or virtual
302 302 * entity that the operating system can execute instructions on. The
303 303 * underlying resources for this CPU may be shared between multiple
304 304 * entities; however, to the operating system it is a discrete unit.
305 305 *
306 306 * PROCESSOR and PACKAGE
307 307 *
308 308 * Generally, when we use the term 'processor' on its own, we are referring
309 309 * to the physical entity that one buys and plugs into a board. However,
310 310 * because processor has been overloaded and one might see it used to mean
311 311 * multiple different levels, we will instead use the term 'package' for
312 312 * the rest of this file. The term package comes from the electrical
313 313 * engineering side and refers to the physical entity that encloses the
314 314 * electronics inside. Strictly speaking the package can contain more than
315 315 * just the CPU, for example, on many processors it may also have what's
316 316 * called an 'integrated graphical processing unit (GPU)'. Because the
317 317 * package can encapsulate multiple units, it is the largest physical unit
318 318 * that we refer to.
319 319 *
320 320 * SOCKET
321 321 *
322 322 * A socket refers to unit on a system board (generally the motherboard)
323 323 * that can receive a package. A single package, or processor, is plugged
324 324 * into a single socket. A system may have multiple sockets. Often times,
325 325 * the term socket is used interchangeably with package and refers to the
 326 326 * electrical component that has been plugged in, and not the receptacle itself.
327 327 *
328 328 * CORE
329 329 *
330 330 * A core refers to the physical instantiation of a CPU, generally, with a
331 331 * full set of hardware resources available to it. A package may contain
332 332 * multiple cores inside of it or it may just have a single one. A
333 333 * processor with more than one core is often referred to as 'multi-core'.
334 334 * In illumos, we will use the feature X86FSET_CMP to refer to a system
335 335 * that has 'multi-core' processors.
336 336 *
337 337 * A core may expose a single logical CPU to the operating system, or it
338 338 * may expose multiple CPUs, which we call threads, defined below.
339 339 *
340 340 * Some resources may still be shared by cores in the same package. For
341 341 * example, many processors will share the level 3 cache between cores.
342 342 * Some AMD generations share hardware resources between cores. For more
343 343 * information on that see the section 'AMD Topology'.
344 344 *
345 345 * THREAD and STRAND
346 346 *
 347 347 * In this file, generally a thread refers to a hardware resource and not
348 348 * the operating system's logical abstraction. A thread is always exposed
349 349 * as an independent logical CPU to the operating system. A thread belongs
350 350 * to a specific core. A core may have more than one thread. When that is
351 351 * the case, the threads that are part of the same core are often referred
352 352 * to as 'siblings'.
353 353 *
354 354 * When multiple threads exist, this is generally referred to as
355 355 * simultaneous multi-threading (SMT). When Intel introduced this in their
356 356 * processors they called it hyper-threading (HT). When multiple threads
357 357 * are active in a core, they split the resources of the core. For example,
358 358 * two threads may share the same set of hardware execution units.
359 359 *
360 360 * The operating system often uses the term 'strand' to refer to a thread.
361 361 * This helps disambiguate it from the software concept.
362 362 *
363 363 * CHIP
364 364 *
365 365 * Unfortunately, the term 'chip' is dramatically overloaded. At its most
366 366 * base meaning, it is used to refer to a single integrated circuit, which
367 367 * may or may not be the only thing in the package. In illumos, when you
368 368 * see the term 'chip' it is almost always referring to the same thing as
369 369 * the 'package'. However, many vendors may use chip to refer to one of
370 370 * many integrated circuits that have been placed in the package. As an
371 371 * example, see the subsequent definition.
372 372 *
373 373 * To try and keep things consistent, we will only use chip when referring
374 374 * to the entire integrated circuit package, with the exception of the
375 375 * definition of multi-chip module (because it is in the name) and use the
376 376 * term 'die' when we want the more general, potential sub-component
377 377 * definition.
378 378 *
379 379 * DIE
380 380 *
381 381 * A die refers to an integrated circuit. Inside of the package there may
382 382 * be a single die or multiple dies. This is sometimes called a 'chip' in
383 383 * vendor's parlance, but in this file, we use the term die to refer to a
384 384 * subcomponent.
385 385 *
386 386 * MULTI-CHIP MODULE
387 387 *
388 388 * A multi-chip module (MCM) refers to putting multiple distinct chips that
389 389 * are connected together in the same package. When a multi-chip design is
390 390 * used, generally each chip is manufactured independently and then joined
391 391 * together in the package. For example, on AMD's Zen microarchitecture
392 392 * (family 0x17), the package contains several dies (the second meaning of
393 393 * chip from above) that are connected together.
394 394 *
395 395 * CACHE
396 396 *
397 397 * A cache is a part of the processor that maintains copies of recently
398 398 * accessed memory. Caches are split into levels and then into types.
399 399 * Commonly there are one to three levels, called level one, two, and
400 400 * three. The lower the level, the smaller it is, the closer it is to the
401 401 * execution units of the CPU, and the faster it is to access. The layout
402 402 * and design of the cache come in many different flavors, consult other
403 403 * resources for a discussion of those.
404 404 *
405 405 * Caches are generally split into two types, the instruction and data
406 406 * cache. The caches contain what their names suggest, the instruction
407 407 * cache has executable program text, while the data cache has all other
408 408 * memory that the processor accesses. As of this writing, data is kept
409 409 * coherent between all of the caches on x86, so if one modifies program
410 410 * text before it is executed, that will be in the data cache, and the
411 411 * instruction cache will be synchronized with that change when the
412 412 * processor actually executes those instructions. This coherency also
413 413 * covers the fact that data could show up in multiple caches.
414 414 *
415 415 * Generally, the lowest level caches are specific to a core. However, the
 416 416 * last level cache is shared between some number of cores. The number of
417 417 * CPUs sharing this last level cache is important. This has implications
418 418 * for the choices that the scheduler makes, as accessing memory that might
419 419 * be in a remote cache after thread migration can be quite expensive.
420 420 *
421 421 * Sometimes, the word cache is abbreviated with a '$', because in US
422 422 * English the word cache is pronounced the same as cash. So L1D$ refers to
423 423 * the L1 data cache, and L2$ would be the L2 cache. This will not be used
424 424 * in the rest of this theory statement for clarity.
425 425 *
426 426 * MEMORY CONTROLLER
427 427 *
428 428 * The memory controller is a component that provides access to DRAM. Each
429 429 * memory controller can access a set number of DRAM channels. Each channel
430 430 * can have a number of DIMMs (sticks of memory) associated with it. A
431 431 * given package may have more than one memory controller. The association
432 432 * of the memory controller to a group of cores is important as it is
433 433 * cheaper to access memory on the controller that you are associated with.
434 434 *
435 435 * NUMA
436 436 *
437 437 * NUMA or non-uniform memory access, describes a way that systems are
438 438 * built. On x86, any processor core can address all of the memory in the
 439 439 * system. However, when using multiple sockets or possibly within a
440 440 * multi-chip module, some of that memory is physically closer and some of
441 441 * it is further. Memory that is further away is more expensive to access.
442 442 * Consider the following image of multiple sockets with memory:
443 443 *
444 444 * +--------+ +--------+
445 445 * | DIMM A | +----------+ +----------+ | DIMM D |
446 446 * +--------+-+ | | | | +-+------+-+
447 447 * | DIMM B |=======| Socket 0 |======| Socket 1 |=======| DIMM E |
448 448 * +--------+-+ | | | | +-+------+-+
449 449 * | DIMM C | +----------+ +----------+ | DIMM F |
450 450 * +--------+ +--------+
451 451 *
452 452 * In this example, Socket 0 is closer to DIMMs A-C while Socket 1 is
453 453 * closer to DIMMs D-F. This means that it is cheaper for socket 0 to
454 454 * access DIMMs A-C and more expensive to access D-F as it has to go
455 455 * through Socket 1 to get there. The inverse is true for Socket 1. DIMMs
456 456 * D-F are cheaper than A-C. While the socket form is the most common, when
457 457 * using multi-chip modules, this can also sometimes occur. For another
458 458 * example of this that's more involved, see the AMD topology section.
459 459 *
460 460 *
461 461 * Intel Topology
462 462 * --------------
463 463 *
 464 464 * Most Intel processors since Nehalem (as of this writing the current gen
 465 465 * is Skylake / Cannon Lake) follow a fairly similar pattern. The CPU portion of
466 466 * the package is a single monolithic die. MCMs currently aren't used. Most
467 467 * parts have three levels of caches, with the L3 cache being shared between
468 468 * all of the cores on the package. The L1/L2 cache is generally specific to
469 469 * an individual core. The following image shows at a simplified level what
470 470 * this looks like. The memory controller is commonly part of something called
 471 471 * the 'Uncore', which used to be a separate physical chip that was not a part of
 472 472 * the package, but is now part of the same chip.
473 473 *
474 474 * +-----------------------------------------------------------------------+
475 475 * | Package |
476 476 * | +-------------------+ +-------------------+ +-------------------+ |
477 477 * | | Core | | Core | | Core | |
478 478 * | | +--------+ +---+ | | +--------+ +---+ | | +--------+ +---+ | |
479 479 * | | | Thread | | L | | | | Thread | | L | | | | Thread | | L | | |
480 480 * | | +--------+ | 1 | | | +--------+ | 1 | | | +--------+ | 1 | | |
481 481 * | | +--------+ | | | | +--------+ | | | | +--------+ | | | |
482 482 * | | | Thread | | | | | | Thread | | | | | | Thread | | | | |
483 483 * | | +--------+ +---+ | | +--------+ +---+ | | +--------+ +---+ | |
484 484 * | | +--------------+ | | +--------------+ | | +--------------+ | |
485 485 * | | | L2 Cache | | | | L2 Cache | | | | L2 Cache | | |
486 486 * | | +--------------+ | | +--------------+ | | +--------------+ | |
487 487 * | +-------------------+ +-------------------+ +-------------------+ |
488 488 * | +-------------------------------------------------------------------+ |
489 489 * | | Shared L3 Cache | |
490 490 * | +-------------------------------------------------------------------+ |
491 491 * | +-------------------------------------------------------------------+ |
492 492 * | | Memory Controller | |
493 493 * | +-------------------------------------------------------------------+ |
494 494 * +-----------------------------------------------------------------------+
495 495 *
496 496 * A side effect of this current architecture is that what we care about from a
 497 497 * scheduling and topology perspective is simplified. In general, we care about
498 498 * understanding which logical CPUs are part of the same core and socket.
499 499 *
500 500 * To determine the relationship between threads and cores, Intel initially used
501 501 * the identifier in the advanced programmable interrupt controller (APIC). They
502 502 * also added cpuid leaf 4 to give additional information about the number of
503 503 * threads and CPUs in the processor. With the addition of x2apic (which
504 504 * increased the number of addressable logical CPUs from 8-bits to 32-bits), an
505 505 * additional cpuid topology leaf 0xB was added.
506 506 *
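 * A hedged sketch (not the code in this file) of walking leaf 0xB: each
 * sub-leaf, selected via %ecx, describes one topology level, and %eax[4:0]
 * reports how many low-order bits of the x2apic ID are consumed by the
 * levels below it. Reusing the struct cpuid_regs pattern from earlier:
 *
 *	regs.cp_eax = 0xb;
 *	regs.cp_ecx = 0;			sub-leaf 0: SMT level
 *	regs.cp_ebx = regs.cp_edx = 0;
 *	(void) __cpuid_insn(&regs);
 *	smt_shift = regs.cp_eax & 0x1f;
 *	x2apic_id = regs.cp_edx;
 *
 *	regs.cp_eax = 0xb;
 *	regs.cp_ecx = 1;			sub-leaf 1: core level
 *	regs.cp_ebx = regs.cp_edx = 0;
 *	(void) __cpuid_insn(&regs);
 *	core_shift = regs.cp_eax & 0x1f;
 *
 *	coreid = x2apic_id >> smt_shift;	shared by sibling threads
 *	chipid = x2apic_id >> core_shift;	shared by the whole package
 *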
507 507 * AMD Topology
508 508 * ------------
509 509 *
510 510 * When discussing AMD topology, we want to break this into three distinct
511 511 * generations of topology. There's the basic topology that has been used in
512 512 * family 0xf+ (Opteron, Athlon64), there's the topology that was introduced
513 513 * with family 0x15 (Bulldozer), and there's the topology that was introduced
514 514 * with family 0x17 (Zen). AMD also has some additional terminology that's worth
515 515 * talking about.
516 516 *
517 517 * Until the introduction of family 0x17 (Zen), AMD did not implement something
518 518 * that they considered SMT. Whether or not the AMD processors have SMT
519 519 * influences many things including scheduling and reliability, availability,
520 520 * and serviceability (RAS) features.
521 521 *
522 522 * NODE
523 523 *
524 524 * AMD uses the term node to refer to a die that contains a number of cores
525 525 * and I/O resources. Depending on the processor family and model, more
526 526 * than one node can be present in the package. When there is more than one
527 527 * node this indicates a multi-chip module. Usually each node has its own
528 528 * access to memory and I/O devices. This is important and generally
529 529 * different from the corresponding Intel Nehalem-Skylake+ processors. As a
530 530 * result, we track this relationship in the operating system.
531 531 *
532 532 * In processors with an L3 cache, the L3 cache is generally shared across
533 533 * the entire node, though the way this is carved up varies from generation
534 534 * to generation.
535 535 *
536 536 * BULLDOZER
537 537 *
538 538 * Starting with the Bulldozer family (0x15) and continuing until the
539 539 * introduction of the Zen microarchitecture, AMD introduced the idea of a
540 540 * compute unit. In a compute unit, two traditional cores share a number of
541 541 * hardware resources. Critically, they share the FPU, L1 instruction
542 542 * cache, and the L2 cache. Several compute units were then combined inside
543 543 * of a single node. Because the integer execution units, L1 data cache,
544 544 * and some other resources were not shared between the cores, AMD never
545 545 * considered this to be SMT.
546 546 *
547 547 * ZEN
548 548 *
 549 549 * The Zen family (0x17) uses a multi-chip module (MCM) design; the module
550 550 * is called Zeppelin. These modules are similar to the idea of nodes used
551 551 * previously. Each of these nodes has two DRAM channels which all of the
552 552 * cores in the node can access uniformly. These nodes are linked together
553 553 * in the package, creating a NUMA environment.
554 554 *
555 555 * The Zeppelin die itself contains two different 'core complexes'. Each
556 556 * core complex consists of four cores which each have two threads, for a
557 557 * total of 8 logical CPUs per complex. Unlike other generations,
558 558 * where all the logical CPUs in a given node share the L3 cache, here each
559 559 * core complex has its own shared L3 cache.
560 560 *
561 561 * A further thing that we need to consider is that in some configurations,
562 562 * particularly with the Threadripper line of processors, not every die
563 563 * actually has its memory controllers wired up to actual memory channels.
564 564 * This means that some cores have memory attached to them and others
565 565 * don't.
566 566 *
567 567 * To put Zen in perspective, consider the following images:
568 568 *
569 569 * +--------------------------------------------------------+
570 570 * | Core Complex |
571 571 * | +-------------------+ +-------------------+ +---+ |
572 572 * | | Core +----+ | | Core +----+ | | | |
573 573 * | | +--------+ | L2 | | | +--------+ | L2 | | | | |
574 574 * | | | Thread | +----+ | | | Thread | +----+ | | | |
575 575 * | | +--------+-+ +--+ | | +--------+-+ +--+ | | L | |
576 576 * | | | Thread | |L1| | | | Thread | |L1| | | 3 | |
577 577 * | | +--------+ +--+ | | +--------+ +--+ | | | |
578 578 * | +-------------------+ +-------------------+ | C | |
579 579 * | +-------------------+ +-------------------+ | a | |
580 580 * | | Core +----+ | | Core +----+ | | c | |
581 581 * | | +--------+ | L2 | | | +--------+ | L2 | | | h | |
582 582 * | | | Thread | +----+ | | | Thread | +----+ | | e | |
583 583 * | | +--------+-+ +--+ | | +--------+-+ +--+ | | | |
584 584 * | | | Thread | |L1| | | | Thread | |L1| | | | |
585 585 * | | +--------+ +--+ | | +--------+ +--+ | | | |
586 586 * | +-------------------+ +-------------------+ +---+ |
587 587 * | |
588 588 * +--------------------------------------------------------+
589 589 *
590 590 * This first image represents a single Zen core complex that consists of four
591 591 * cores.
592 592 *
593 593 *
594 594 * +--------------------------------------------------------+
595 595 * | Zeppelin Die |
596 596 * | +--------------------------------------------------+ |
597 597 * | | I/O Units (PCIe, SATA, USB, etc.) | |
598 598 * | +--------------------------------------------------+ |
599 599 * | HH |
600 600 * | +-----------+ HH +-----------+ |
601 601 * | | | HH | | |
602 602 * | | Core |==========| Core | |
603 603 * | | Complex |==========| Complex | |
604 604 * | | | HH | | |
605 605 * | +-----------+ HH +-----------+ |
606 606 * | HH |
607 607 * | +--------------------------------------------------+ |
608 608 * | | Memory Controller | |
609 609 * | +--------------------------------------------------+ |
610 610 * | |
611 611 * +--------------------------------------------------------+
612 612 *
 613 613 * This image represents a single Zeppelin Die. Note how both core complexes are
614 614 * connected to the same memory controller and I/O units. While each core
615 615 * complex has its own L3 cache as seen in the first image, they both have
616 616 * uniform access to memory.
617 617 *
618 618 *
619 619 * PP PP
620 620 * PP PP
621 621 * +----------PP---------------------PP---------+
622 622 * | PP PP |
623 623 * | +-----------+ +-----------+ |
624 624 * | | | | | |
625 625 * MMMMMMMMM| Zeppelin |==========| Zeppelin |MMMMMMMMM
626 626 * MMMMMMMMM| Die |==========| Die |MMMMMMMMM
627 627 * | | | | | |
628 628 * | +-----------+ooo ...+-----------+ |
629 629 * | HH ooo ... HH |
630 630 * | HH oo.. HH |
631 631 * | HH ..oo HH |
632 632 * | HH ... ooo HH |
633 633 * | +-----------+... ooo+-----------+ |
634 634 * | | | | | |
635 635 * MMMMMMMMM| Zeppelin |==========| Zeppelin |MMMMMMMMM
636 636 * MMMMMMMMM| Die |==========| Die |MMMMMMMMM
637 637 * | | | | | |
638 638 * | +-----------+ +-----------+ |
639 639 * | PP PP |
640 640 * +----------PP---------------------PP---------+
641 641 * PP PP
642 642 * PP PP
643 643 *
644 644 * This image represents a single Zen package. In this example, it has four
645 645 * Zeppelin dies, though some configurations only have a single one. In this
646 646 * example, each die is directly connected to the next. Also, each die is
647 647 * represented as being connected to memory by the 'M' character and connected
648 648 * to PCIe devices and other I/O, by the 'P' character. Because each Zeppelin
649 649 * die is made up of two core complexes, we have multiple different NUMA
650 650 * domains that we care about for these systems.
651 651 *
652 652 * CPUID LEAVES
653 653 *
654 654 * There are a few different CPUID leaves that we can use to try and understand
655 655 * the actual state of the world. As part of the introduction of family 0xf, AMD
656 656 * added CPUID leaf 0x80000008. This leaf tells us the number of logical
657 657 * processors that are in the system. Because families before Zen didn't have
658 658 * SMT, this was always the number of cores that were in the system. However, it
659 659 * should always be thought of as the number of logical threads to be consistent
660 660 * between generations. In addition we also get the size of the APIC ID that is
661 661 * used to represent the number of logical processors. This is important for
662 662 * deriving topology information.
663 663 *
664 664 * In the Bulldozer family, AMD added leaf 0x8000001E. The information varies a
665 665 * bit between Bulldozer and later families, but it is quite useful in
666 666 * determining the topology information. Because this information has changed
667 667 * across family generations, it's worth calling out what these mean
668 668 * explicitly. The registers have the following meanings:
669 669 *
670 670 * %eax The APIC ID. The entire register is defined to have a 32-bit
671 671 * APIC ID, even though on systems without x2apic support, it will
672 672 * be limited to 8 bits.
673 673 *
674 674 * %ebx On Bulldozer-era systems this contains information about the
675 675 * number of cores that are in a compute unit (cores that share
676 676 * resources). It also contains a per-package compute unit ID that
677 677 * identifies which compute unit the logical CPU is a part of.
678 678 *
679 679 * On Zen-era systems this instead contains the number of threads
680 680 * per core and the ID of the core that the logical CPU is a part
681 681 * of. Note, this ID is unique only to the package, it is not
682 682 * globally unique across the entire system.
683 683 *
684 684 * %ecx This contains the number of nodes that exist in the package. It
685 685 * also contains an ID that identifies which node the logical CPU
686 686 * is a part of.
687 687 *
688 688 * Finally, we also use cpuid leaf 0x8000001D to determine information about the
689 689 * cache layout to determine which logical CPUs are sharing which caches.
690 690 *
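 * To illustrate (a hedged sketch, not the code in this file), the fields
 * described above might be extracted on a Zen-era part using a
 * BITX(value, high, low) style bit-extraction helper (assumed here); the
 * bit positions should be confirmed against the AMD documentation:
 *
 *	regs.cp_eax = 0x80000008;
 *	regs.cp_ebx = regs.cp_ecx = regs.cp_edx = 0;
 *	(void) __cpuid_insn(&regs);
 *	nthreads = BITX(regs.cp_ecx, 7, 0) + 1;	   logical CPUs in package
 *	apic_id_bits = BITX(regs.cp_ecx, 15, 12);  bits of APIC ID used
 *
 *	regs.cp_eax = 0x8000001e;
 *	regs.cp_ebx = regs.cp_ecx = regs.cp_edx = 0;
 *	(void) __cpuid_insn(&regs);
 *	apicid = regs.cp_eax;
 *	coreid = BITX(regs.cp_ebx, 7, 0);	   package-local core ID
 *	threads_per_core = BITX(regs.cp_ebx, 15, 8) + 1;
 *	nodeid = BITX(regs.cp_ecx, 7, 0);
 *	nodes_per_pkg = BITX(regs.cp_ecx, 10, 8) + 1;
 *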
691 691 * illumos Topology
692 692 * ----------------
693 693 *
694 694 * Based on the above we synthesize the information into several different
695 695 * variables that we store in the 'struct cpuid_info'. We'll go into the details
696 696 * of what each member is supposed to represent and their uniqueness. In
697 697 * general, there are two levels of uniqueness that we care about. We care about
698 698 * an ID that is globally unique. That means that it will be unique across all
699 699 * entities in the system. For example, the default logical CPU ID is globally
700 700 * unique. On the other hand, there is some information that we only care about
701 701 * being unique within the context of a single package / socket. Here are the
702 702 * variables that we keep track of and their meaning.
703 703 *
 704 704 * Several of the values that represent an identifier, with the exception
705 705 * of cpi_apicid, are allowed to be synthetic.
706 706 *
707 707 *
708 708 * cpi_apicid
709 709 *
710 710 * This is the value of the CPU's APIC id. This should be the full 32-bit
711 711 * ID if the CPU is using the x2apic. Otherwise, it should be the 8-bit
712 712 * APIC ID. This value is globally unique between all logical CPUs across
713 713 * all packages. This is usually required by the APIC.
714 714 *
715 715 * cpi_chipid
716 716 *
717 717 * This value indicates the ID of the package that the logical CPU is a
718 718 * part of. This value is allowed to be synthetic. It is usually derived by
719 719 * taking the CPU's APIC ID and determining how many bits are used to
720 720 * represent CPU cores in the package. All logical CPUs that are part of
721 721 * the same package must have the same value.
722 722 *
723 723 * cpi_coreid
724 724 *
725 725 * This represents the ID of a CPU core. Two logical CPUs should only have
726 726 * the same cpi_coreid value if they are part of the same core. These
727 727 * values may be synthetic. On systems that support SMT, this value is
728 728 * usually derived from the APIC ID, otherwise it is often synthetic and
729 729 * just set to the value of the cpu_id in the cpu_t.
730 730 *
731 731 * cpi_pkgcoreid
732 732 *
733 733 * This is similar to the cpi_coreid in that logical CPUs that are part of
734 734 * the same core should have the same ID. The main difference is that these
735 735 * values are only required to be unique to a given socket.
736 736 *
737 737 * cpi_clogid
738 738 *
739 739 * This represents the logical ID of a logical CPU. This value should be
740 740 * unique within a given socket for each logical CPU. This is allowed to be
741 741 * synthetic, though it is usually based off of the CPU's apic ID. The
 742 742 * broader system expects that logical CPUs that are part of the same
 743 743 * core have contiguous numbers. For example, if there were two threads per
 744 744 * core, then the logical IDs divided by two should be the same, while the
 745 745 * first modulo two should be zero and the second should be one. Thus, IDs 4 and 5
746 746 * indicate two logical CPUs that are part of the same core. But IDs 5 and
747 747 * 6 represent two logical CPUs that are part of different cores.
748 748 *
749 749 * While it is common for the cpi_coreid and the cpi_clogid to be derived
750 750 * from the same source, strictly speaking, they don't have to be and the
751 751 * two values should be considered logically independent. One should not
752 752 * try to compare a logical CPU's cpi_coreid and cpi_clogid to determine
753 753 * some kind of relationship. While this is tempting, we've seen cases on
754 754 * AMD family 0xf where the system's cpu id is not related to its APIC ID.
755 755 *
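 * As a small worked example of the contiguity rule above (illustrative
 * only), with two threads per core:
 *
 *	threads_per_core = 2;
 *	coreid = clogid / threads_per_core;
 *	threadid = clogid % threads_per_core;
 *
 * clogid values 4 and 5 yield the same quotient (2) with remainders 0 and
 * 1, so they are siblings; 5 and 6 yield quotients 2 and 3, so they belong
 * to different cores.
 *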
756 756 * cpi_ncpu_per_chip
757 757 *
758 758 * This value indicates the total number of logical CPUs that exist in the
759 759 * physical package. Critically, this is not the number of logical CPUs
760 760 * that exist for just the single core.
761 761 *
762 762 * This value should be the same for all logical CPUs in the same package.
763 763 *
764 764 * cpi_ncore_per_chip
765 765 *
766 766 * This value indicates the total number of physical CPU cores that exist
767 767 * in the package. The system compares this value with cpi_ncpu_per_chip to
768 768 * determine if simultaneous multi-threading (SMT) is enabled. When
769 769 * cpi_ncpu_per_chip equals cpi_ncore_per_chip, then there is no SMT and
770 770 * the X86FSET_HTT feature is not set. If this value is greater than one,
 771 771 * then we consider the processor to have the feature X86FSET_CMP, to
772 772 * indicate that there is support for more than one core.
773 773 *
774 774 * This value should be the same for all logical CPUs in the same package.
775 775 *
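 * A minimal sketch (not the actual code in this file) of the comparison
 * described above:
 *
 *	if (cpi_ncpu_per_chip > cpi_ncore_per_chip)
 *		add_x86_feature(featureset, X86FSET_HTT);
 *	if (cpi_ncore_per_chip > 1)
 *		add_x86_feature(featureset, X86FSET_CMP);
 *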
776 776 * cpi_procnodes_per_pkg
777 777 *
778 778 * This value indicates the number of 'nodes' that exist in the package.
779 779 * When processors are actually a multi-chip module, this represents the
780 780 * number of such modules that exist in the package. Currently, on Intel
781 781 * based systems this member is always set to 1.
782 782 *
783 783 * This value should be the same for all logical CPUs in the same package.
784 784 *
785 785 * cpi_procnodeid
786 786 *
787 787 * This value indicates the ID of the node that the logical CPU is a part
788 788 * of. All logical CPUs that are in the same node must have the same value
789 789 * here. This value must be unique across all of the packages in the
790 790 * system. On Intel based systems, this is currently set to the value in
791 791 * cpi_chipid because there is only one node.
792 792 *
793 793 * cpi_cores_per_compunit
794 794 *
795 795 * This value indicates the number of cores that are part of a compute
796 796 * unit. See the AMD topology section for this. This member only has real
797 797 * meaning currently for AMD Bulldozer family processors. For all other
798 798 * processors, this should currently be set to 1.
799 799 *
800 800 * cpi_compunitid
801 801 *
802 802 * This indicates the compute unit that the logical CPU belongs to. For
803 803 * processors without AMD Bulldozer-style compute units this should be set
804 804 * to the value of cpi_coreid.
805 805 *
806 806 * cpi_ncpu_shr_last_cache
807 807 *
808 808 * This indicates the number of logical CPUs that are sharing the same last
809 809 * level cache. This value should be the same for all CPUs that are sharing
810 810 * that cache. The last cache refers to the cache that is closest to memory
811 811 * and furthest away from the CPU.
812 812 *
813 813 * cpi_last_lvl_cacheid
814 814 *
815 815 * This indicates the ID of the last cache that the logical CPU uses. This
816 816 * cache is often shared between multiple logical CPUs and is the cache
817 817 * that is closest to memory and furthest away from the CPU. This value
818 818 * should be the same for a group of logical CPUs only if they actually
819 819 * share the same last level cache. IDs should not overlap between
820 820 * packages.
821 821 *
822 822 * cpi_ncore_bits
823 823 *
824 824 * This indicates the number of bits that are required to represent all of
825 825 * the cores in the system. As cores are derived based on their APIC IDs,
826 826 * we aren't guaranteed a run of APIC IDs starting from zero. It's OK for
827 827 * this value to be larger than the actual number of IDs that are present
828 828 * in the system. This is used to size tables by the CMI framework. It is
829 829 * only filled in for Intel and AMD CPUs.
830 830 *
831 831 * cpi_nthread_bits
832 832 *
833 833 * This indicates the number of bits required to represent all of the IDs
834 834 * that cover the logical CPUs that exist on a given core. It's OK for this
835 835 * value to be larger than the actual number of IDs that are present in the
836 836 * system. This is used to size tables by the CMI framework. It is
837 837 * only filled in for Intel and AMD CPUs.
838 838 *
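 * As an illustration only (the file derives these values from the APIC ID
 * layout, not from a simple count), the number of bits needed to cover IDs
 * 0 .. n-1 can be expressed with the common highbit() helper:
 *
 *	ncore_bits = highbit(ncores - 1);	   e.g. 8 cores -> 3 bits
 *	nthread_bits = highbit(nthreads_per_core - 1);
 *
 * noting that a count of one yields zero bits under this formula.
 *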
839 839 * -----------
840 840 * Hypervisors
841 841 * -----------
842 842 *
843 843 * If trying to manage the differences between vendors wasn't bad enough, it can
844 844 * get worse thanks to our friend hardware virtualization. Hypervisors are given
845 845 * the ability to interpose on all cpuid instructions and change them to suit
846 846 * their purposes. In general, this is necessary as the hypervisor wants to be
847 847 * able to present a more uniform set of features or not necessarily give the
848 848 * guest operating system kernel knowledge of all features so it can be
849 849 * more easily migrated between systems.
850 850 *
851 851 * When it comes to trying to determine topology information, this can be a
852 852 * double edged sword. When a hypervisor doesn't actually implement a cpuid
853 853 * leaf, it'll often return all zeros. Because of that, you'll often see various
854 854 * checks scattered about fields being non-zero before we assume we can use
855 855 * them.
856 856 *
857 857 * When it comes to topology information, the hypervisor is often incentivized
858 858 * to lie to you about topology. This is because it doesn't always actually
859 859 * guarantee that topology at all. The topology path we take in the system
860 860 * depends on how the CPU advertises itself. If it advertises itself as an Intel
 861 861 * or AMD CPU, then we basically take our normal path. However, when the
 862 862 * hypervisor doesn't use an actual vendor, we usually end up enumerating
 863 863 * multiple single-core CPUs that often appear on different sockets. The actual behavior
864 864 * depends greatly on what the hypervisor actually exposes to us.
865 865 *
866 866 * --------------------
867 867 * Exposing Information
868 868 * --------------------
869 869 *
870 870 * We expose CPUID information in three different forms in the system.
871 871 *
872 872 * The first is through the x86_featureset variable. This is used in conjunction
873 873 * with the is_x86_feature() function. This is queried by x86-specific functions
874 874 * to determine which features are or aren't present in the system and to make
875 875 * decisions based upon them. For example, users of this include everything from
876 876 * parts of the system dedicated to reliability, availability, and
877 877 * serviceability (RAS), to making decisions about how to handle security
878 878 * mitigations, to various x86-specific drivers. General purpose or
879 879 * architecture independent drivers should never be calling this function.
880 880 *
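 * As a hedged example of this first form (the helper and flag names are
 * assumed from the broader illumos source, not from this excerpt), a caller
 * might only enable SMAP enforcement when the feature is present:
 *
 *	if (is_x86_feature(x86_featureset, X86FSET_SMAP))
 *		setcr4(getcr4() | CR4_SMAP);
 *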
881 881 * The second means is through the auxiliary vector. The auxiliary vector is a
882 882 * series of tagged data that the kernel passes down to a user program when it
883 883 * begins executing. This information is used to indicate to programs what
884 884 * instruction set extensions are present. For example, information about the
885 885 * CPU supporting the machine check architecture (MCA) wouldn't be passed down
886 886 * since user programs cannot make use of it. However, things like the AVX
887 887 * instruction sets are. Programs use this information to make run-time
888 888 * decisions about what features they should use. As an example, the run-time
889 889 * link-editor (rtld) can relocate different functions depending on the hardware
890 890 * support available.
891 891 *
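 * From userland, this is typically consumed through getisax() rather than
 * by issuing cpuid directly. A hedged sketch (flag and function names per
 * the illumos manual pages, not this file):
 *
 *	#include <sys/auxv.h>
 *
 *	uint32_t hwcap[2] = { 0, 0 };
 *
 *	(void) getisax(hwcap, 2);
 *	if (hwcap[0] & AV_386_AVX)
 *		(select the AVX code path)
 *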
892 892 * The final form is through a series of accessor functions that all have the
893 893 * form cpuid_get*. This is used by a number of different subsystems in the
894 894 * kernel to determine more detailed information about what we're running on,
895 895 * topology information, etc. Some of these subsystems include processor groups
896 896 * (uts/common/os/pg.c.), CPU Module Interface (uts/i86pc/os/cmi.c), ACPI,
897 897 * microcode, and performance monitoring. These functions all ASSERT that the
898 898 * CPU they're being called on has reached a certain cpuid pass. If the passes
899 899 * are rearranged, then this needs to be adjusted.
900 900 */
901 901
902 902 #include <sys/types.h>
903 903 #include <sys/archsystm.h>
904 904 #include <sys/x86_archext.h>
905 905 #include <sys/kmem.h>
906 906 #include <sys/systm.h>
907 907 #include <sys/cmn_err.h>
908 908 #include <sys/sunddi.h>
909 909 #include <sys/sunndi.h>
910 910 #include <sys/cpuvar.h>
911 911 #include <sys/processor.h>
912 912 #include <sys/sysmacros.h>
913 913 #include <sys/pg.h>
914 914 #include <sys/fp.h>
915 915 #include <sys/controlregs.h>
916 916 #include <sys/bitmap.h>
917 917 #include <sys/auxv_386.h>
918 918 #include <sys/memnode.h>
919 919 #include <sys/pci_cfgspace.h>
920 920 #include <sys/comm_page.h>
921 921 #include <sys/mach_mmu.h>
922 922 #include <sys/ucode.h>
923 923 #include <sys/tsc.h>
924 924
925 925 #ifdef __xpv
926 926 #include <sys/hypervisor.h>
927 927 #else
928 928 #include <sys/ontrap.h>
929 929 #endif
930 930
931 931 uint_t x86_vendor = X86_VENDOR_IntelClone;
932 932 uint_t x86_type = X86_TYPE_OTHER;
933 933 uint_t x86_clflush_size = 0;
934 934
935 935 #if defined(__xpv)
936 936 int x86_use_pcid = 0;
937 937 int x86_use_invpcid = 0;
938 938 #else
939 939 int x86_use_pcid = -1;
940 940 int x86_use_invpcid = -1;
941 941 #endif
942 942
943 943 uint_t pentiumpro_bug4046376;
944 944
945 945 uchar_t x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)];
946 946
947 947 static char *x86_feature_names[NUM_X86_FEATURES] = {
948 948 "lgpg",
949 949 "tsc",
950 950 "msr",
951 951 "mtrr",
952 952 "pge",
953 953 "de",
954 954 "cmov",
955 955 "mmx",
956 956 "mca",
957 957 "pae",
958 958 "cv8",
959 959 "pat",
960 960 "sep",
961 961 "sse",
962 962 "sse2",
963 963 "htt",
964 964 "asysc",
965 965 "nx",
966 966 "sse3",
967 967 "cx16",
968 968 "cmp",
969 969 "tscp",
970 970 "mwait",
971 971 "sse4a",
972 972 "cpuid",
973 973 "ssse3",
974 974 "sse4_1",
975 975 "sse4_2",
976 976 "1gpg",
977 977 "clfsh",
978 978 "64",
979 979 "aes",
980 980 "pclmulqdq",
981 981 "xsave",
982 982 "avx",
983 983 "vmx",
984 984 "svm",
985 985 "topoext",
986 986 "f16c",
987 987 "rdrand",
988 988 "x2apic",
989 989 "avx2",
990 990 "bmi1",
991 991 "bmi2",
992 992 "fma",
993 993 "smep",
994 994 "smap",
995 995 "adx",
996 996 "rdseed",
997 997 "mpx",
998 998 "avx512f",
999 999 "avx512dq",
1000 1000 "avx512pf",
1001 1001 "avx512er",
1002 1002 "avx512cd",
1003 1003 "avx512bw",
1004 1004 "avx512vl",
1005 1005 "avx512fma",
1006 1006 "avx512vbmi",
1007 1007 "avx512_vpopcntdq",
1008 1008 "avx512_4vnniw",
1009 1009 "avx512_4fmaps",
1010 1010 "xsaveopt",
1011 1011 "xsavec",
1012 1012 "xsaves",
1013 1013 "sha",
1014 1014 "umip",
1015 1015 "pku",
1016 1016 "ospke",
1017 1017 "pcid",
1018 1018 "invpcid",
1019 1019 "ibrs",
1020 1020 "ibpb",
1021 1021 "stibp",
1022 1022 "ssbd",
1023 1023 "ssbd_virt",
1024 1024 "rdcl_no",
1025 1025 "ibrs_all",
1026 1026 "rsba",
1027 1027 "ssb_no",
1028 1028 "stibp_all",
1029 1029 "flush_cmd",
1030 1030 "l1d_vmentry_no",
1031 1031 "fsgsbase",
1032 1032 "clflushopt",
1033 1033 "clwb",
1034 1034 "monitorx",
1035 1035 "clzero",
1036 1036 "xop",
1037 1037 "fma4",
1038 1038 "tbm",
1039 1039 "avx512_vnni",
1040 1040 "amd_pcec"
1041 1041 };
1042 1042
1043 1043 boolean_t
1044 1044 is_x86_feature(void *featureset, uint_t feature)
1045 1045 {
1046 1046 ASSERT(feature < NUM_X86_FEATURES);
1047 1047 return (BT_TEST((ulong_t *)featureset, feature));
1048 1048 }
1049 1049
1050 1050 void
1051 1051 add_x86_feature(void *featureset, uint_t feature)
1052 1052 {
1053 1053 ASSERT(feature < NUM_X86_FEATURES);
1054 1054 BT_SET((ulong_t *)featureset, feature);
1055 1055 }
1056 1056
1057 1057 void
1058 1058 remove_x86_feature(void *featureset, uint_t feature)
1059 1059 {
1060 1060 ASSERT(feature < NUM_X86_FEATURES);
1061 1061 BT_CLEAR((ulong_t *)featureset, feature);
1062 1062 }
1063 1063
1064 1064 boolean_t
1065 1065 compare_x86_featureset(void *setA, void *setB)
1066 1066 {
1067 1067 /*
1068 1068 * We assume that the unused bits of the bitmap are always zero.
1069 1069 */
1070 1070 if (memcmp(setA, setB, BT_SIZEOFMAP(NUM_X86_FEATURES)) == 0) {
1071 1071 return (B_TRUE);
1072 1072 } else {
1073 1073 return (B_FALSE);
1074 1074 }
1075 1075 }
1076 1076
1077 1077 void
1078 1078 print_x86_featureset(void *featureset)
1079 1079 {
1080 1080 uint_t i;
1081 1081
1082 1082 for (i = 0; i < NUM_X86_FEATURES; i++) {
1083 1083 if (is_x86_feature(featureset, i)) {
1084 1084 cmn_err(CE_CONT, "?x86_feature: %s\n",
1085 1085 x86_feature_names[i]);
1086 1086 }
1087 1087 }
1088 1088 }
1089 1089
1090 1090 /* Note: This is the maximum size for the CPU, not the size of the structure. */
1091 1091 static size_t xsave_state_size = 0;
1092 1092 uint64_t xsave_bv_all = (XFEATURE_LEGACY_FP | XFEATURE_SSE);
1093 1093 boolean_t xsave_force_disable = B_FALSE;
1094 1094 extern int disable_smap;
1095 1095
1096 1096 /*
1097 1097 * This is set to platform type we are running on.
 1098 1098 * This is set to the platform type we are running on.
1099 1099 static int platform_type = -1;
1100 1100
1101 1101 #if !defined(__xpv)
1102 1102 /*
1103 1103 * Variable to patch if hypervisor platform detection needs to be
1104 1104 * disabled (e.g. platform_type will always be HW_NATIVE if this is 0).
1105 1105 */
1106 1106 int enable_platform_detection = 1;
1107 1107 #endif
1108 1108
1109 1109 /*
1110 1110 * monitor/mwait info.
1111 1111 *
1112 1112 * size_actual and buf_actual are the real address and size allocated to get
 1113 1113 * proper mwait_buf alignment. buf_actual and size_actual should be passed
1114 1114 * to kmem_free(). Currently kmem_alloc() and mwait happen to both use
 1115 1115 * processor cache-line alignment, but this is not guaranteed in the future.
1116 1116 */
1117 1117 struct mwait_info {
1118 1118 size_t mon_min; /* min size to avoid missed wakeups */
1119 1119 size_t mon_max; /* size to avoid false wakeups */
1120 1120 size_t size_actual; /* size actually allocated */
1121 1121 void *buf_actual; /* memory actually allocated */
1122 1122 uint32_t support; /* processor support of monitor/mwait */
1123 1123 };
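
/*
 * Illustrative sketch (not the detection code itself): mon_min and mon_max
 * above generally come from cpuid leaf 5, where %eax[15:0] holds the
 * smallest and %ebx[15:0] the largest monitor-line size in bytes. Given a
 * struct cpuid_regs regs as used elsewhere in this file:
 *
 *	regs.cp_eax = 5;
 *	regs.cp_ebx = regs.cp_ecx = regs.cp_edx = 0;
 *	(void) __cpuid_insn(&regs);
 *	mon_min = (size_t)(regs.cp_eax & 0xffff);
 *	mon_max = (size_t)(regs.cp_ebx & 0xffff);
 */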
1124 1124
1125 1125 /*
 1126 1126 * xsave/xrstor info.
1127 1127 *
1128 1128 * This structure contains HW feature bits and the size of the xsave save area.
1129 1129 * Note: the kernel declares a fixed size (AVX_XSAVE_SIZE) structure
1130 1130 * (xsave_state) to describe the xsave layout. However, at runtime the
1131 1131 * per-lwp xsave area is dynamically allocated based on xsav_max_size. The
1132 1132 * xsave_state structure simply represents the legacy layout of the beginning
1133 1133 * of the xsave area.
1134 1134 */
1135 1135 struct xsave_info {
1136 1136 uint32_t xsav_hw_features_low; /* Supported HW features */
1137 1137 uint32_t xsav_hw_features_high; /* Supported HW features */
1138 1138 size_t xsav_max_size; /* max size save area for HW features */
1139 1139 size_t ymm_size; /* AVX: size of ymm save area */
1140 1140 size_t ymm_offset; /* AVX: offset for ymm save area */
1141 1141 size_t bndregs_size; /* MPX: size of bndregs save area */
1142 1142 size_t bndregs_offset; /* MPX: offset for bndregs save area */
1143 1143 size_t bndcsr_size; /* MPX: size of bndcsr save area */
1144 1144 size_t bndcsr_offset; /* MPX: offset for bndcsr save area */
1145 1145 size_t opmask_size; /* AVX512: size of opmask save */
1146 1146 size_t opmask_offset; /* AVX512: offset for opmask save */
1147 1147 size_t zmmlo_size; /* AVX512: size of zmm 256 save */
1148 1148 size_t zmmlo_offset; /* AVX512: offset for zmm 256 save */
1149 1149 size_t zmmhi_size; /* AVX512: size of zmm hi reg save */
1150 1150 size_t zmmhi_offset; /* AVX512: offset for zmm hi reg save */
1151 1151 };
1152 1152
1153 1153
1154 1154 /*
1155 1155 * These constants determine how many of the elements of the
1156 1156 * cpuid we cache in the cpuid_info data structure; the
1157 1157 * remaining elements are accessible via the cpuid instruction.
1158 1158 */
1159 1159
1160 1160 #define NMAX_CPI_STD 8 /* eax = 0 .. 7 */
1161 1161 #define NMAX_CPI_EXTD 0x1f /* eax = 0x80000000 .. 0x8000001e */
1162 1162
1163 1163 /*
1164 1164 * See the big theory statement for a more detailed explanation of what some of
1165 1165 * these members mean.
1166 1166 */
1167 1167 struct cpuid_info {
1168 1168 uint_t cpi_pass; /* last pass completed */
1169 1169 /*
1170 1170 * standard function information
1171 1171 */
1172 1172 uint_t cpi_maxeax; /* fn 0: %eax */
1173 1173 char cpi_vendorstr[13]; /* fn 0: %ebx:%ecx:%edx */
1174 1174 uint_t cpi_vendor; /* enum of cpi_vendorstr */
1175 1175
1176 1176 uint_t cpi_family; /* fn 1: extended family */
1177 1177 uint_t cpi_model; /* fn 1: extended model */
1178 1178 uint_t cpi_step; /* fn 1: stepping */
1179 1179 chipid_t cpi_chipid; /* fn 1: %ebx: Intel: chip # */
1180 1180 /* AMD: package/socket # */
1181 1181 uint_t cpi_brandid; /* fn 1: %ebx: brand ID */
1182 1182 int cpi_clogid; /* fn 1: %ebx: thread # */
1183 1183 uint_t cpi_ncpu_per_chip; /* fn 1: %ebx: logical cpu count */
1184 1184 uint8_t cpi_cacheinfo[16]; /* fn 2: intel-style cache desc */
1185 1185 uint_t cpi_ncache; /* fn 2: number of elements */
1186 1186 uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */
1187 1187 id_t cpi_last_lvl_cacheid; /* fn 4: %eax: derived cache id */
1188 1188 uint_t cpi_cache_leaf_size; /* Number of cache elements */
1189 1189 /* Intel fn: 4, AMD fn: 8000001d */
1190 1190 	struct cpuid_regs **cpi_cache_leaves;	/* Actual leaves from above */
1191 1191 struct cpuid_regs cpi_std[NMAX_CPI_STD]; /* 0 .. 7 */
1192 1192 /*
1193 1193 * extended function information
1194 1194 */
1195 1195 uint_t cpi_xmaxeax; /* fn 0x80000000: %eax */
1196 1196 char cpi_brandstr[49]; /* fn 0x8000000[234] */
1197 1197 uint8_t cpi_pabits; /* fn 0x80000006: %eax */
1198 1198 uint8_t cpi_vabits; /* fn 0x80000006: %eax */
1199 1199 uint8_t cpi_fp_amd_save; /* AMD: FP error pointer save rqd. */
1200 1200 struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x800000XX */
1201 1201
1202 1202 id_t cpi_coreid; /* same coreid => strands share core */
1203 1203 int cpi_pkgcoreid; /* core number within single package */
1204 1204 uint_t cpi_ncore_per_chip; /* AMD: fn 0x80000008: %ecx[7-0] */
1205 1205 /* Intel: fn 4: %eax[31-26] */
1206 1206
1207 1207 /*
1208 1208 * These values represent the number of bits that are required to store
1209 1209 * information about the number of cores and threads.
1210 1210 */
1211 1211 uint_t cpi_ncore_bits;
1212 1212 uint_t cpi_nthread_bits;
1213 1213 /*
1214 1214 * supported feature information
1215 1215 */
1216 1216 uint32_t cpi_support[6];
1217 1217 #define STD_EDX_FEATURES 0
1218 1218 #define AMD_EDX_FEATURES 1
1219 1219 #define TM_EDX_FEATURES 2
1220 1220 #define STD_ECX_FEATURES 3
1221 1221 #define AMD_ECX_FEATURES 4
1222 1222 #define STD_EBX_FEATURES 5
1223 1223 /*
1224 1224 * Synthesized information, where known.
1225 1225 */
1226 1226 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */
1227 1227 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */
1228 1228 uint32_t cpi_socket; /* Chip package/socket type */
1229 1229
1230 1230 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */
1231 1231 uint32_t cpi_apicid;
1232 1232 uint_t cpi_procnodeid; /* AMD: nodeID on HT, Intel: chipid */
1233 1233 uint_t cpi_procnodes_per_pkg; /* AMD: # of nodes in the package */
1234 1234 /* Intel: 1 */
1235 1235 uint_t cpi_compunitid; /* AMD: ComputeUnit ID, Intel: coreid */
1236 1236 uint_t cpi_cores_per_compunit; /* AMD: # of cores in the ComputeUnit */
1237 1237
1238 1238 struct xsave_info cpi_xsave; /* fn D: xsave/xrestor info */
1239 1239 };
1240 1240
1241 1241
1242 1242 static struct cpuid_info cpuid_info0;
1243 1243
1244 1244 /*
1245 1245 * These bit fields are defined by the Intel Application Note AP-485
1246 1246 * "Intel Processor Identification and the CPUID Instruction"
1247 1247 */
1248 1248 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20)
1249 1249 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16)
1250 1250 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12)
1251 1251 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8)
1252 1252 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0)
1253 1253 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4)
1254 1254
1255 1255 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx)
1256 1256 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx)
1257 1257 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx)
1258 1258 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx)
1259 1259 #define CPI_FEATURES_7_0_EBX(cpi) ((cpi)->cpi_std[7].cp_ebx)
1260 1260 #define CPI_FEATURES_7_0_ECX(cpi) ((cpi)->cpi_std[7].cp_ecx)
1261 1261 #define CPI_FEATURES_7_0_EDX(cpi) ((cpi)->cpi_std[7].cp_edx)
1262 1262
1263 1263 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0)
1264 1264 #define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 7)
1265 1265 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16)
1266 1266 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24)
1267 1267
1268 1268 #define CPI_MAXEAX_MAX 0x100 /* sanity control */
1269 1269 #define CPI_XMAXEAX_MAX 0x80000100
1270 1270 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */
1271 1271 #define CPI_FNB_ECX_MAX 0x20 /* sanity: max fn B levels */
1272 1272
1273 1273 /*
1274 1274 * Function 4 (Deterministic Cache Parameters) macros
1275 1275 * Defined by Intel Application Note AP-485
1276 1276 */
1277 1277 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26)
1278 1278 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14)
1279 1279 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9)
1280 1280 #define CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8)
1281 1281 #define CPI_CACHE_LVL(regs) BITX((regs)->cp_eax, 7, 5)
1282 1282 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0)
1283 1283 #define CPI_CPU_LEVEL_TYPE(regs) BITX((regs)->cp_ecx, 15, 8)
1284 1284
1285 1285 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22)
1286 1286 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12)
1287 1287 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0)
1288 1288
1289 1289 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0)
1290 1290
1291 1291 #define CPI_PREFCH_STRIDE(regs) BITX((regs)->cp_edx, 9, 0)
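/*
 * For reference, the way/partition/line-size/set fields above are all
 * "minus one" encoded by the hardware, so the total size in bytes of the
 * cache described by one leaf 4 (or AMD 0x8000001d) sub-leaf works out to:
 *
 *	(CPI_CACHE_WAYS(regs) + 1) * (CPI_CACHE_PARTS(regs) + 1) *
 *	    (CPI_CACHE_COH_LN_SZ(regs) + 1) * (CPI_CACHE_SETS(regs) + 1)
 */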
1292 1292
1293 1293
1294 1294 /*
1295 1295 * A couple of shorthand macros to identify "later" P6-family chips
1296 1296 * like the Pentium M and Core. First, the "older" P6-based stuff
1297 1297 * (loosely defined as "pre-Pentium-4"):
1298 1298 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon
1299 1299 */
1300 1300 #define IS_LEGACY_P6(cpi) ( \
1301 1301 cpi->cpi_family == 6 && \
1302 1302 (cpi->cpi_model == 1 || \
1303 1303 cpi->cpi_model == 3 || \
1304 1304 cpi->cpi_model == 5 || \
1305 1305 cpi->cpi_model == 6 || \
1306 1306 cpi->cpi_model == 7 || \
1307 1307 cpi->cpi_model == 8 || \
1308 1308 cpi->cpi_model == 0xA || \
1309 1309 cpi->cpi_model == 0xB) \
1310 1310 )
1311 1311
1312 1312 /* A "new F6" is everything with family 6 that's not the above */
1313 1313 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi))
1314 1314
1315 1315 /* Extended family/model support */
1316 1316 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \
1317 1317 cpi->cpi_family >= 0xf)
1318 1318
1319 1319 /*
1320 1320 * Info for monitor/mwait idle loop.
1321 1321 *
1322 1322 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's
1323 1323 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November
1324 1324 * 2006.
1325 1325 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual
1326 1326 * Documentation Updates" #33633, Rev 2.05, December 2006.
1327 1327 */
1328 1328 #define MWAIT_SUPPORT (0x00000001) /* mwait supported */
1329 1329 #define	MWAIT_EXTENSIONS	(0x00000002)	/* extension supported */
1330 1330 #define MWAIT_ECX_INT_ENABLE (0x00000004) /* ecx 1 extension supported */
1331 1331 #define MWAIT_SUPPORTED(cpi) ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON)
1332 1332 #define MWAIT_INT_ENABLE(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x2)
1333 1333 #define MWAIT_EXTENSION(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x1)
1334 1334 #define MWAIT_SIZE_MIN(cpi) BITX((cpi)->cpi_std[5].cp_eax, 15, 0)
1335 1335 #define MWAIT_SIZE_MAX(cpi) BITX((cpi)->cpi_std[5].cp_ebx, 15, 0)
1336 1336 /*
1337 1337 * Number of sub-cstates for a given c-state.
1338 1338 */
1339 1339 #define MWAIT_NUM_SUBC_STATES(cpi, c_state) \
1340 1340 BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state)
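/*
 * Illustrative decoding of the macro above (the %edx value here is
 * hypothetical, not taken from any particular processor): leaf 5 packs one
 * 4-bit sub C-state count per C-state into %edx, C0 in bits 3:0, C1 in
 * bits 7:4, C2 in bits 11:8, and so on. With cp_edx = 0x00000220:
 *
 *	MWAIT_NUM_SUBC_STATES(cpi, 0) == 0	(C0 sub C-states, bits 3:0)
 *	MWAIT_NUM_SUBC_STATES(cpi, 4) == 2	(C1 sub C-states, bits 7:4)
 *	MWAIT_NUM_SUBC_STATES(cpi, 8) == 2	(C2 sub C-states, bits 11:8)
 */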
1341 1341
1342 1342 /*
1343 1343 * XSAVE leaf 0xD enumeration
1344 1344 */
1345 1345 #define CPUID_LEAFD_2_YMM_OFFSET 576
1346 1346 #define CPUID_LEAFD_2_YMM_SIZE 256
1347 1347
1348 1348 /*
1349 1349 * Common extended leaf names to cut down on typos.
1350 1350 */
1351 1351 #define CPUID_LEAF_EXT_0 0x80000000
1352 1352 #define CPUID_LEAF_EXT_8 0x80000008
1353 1353 #define CPUID_LEAF_EXT_1d 0x8000001d
1354 1354 #define CPUID_LEAF_EXT_1e 0x8000001e
1355 1355
1356 1356 /*
1357 1357  * Functions we consume from cpuid_subr.c; don't publish these in a header
1358 1358 * file to try and keep people using the expected cpuid_* interfaces.
1359 1359 */
1360 1360 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t);
1361 1361 extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t);
1362 1362 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t);
1363 1363 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t);
1364 1364 extern uint_t _cpuid_vendorstr_to_vendorcode(char *);
1365 1365
1366 1366 /*
1367 1367  * Apply various platform-dependent restrictions where the
1368 1368 * underlying platform restrictions mean the CPU can be marked
1369 1369 * as less capable than its cpuid instruction would imply.
1370 1370 */
1371 1371 #if defined(__xpv)
1372 1372 static void
1373 1373 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp)
1374 1374 {
1375 1375 switch (eax) {
1376 1376 case 1: {
1377 1377 uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ?
1378 1378 0 : CPUID_INTC_EDX_MCA;
1379 1379 cp->cp_edx &=
1380 1380 ~(mcamask |
1381 1381 CPUID_INTC_EDX_PSE |
1382 1382 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
1383 1383 CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR |
1384 1384 CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT |
1385 1385 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
1386 1386 CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT);
1387 1387 break;
1388 1388 }
1389 1389
1390 1390 case 0x80000001:
1391 1391 cp->cp_edx &=
1392 1392 ~(CPUID_AMD_EDX_PSE |
1393 1393 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
1394 1394 CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE |
1395 1395 CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 |
1396 1396 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
1397 1397 CPUID_AMD_EDX_TSCP);
1398 1398 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY;
1399 1399 break;
1400 1400 default:
1401 1401 break;
1402 1402 }
1403 1403
1404 1404 switch (vendor) {
1405 1405 case X86_VENDOR_Intel:
1406 1406 switch (eax) {
1407 1407 case 4:
1408 1408 /*
1409 1409 * Zero out the (ncores-per-chip - 1) field
1410 1410 */
1411 1411 cp->cp_eax &= 0x03fffffff;
1412 1412 break;
1413 1413 default:
1414 1414 break;
1415 1415 }
1416 1416 break;
1417 1417 case X86_VENDOR_AMD:
1418 1418 switch (eax) {
1419 1419
1420 1420 case 0x80000001:
1421 1421 cp->cp_ecx &= ~CPUID_AMD_ECX_CR8D;
1422 1422 break;
1423 1423
1424 1424 case CPUID_LEAF_EXT_8:
1425 1425 /*
1426 1426 * Zero out the (ncores-per-chip - 1) field
1427 1427 */
1428 1428 cp->cp_ecx &= 0xffffff00;
1429 1429 break;
1430 1430 default:
1431 1431 break;
1432 1432 }
1433 1433 break;
1434 1434 default:
1435 1435 break;
1436 1436 }
1437 1437 }
1438 1438 #else
1439 1439 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */
1440 1440 #endif
1441 1441
1442 1442 /*
1443 1443 * Some undocumented ways of patching the results of the cpuid
1444 1444 * instruction to permit running Solaris 10 on future cpus that
1445 1445 * we don't currently support. Could be set to non-zero values
1446 1446 * via settings in eeprom.
1447 1447 */
1448 1448
1449 1449 uint32_t cpuid_feature_ecx_include;
1450 1450 uint32_t cpuid_feature_ecx_exclude;
1451 1451 uint32_t cpuid_feature_edx_include;
1452 1452 uint32_t cpuid_feature_edx_exclude;
1453 1453
1454 1454 /*
1455 1455 * Allocate space for mcpu_cpi in the machcpu structure for all non-boot CPUs.
1456 1456 */
1457 1457 void
1458 1458 cpuid_alloc_space(cpu_t *cpu)
1459 1459 {
1460 1460 /*
1461 1461 * By convention, cpu0 is the boot cpu, which is set up
1462 1462 * before memory allocation is available. All other cpus get
1463 1463 * their cpuid_info struct allocated here.
1464 1464 */
1465 1465 ASSERT(cpu->cpu_id != 0);
1466 1466 ASSERT(cpu->cpu_m.mcpu_cpi == NULL);
1467 1467 cpu->cpu_m.mcpu_cpi =
1468 1468 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP);
1469 1469 }
1470 1470
1471 1471 void
1472 1472 cpuid_free_space(cpu_t *cpu)
1473 1473 {
1474 1474 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1475 1475 int i;
1476 1476
1477 1477 ASSERT(cpi != NULL);
1478 1478 ASSERT(cpi != &cpuid_info0);
1479 1479
1480 1480 /*
1481 1481 * Free up any cache leaf related dynamic storage. The first entry was
1482 1482 * cached from the standard cpuid storage, so we should not free it.
1483 1483 */
1484 1484 for (i = 1; i < cpi->cpi_cache_leaf_size; i++)
1485 1485 kmem_free(cpi->cpi_cache_leaves[i], sizeof (struct cpuid_regs));
1486 1486 if (cpi->cpi_cache_leaf_size > 0)
1487 1487 kmem_free(cpi->cpi_cache_leaves,
1488 1488 cpi->cpi_cache_leaf_size * sizeof (struct cpuid_regs *));
1489 1489
1490 1490 kmem_free(cpi, sizeof (*cpi));
1491 1491 cpu->cpu_m.mcpu_cpi = NULL;
1492 1492 }
1493 1493
1494 1494 #if !defined(__xpv)
1495 1495 /*
1496 1496 * Determine the type of the underlying platform. This is used to customize
1497 1497 * initialization of various subsystems (e.g. TSC). determine_platform() must
1498 1498 * only ever be called once to prevent two processors from seeing different
1499 1499 * values of platform_type. Must be called before cpuid_pass1(), the earliest
1500 1500 * consumer to execute (uses _cpuid_chiprev --> synth_amd_info --> get_hwenv).
1501 1501 */
1502 1502 void
1503 1503 determine_platform(void)
1504 1504 {
1505 1505 struct cpuid_regs cp;
1506 1506 uint32_t base;
1507 1507 uint32_t regs[4];
1508 1508 char *hvstr = (char *)regs;
1509 1509
1510 1510 ASSERT(platform_type == -1);
1511 1511
1512 1512 platform_type = HW_NATIVE;
1513 1513
1514 1514 if (!enable_platform_detection)
1515 1515 return;
1516 1516
1517 1517 /*
1518 1518 * If Hypervisor CPUID bit is set, try to determine hypervisor
1519 1519 * vendor signature, and set platform type accordingly.
1520 1520 *
1521 1521 * References:
1522 1522 * http://lkml.org/lkml/2008/10/1/246
1523 1523 * http://kb.vmware.com/kb/1009458
1524 1524 */
1525 1525 cp.cp_eax = 0x1;
1526 1526 (void) __cpuid_insn(&cp);
1527 1527 if ((cp.cp_ecx & CPUID_INTC_ECX_HV) != 0) {
1528 1528 cp.cp_eax = 0x40000000;
1529 1529 (void) __cpuid_insn(&cp);
1530 1530 regs[0] = cp.cp_ebx;
1531 1531 regs[1] = cp.cp_ecx;
1532 1532 regs[2] = cp.cp_edx;
1533 1533 regs[3] = 0;
1534 1534 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0) {
1535 1535 platform_type = HW_XEN_HVM;
1536 1536 return;
1537 1537 }
1538 1538 if (strcmp(hvstr, HVSIG_VMWARE) == 0) {
1539 1539 platform_type = HW_VMWARE;
1540 1540 return;
1541 1541 }
1542 1542 if (strcmp(hvstr, HVSIG_KVM) == 0) {
1543 1543 platform_type = HW_KVM;
1544 1544 return;
1545 1545 }
1546 1546 if (strcmp(hvstr, HVSIG_BHYVE) == 0) {
1547 1547 platform_type = HW_BHYVE;
1548 1548 return;
1549 1549 }
1550 1550 if (strcmp(hvstr, HVSIG_MICROSOFT) == 0)
1551 1551 platform_type = HW_MICROSOFT;
1552 1552 } else {
1553 1553 /*
1554 1554 * Check older VMware hardware versions. VMware hypervisor is
1555 1555 * detected by performing an IN operation to VMware hypervisor
1556 1556 		 * port and checking that the value returned in %ebx is the
1557 1557 		 * VMware hypervisor magic value.
1558 1558 *
1559 1559 * References: http://kb.vmware.com/kb/1009458
1560 1560 */
1561 1561 vmware_port(VMWARE_HVCMD_GETVERSION, regs);
1562 1562 if (regs[1] == VMWARE_HVMAGIC) {
1563 1563 platform_type = HW_VMWARE;
1564 1564 return;
1565 1565 }
1566 1566 }
1567 1567
1568 1568 /*
1569 1569 * Check Xen hypervisor. In a fully virtualized domain,
1570 1570 * Xen's pseudo-cpuid function returns a string representing the
1571 1571 * Xen signature in %ebx, %ecx, and %edx. %eax contains the maximum
1572 1572 * supported cpuid function. We need at least a (base + 2) leaf value
1573 1573 * to do what we want to do. Try different base values, since the
1574 1574 * hypervisor might use a different one depending on whether Hyper-V
1575 1575 * emulation is switched on by default or not.
1576 1576 */
1577 1577 for (base = 0x40000000; base < 0x40010000; base += 0x100) {
1578 1578 cp.cp_eax = base;
1579 1579 (void) __cpuid_insn(&cp);
1580 1580 regs[0] = cp.cp_ebx;
1581 1581 regs[1] = cp.cp_ecx;
1582 1582 regs[2] = cp.cp_edx;
1583 1583 regs[3] = 0;
1584 1584 if (strcmp(hvstr, HVSIG_XEN_HVM) == 0 &&
1585 1585 cp.cp_eax >= (base + 2)) {
1586 1586 platform_type &= ~HW_NATIVE;
1587 1587 platform_type |= HW_XEN_HVM;
1588 1588 return;
1589 1589 }
1590 1590 }
1591 1591 }
1592 1592
1593 1593 int
1594 1594 get_hwenv(void)
1595 1595 {
1596 1596 ASSERT(platform_type != -1);
1597 1597 return (platform_type);
1598 1598 }
1599 1599
1600 1600 int
1601 1601 is_controldom(void)
1602 1602 {
1603 1603 return (0);
1604 1604 }
1605 1605
1606 1606 #else
1607 1607
1608 1608 int
1609 1609 get_hwenv(void)
1610 1610 {
1611 1611 return (HW_XEN_PV);
1612 1612 }
1613 1613
1614 1614 int
1615 1615 is_controldom(void)
1616 1616 {
1617 1617 return (DOMAIN_IS_INITDOMAIN(xen_info));
1618 1618 }
1619 1619
1620 1620 #endif /* __xpv */
1621 1621
1622 1622 /*
1623 1623 * Make sure that we have gathered all of the CPUID leaves that we might need to
1624 1624 * determine topology. We assume that the standard leaf 1 has already been done
1625 1625 * and that xmaxeax has already been calculated.
1626 1626 */
1627 1627 static void
1628 1628 cpuid_gather_amd_topology_leaves(cpu_t *cpu)
1629 1629 {
1630 1630 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1631 1631
1632 1632 if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
1633 1633 struct cpuid_regs *cp;
1634 1634
1635 1635 cp = &cpi->cpi_extd[8];
1636 1636 cp->cp_eax = CPUID_LEAF_EXT_8;
1637 1637 (void) __cpuid_insn(cp);
1638 1638 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8, cp);
1639 1639 }
1640 1640
1641 1641 if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1642 1642 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1643 1643 struct cpuid_regs *cp;
1644 1644
1645 1645 cp = &cpi->cpi_extd[0x1e];
1646 1646 cp->cp_eax = CPUID_LEAF_EXT_1e;
1647 1647 (void) __cpuid_insn(cp);
1648 1648 }
1649 1649 }
1650 1650
1651 1651 /*
1652 1652 * Get the APIC ID for this processor. If Leaf B is present and valid, we prefer
1653 1653 * it to everything else. If not, and we're on an AMD system where 8000001e is
1654 1654  * valid, then we use that. Otherwise, we fall back to the default value for the
1655 1655 * APIC ID in leaf 1.
1656 1656 */
1657 1657 static uint32_t
1658 1658 cpuid_gather_apicid(struct cpuid_info *cpi)
1659 1659 {
1660 1660 /*
1661 1661 	 * Leaf B changes based on the arguments to it. Because we don't cache
1662 1662 * it, we need to gather it again.
1663 1663 */
1664 1664 if (cpi->cpi_maxeax >= 0xB) {
1665 1665 struct cpuid_regs regs;
1666 1666 struct cpuid_regs *cp;
1667 1667
1668 1668 		cp = &regs;
1669 1669 cp->cp_eax = 0xB;
1670 1670 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1671 1671 (void) __cpuid_insn(cp);
1672 1672
1673 1673 if (cp->cp_ebx != 0) {
1674 1674 return (cp->cp_edx);
1675 1675 }
1676 1676 }
1677 1677
1678 1678 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
1679 1679 is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1680 1680 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1681 1681 return (cpi->cpi_extd[0x1e].cp_eax);
1682 1682 }
1683 1683
1684 1684 return (CPI_APIC_ID(cpi));
1685 1685 }
1686 1686
1687 1687 /*
1688 1688 * For AMD processors, attempt to calculate the number of chips and cores that
1689 1689 * exist. The way that we do this varies based on the generation, because the
1690 1690 * generations themselves have changed dramatically.
1691 1691 *
1692 1692 * If cpuid leaf 0x80000008 exists, that generally tells us the number of cores.
1693 1693 * However, with the advent of family 17h (Zen) it actually tells us the number
1694 1694 * of threads, so we need to look at leaf 0x8000001e if available to determine
1695 1695 * its value. Otherwise, for all prior families, the number of enabled cores is
1696 1696 * the same as threads.
1697 1697 *
1698 1698 * If we do not have leaf 0x80000008, then we assume that this processor does
1699 1699 * not have anything. AMD's older CPUID specification says there's no reason to
1700 1700 * fall back to leaf 1.
1701 1701 *
1702 1702 * In some virtualization cases we will not have leaf 8000001e or it will be
1703 1703 * zero. When that happens we assume the number of threads is one.
1704 1704 */
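/*
 * A worked example of the calculation below, using hypothetical cpuid
 * values rather than data from a real part: on a family 17h system with
 * the TOPOEXT feature, if leaf 0x80000008 %ecx[7:0] is 15 then nthreads
 * becomes 16; if leaf 0x8000001e %ebx[15:8] is 1 then nthread_per_core
 * becomes 2, so we report *ncpus = 16 and *ncores = 16 / 2 = 8.
 */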
1705 1705 static void
1706 1706 cpuid_amd_ncores(struct cpuid_info *cpi, uint_t *ncpus, uint_t *ncores)
1707 1707 {
1708 1708 uint_t nthreads, nthread_per_core;
1709 1709
1710 1710 nthreads = nthread_per_core = 1;
1711 1711
1712 1712 if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
1713 1713 nthreads = BITX(cpi->cpi_extd[8].cp_ecx, 7, 0) + 1;
1714 1714 } else if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) {
1715 1715 nthreads = CPI_CPU_COUNT(cpi);
1716 1716 }
1717 1717
1718 1718 /*
1719 1719 * For us to have threads, and know about it, we have to be at least at
1720 1720 * family 17h and have the cpuid bit that says we have extended
1721 1721 * topology.
1722 1722 */
1723 1723 if (cpi->cpi_family >= 0x17 &&
1724 1724 is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1725 1725 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1726 1726 nthread_per_core = BITX(cpi->cpi_extd[0x1e].cp_ebx, 15, 8) + 1;
1727 1727 }
1728 1728
1729 1729 *ncpus = nthreads;
1730 1730 *ncores = nthreads / nthread_per_core;
1731 1731 }
1732 1732
1733 1733 /*
1734 1734 * Seed the initial values for the cores and threads for an Intel based
1735 1735 * processor. These values will be overwritten if we detect that the processor
1736 1736 * supports CPUID leaf 0xb.
1737 1737 */
1738 1738 static void
1739 1739 cpuid_intel_ncores(struct cpuid_info *cpi, uint_t *ncpus, uint_t *ncores)
1740 1740 {
1741 1741 /*
1742 1742 * Only seed the number of physical cores from the first level leaf 4
1743 1743 	 * information. The number of threads there indicates how many share the
1744 1744 * L1 cache, which may or may not have anything to do with the number of
1745 1745 * logical CPUs per core.
1746 1746 */
1747 1747 if (cpi->cpi_maxeax >= 4) {
1748 1748 *ncores = BITX(cpi->cpi_std[4].cp_eax, 31, 26) + 1;
1749 1749 } else {
1750 1750 *ncores = 1;
1751 1751 }
1752 1752
1753 1753 if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) {
1754 1754 *ncpus = CPI_CPU_COUNT(cpi);
1755 1755 } else {
1756 1756 *ncpus = *ncores;
1757 1757 }
1758 1758 }
1759 1759
1760 1760 static boolean_t
1761 1761 cpuid_leafB_getids(cpu_t *cpu)
1762 1762 {
1763 1763 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1764 1764 struct cpuid_regs regs;
1765 1765 struct cpuid_regs *cp;
1766 1766
1767 1767 if (cpi->cpi_maxeax < 0xB)
1768 1768 return (B_FALSE);
1769 1769
1770 1770 	cp = &regs;
1771 1771 cp->cp_eax = 0xB;
1772 1772 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1773 1773
1774 1774 (void) __cpuid_insn(cp);
1775 1775
1776 1776 /*
1777 1777 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which
1778 1778 * indicates that the extended topology enumeration leaf is
1779 1779 * available.
1780 1780 */
1781 1781 if (cp->cp_ebx != 0) {
1782 1782 uint32_t x2apic_id = 0;
1783 1783 uint_t coreid_shift = 0;
1784 1784 uint_t ncpu_per_core = 1;
1785 1785 uint_t chipid_shift = 0;
1786 1786 uint_t ncpu_per_chip = 1;
1787 1787 uint_t i;
1788 1788 uint_t level;
1789 1789
1790 1790 for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
1791 1791 cp->cp_eax = 0xB;
1792 1792 cp->cp_ecx = i;
1793 1793
1794 1794 (void) __cpuid_insn(cp);
1795 1795 level = CPI_CPU_LEVEL_TYPE(cp);
1796 1796
1797 1797 if (level == 1) {
1798 1798 x2apic_id = cp->cp_edx;
1799 1799 coreid_shift = BITX(cp->cp_eax, 4, 0);
1800 1800 ncpu_per_core = BITX(cp->cp_ebx, 15, 0);
1801 1801 } else if (level == 2) {
1802 1802 x2apic_id = cp->cp_edx;
1803 1803 chipid_shift = BITX(cp->cp_eax, 4, 0);
1804 1804 ncpu_per_chip = BITX(cp->cp_ebx, 15, 0);
1805 1805 }
1806 1806 }
1807 1807
1808 1808 /*
1809 1809 * cpi_apicid is taken care of in cpuid_gather_apicid.
1810 1810 */
1811 1811 cpi->cpi_ncpu_per_chip = ncpu_per_chip;
1812 1812 cpi->cpi_ncore_per_chip = ncpu_per_chip /
1813 1813 ncpu_per_core;
1814 1814 cpi->cpi_chipid = x2apic_id >> chipid_shift;
1815 1815 cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1);
1816 1816 cpi->cpi_coreid = x2apic_id >> coreid_shift;
1817 1817 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
1818 1818 cpi->cpi_procnodeid = cpi->cpi_chipid;
1819 1819 cpi->cpi_compunitid = cpi->cpi_coreid;
1820 1820
1821 1821 if (coreid_shift > 0 && chipid_shift > coreid_shift) {
1822 1822 cpi->cpi_nthread_bits = coreid_shift;
1823 1823 cpi->cpi_ncore_bits = chipid_shift - coreid_shift;
1824 1824 }
1825 1825
1826 1826 return (B_TRUE);
1827 1827 } else {
1828 1828 return (B_FALSE);
1829 1829 }
1830 1830 }
1831 1831
1832 1832 static void
1833 1833 cpuid_intel_getids(cpu_t *cpu, void *feature)
1834 1834 {
1835 1835 uint_t i;
1836 1836 uint_t chipid_shift = 0;
1837 1837 uint_t coreid_shift = 0;
1838 1838 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1839 1839
1840 1840 /*
1841 1841 * There are no compute units or processor nodes currently on Intel.
1842 1842 * Always set these to one.
1843 1843 */
1844 1844 cpi->cpi_procnodes_per_pkg = 1;
1845 1845 cpi->cpi_cores_per_compunit = 1;
1846 1846
1847 1847 /*
1848 1848 * If cpuid Leaf B is present, use that to try and get this information.
1849 1849 * It will be the most accurate for Intel CPUs.
1850 1850 */
1851 1851 if (cpuid_leafB_getids(cpu))
1852 1852 return;
1853 1853
1854 1854 /*
1855 1855 * In this case, we have the leaf 1 and leaf 4 values for ncpu_per_chip
1856 1856 * and ncore_per_chip. These represent the largest power of two values
1857 1857 * that we need to cover all of the IDs in the system. Therefore, we use
1858 1858 * those values to seed the number of bits needed to cover information
1859 1859 * in the case when leaf B is not available. These values will probably
1860 1860 * be larger than required, but that's OK.
1861 1861 */
1862 1862 cpi->cpi_nthread_bits = ddi_fls(cpi->cpi_ncpu_per_chip);
1863 1863 cpi->cpi_ncore_bits = ddi_fls(cpi->cpi_ncore_per_chip);
1864 1864
1865 1865 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1)
1866 1866 chipid_shift++;
1867 1867
1868 1868 cpi->cpi_chipid = cpi->cpi_apicid >> chipid_shift;
1869 1869 cpi->cpi_clogid = cpi->cpi_apicid & ((1 << chipid_shift) - 1);
1870 1870
1871 1871 if (is_x86_feature(feature, X86FSET_CMP)) {
1872 1872 /*
1873 1873 * Multi-core (and possibly multi-threaded)
1874 1874 * processors.
1875 1875 */
1876 1876 uint_t ncpu_per_core;
1877 1877 if (cpi->cpi_ncore_per_chip == 1)
1878 1878 ncpu_per_core = cpi->cpi_ncpu_per_chip;
1879 1879 else if (cpi->cpi_ncore_per_chip > 1)
1880 1880 ncpu_per_core = cpi->cpi_ncpu_per_chip /
1881 1881 cpi->cpi_ncore_per_chip;
1882 1882 /*
1883 1883 * 8bit APIC IDs on dual core Pentiums
1884 1884 * look like this:
1885 1885 *
1886 1886 * +-----------------------+------+------+
1887 1887 * | Physical Package ID | MC | HT |
1888 1888 * +-----------------------+------+------+
1889 1889 * <------- chipid -------->
1890 1890 * <------- coreid --------------->
1891 1891 * <--- clogid -->
1892 1892 * <------>
1893 1893 * pkgcoreid
1894 1894 *
1895 1895 * Where the number of bits necessary to
1896 1896 		 * represent MC and HT fields together equals
1897 1897 		 * the minimum number of bits necessary to
1898 1898 * store the value of cpi->cpi_ncpu_per_chip.
1899 1899 * Of those bits, the MC part uses the number
1900 1900 * of bits necessary to store the value of
1901 1901 * cpi->cpi_ncore_per_chip.
1902 1902 */
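		/*
		 * For instance, on a hypothetical package with
		 * cpi_ncpu_per_chip = 4 and ncpu_per_core = 2, the
		 * chipid_shift computed earlier is 2 and the coreid_shift
		 * computed below is 1, so an APIC ID of 0x5 decomposes to
		 * chipid = 0x5 >> 2 = 1, clogid = 0x5 & 0x3 = 1,
		 * coreid = 0x5 >> 1 = 2 and pkgcoreid = clogid >> 1 = 0.
		 */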
1903 1903 for (i = 1; i < ncpu_per_core; i <<= 1)
1904 1904 coreid_shift++;
1905 1905 cpi->cpi_coreid = cpi->cpi_apicid >> coreid_shift;
1906 1906 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
1907 1907 } else if (is_x86_feature(feature, X86FSET_HTT)) {
1908 1908 /*
1909 1909 * Single-core multi-threaded processors.
1910 1910 */
1911 1911 cpi->cpi_coreid = cpi->cpi_chipid;
1912 1912 cpi->cpi_pkgcoreid = 0;
1913 1913 } else {
1914 1914 /*
1915 1915 * Single-core single-thread processors.
1916 1916 */
1917 1917 cpi->cpi_coreid = cpu->cpu_id;
1918 1918 cpi->cpi_pkgcoreid = 0;
1919 1919 }
1920 1920 cpi->cpi_procnodeid = cpi->cpi_chipid;
1921 1921 cpi->cpi_compunitid = cpi->cpi_coreid;
1922 1922 }
1923 1923
1924 1924 /*
1925 1925 * Historically, AMD has had CMP chips with only a single thread per core.
1926 1926 * However, starting in family 17h (Zen), this has changed and they now have
1927 1927 * multiple threads. Our internal core id needs to be a unique value.
1928 1928 *
1929 1929 * To determine the core id of an AMD system, if we're from a family before 17h,
1930 1930 * then we just use the cpu id, as that gives us a good value that will be
1931 1931 * unique for each core. If instead, we're on family 17h or later, then we need
1932 1932 * to do something more complicated. CPUID leaf 0x8000001e can tell us
1933 1933  * how many threads share a core. Based on that, we'll shift the APIC ID.
1934 1934  * We can't use the normal core id in that leaf as it's only unique within the
1935 1935  * socket, which is perfect for cpi_pkgcoreid, but not for our purposes.
1936 1936 */
1937 1937 static id_t
1938 1938 cpuid_amd_get_coreid(cpu_t *cpu)
1939 1939 {
1940 1940 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1941 1941
1942 1942 if (cpi->cpi_family >= 0x17 &&
1943 1943 is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
1944 1944 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
1945 1945 uint_t nthreads = BITX(cpi->cpi_extd[0x1e].cp_ebx, 15, 8) + 1;
1946 1946 if (nthreads > 1) {
1947 1947 VERIFY3U(nthreads, ==, 2);
1948 1948 return (cpi->cpi_apicid >> 1);
1949 1949 }
1950 1950 }
1951 1951
1952 1952 return (cpu->cpu_id);
1953 1953 }
1954 1954
1955 1955 /*
1956 1956  * Constructing IDs on AMD is more challenging. This is notable because of the
1957 1957 * following two facts:
1958 1958 *
1959 1959 * 1. Before family 0x17 (Zen), there was no support for SMT and there was
1960 1960 * also no way to get an actual unique core id from the system. As such, we
1961 1961 * synthesize this case by using cpu->cpu_id. This scheme does not,
1962 1962 * however, guarantee that sibling cores of a chip will have sequential
1963 1963 * coreids starting at a multiple of the number of cores per chip - that is
1964 1964 * usually the case, but if the ACPI MADT table is presented in a different
1965 1965 * order then we need to perform a few more gymnastics for the pkgcoreid.
1966 1966 *
1967 1967  *    2. In families 0x15 and 0x16 (Bulldozer and co.) the cores came in groups
1968 1968 * called compute units. These compute units share the L1I cache, L2 cache,
1969 1969 * and the FPU. To deal with this, a new topology leaf was added in
1970 1970 * 0x8000001e. However, parts of this leaf have different meanings
1971 1971 * once we get to family 0x17.
1972 1972 */
1973 1973
1974 1974 static void
1975 1975 cpuid_amd_getids(cpu_t *cpu, uchar_t *features)
1976 1976 {
1977 1977 int i, first_half, coreidsz;
1978 1978 uint32_t nb_caps_reg;
1979 1979 uint_t node2_1;
1980 1980 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1981 1981 struct cpuid_regs *cp;
1982 1982
1983 1983 /*
1984 1984 * Calculate the core id (this comes from hardware in family 0x17 if it
1985 1985 * hasn't been stripped by virtualization). We always set the compute
1986 1986 * unit id to the same value. Also, initialize the default number of
1987 1987 * cores per compute unit and nodes per package. This will be
1988 1988 * overwritten when we know information about a particular family.
1989 1989 */
1990 1990 cpi->cpi_coreid = cpuid_amd_get_coreid(cpu);
1991 1991 cpi->cpi_compunitid = cpi->cpi_coreid;
1992 1992 cpi->cpi_cores_per_compunit = 1;
1993 1993 cpi->cpi_procnodes_per_pkg = 1;
1994 1994
1995 1995 /*
1996 1996 * To construct the logical ID, we need to determine how many APIC IDs
1997 1997 * are dedicated to the cores and threads. This is provided for us in
1998 1998 * 0x80000008. However, if it's not present (say due to virtualization),
1999 1999 * then we assume it's one. This should be present on all 64-bit AMD
2000 2000 * processors. It was added in family 0xf (Hammer).
2001 2001 */
2002 2002 if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
2003 2003 coreidsz = BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12);
2004 2004
2005 2005 /*
2006 2006 * In AMD parlance chip is really a node while illumos
2007 2007 * uses chip as equivalent to socket/package.
2008 2008 */
2009 2009 if (coreidsz == 0) {
2010 2010 /* Use legacy method */
2011 2011 for (i = 1; i < cpi->cpi_ncore_per_chip; i <<= 1)
2012 2012 coreidsz++;
2013 2013 if (coreidsz == 0)
2014 2014 coreidsz = 1;
2015 2015 }
2016 2016 } else {
2017 2017 /* Assume single-core part */
2018 2018 coreidsz = 1;
2019 2019 }
2020 2020 cpi->cpi_clogid = cpi->cpi_apicid & ((1 << coreidsz) - 1);
2021 2021
2022 2022 /*
2023 2023 * The package core ID varies depending on the family. For family 17h,
2024 2024 * we can get this directly from leaf CPUID_LEAF_EXT_1e. Otherwise, we
2025 2025  * can use the clogid as is. When family 17h is virtualized and we don't
2026 2026  * have valid data in the leaf, we won't think we have SMT; in that case
2027 2027  * the cpi_clogid is, once again, sufficient for the package core ID on
2028 2028  * its own.
2029 2029 */
2030 2030 if (cpi->cpi_family >= 0x17 &&
2031 2031 is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
2032 2032 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e &&
2033 2033 cpi->cpi_extd[0x1e].cp_ebx != 0) {
2034 2034 cpi->cpi_pkgcoreid = BITX(cpi->cpi_extd[0x1e].cp_ebx, 7, 0);
2035 2035 } else {
2036 2036 cpi->cpi_pkgcoreid = cpi->cpi_clogid;
2037 2037 }
2038 2038
2039 2039 /*
2040 2040 * Obtain the node ID and compute unit IDs. If we're on family 0x15
2041 2041 * (bulldozer) or newer, then we can derive all of this from leaf
2042 2042 * CPUID_LEAF_EXT_1e. Otherwise, the method varies by family.
2043 2043 */
2044 2044 if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
2045 2045 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1e) {
2046 2046 cp = &cpi->cpi_extd[0x1e];
2047 2047
2048 2048 cpi->cpi_procnodes_per_pkg = BITX(cp->cp_ecx, 10, 8) + 1;
2049 2049 cpi->cpi_procnodeid = BITX(cp->cp_ecx, 7, 0);
2050 2050
2051 2051 /*
2052 2052 * For Bulldozer-era CPUs, recalculate the compute unit
2053 2053 * information.
2054 2054 */
2055 2055 if (cpi->cpi_family >= 0x15 && cpi->cpi_family < 0x17) {
2056 2056 cpi->cpi_cores_per_compunit =
2057 2057 BITX(cp->cp_ebx, 15, 8) + 1;
2058 2058 cpi->cpi_compunitid = BITX(cp->cp_ebx, 7, 0) +
2059 2059 (cpi->cpi_ncore_per_chip /
2060 2060 cpi->cpi_cores_per_compunit) *
2061 2061 (cpi->cpi_procnodeid /
2062 2062 cpi->cpi_procnodes_per_pkg);
2063 2063 }
2064 2064 } else if (cpi->cpi_family == 0xf || cpi->cpi_family >= 0x11) {
2065 2065 cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7;
2066 2066 } else if (cpi->cpi_family == 0x10) {
2067 2067 /*
2068 2068 * See if we are a multi-node processor.
2069 2069 * All processors in the system have the same number of nodes
2070 2070 */
2071 2071 nb_caps_reg = pci_getl_func(0, 24, 3, 0xe8);
2072 2072 if ((cpi->cpi_model < 8) || BITX(nb_caps_reg, 29, 29) == 0) {
2073 2073 /* Single-node */
2074 2074 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5,
2075 2075 coreidsz);
2076 2076 } else {
2077 2077
2078 2078 /*
2079 2079 * Multi-node revision D (2 nodes per package
2080 2080 * are supported)
2081 2081 */
2082 2082 cpi->cpi_procnodes_per_pkg = 2;
2083 2083
2084 2084 first_half = (cpi->cpi_pkgcoreid <=
2085 2085 (cpi->cpi_ncore_per_chip/2 - 1));
2086 2086
2087 2087 if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) {
2088 2088 /* We are BSP */
2089 2089 cpi->cpi_procnodeid = (first_half ? 0 : 1);
2090 2090 } else {
2091 2091
2092 2092 /* We are AP */
2093 2093 /* NodeId[2:1] bits to use for reading F3xe8 */
2094 2094 node2_1 = BITX(cpi->cpi_apicid, 5, 4) << 1;
2095 2095
2096 2096 nb_caps_reg =
2097 2097 pci_getl_func(0, 24 + node2_1, 3, 0xe8);
2098 2098
2099 2099 /*
2100 2100 * Check IntNodeNum bit (31:30, but bit 31 is
2101 2101 * always 0 on dual-node processors)
2102 2102 */
2103 2103 if (BITX(nb_caps_reg, 30, 30) == 0)
2104 2104 cpi->cpi_procnodeid = node2_1 +
2105 2105 !first_half;
2106 2106 else
2107 2107 cpi->cpi_procnodeid = node2_1 +
2108 2108 first_half;
2109 2109 }
2110 2110 }
2111 2111 } else {
2112 2112 cpi->cpi_procnodeid = 0;
2113 2113 }
2114 2114
2115 2115 cpi->cpi_chipid =
2116 2116 cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg;
2117 2117
2118 2118 cpi->cpi_ncore_bits = coreidsz;
2119 2119 cpi->cpi_nthread_bits = ddi_fls(cpi->cpi_ncpu_per_chip /
2120 2120 cpi->cpi_ncore_per_chip);
2121 2121 }
2122 2122
2123 2123 static void
2124 +spec_l1d_flush_noop(void)
2125 +{
2126 +}
2127 +
2128 +static void
2129 +spec_l1d_flush_msr(void)
2130 +{
2131 + wrmsr(MSR_IA32_FLUSH_CMD, IA32_FLUSH_CMD_L1D);
2132 +}
2133 +
2134 +void (*spec_l1d_flush)(void) = spec_l1d_flush_noop;
2135 +
2136 +static void
2124 2137 cpuid_scan_security(cpu_t *cpu, uchar_t *featureset)
2125 2138 {
2126 2139 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2127 2140
2128 2141 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
2129 2142 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
2130 2143 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBPB)
2131 2144 add_x86_feature(featureset, X86FSET_IBPB);
2132 2145 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBRS)
2133 2146 add_x86_feature(featureset, X86FSET_IBRS);
2134 2147 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_STIBP)
2135 2148 add_x86_feature(featureset, X86FSET_STIBP);
2136 2149 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBRS_ALL)
2137 2150 add_x86_feature(featureset, X86FSET_IBRS_ALL);
2138 2151 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_STIBP_ALL)
2139 2152 add_x86_feature(featureset, X86FSET_STIBP_ALL);
2140 2153 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_PREFER_IBRS)
2141 2154 add_x86_feature(featureset, X86FSET_RSBA);
2142 2155 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_SSBD)
2143 2156 add_x86_feature(featureset, X86FSET_SSBD);
2144 2157 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_VIRT_SSBD)
2145 2158 add_x86_feature(featureset, X86FSET_SSBD_VIRT);
2146 2159 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_SSB_NO)
2147 2160 add_x86_feature(featureset, X86FSET_SSB_NO);
2148 2161 } else if (cpi->cpi_vendor == X86_VENDOR_Intel &&
2149 2162 cpi->cpi_maxeax >= 7) {
2150 2163 struct cpuid_regs *ecp;
2151 2164 ecp = &cpi->cpi_std[7];
2152 2165
2153 2166 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_SPEC_CTRL) {
2154 2167 add_x86_feature(featureset, X86FSET_IBRS);
2155 2168 add_x86_feature(featureset, X86FSET_IBPB);
2156 2169 }
2157 2170
2158 2171 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_STIBP) {
2159 2172 add_x86_feature(featureset, X86FSET_STIBP);
2160 2173 }
2161 2174
2162 2175 /*
2163 2176 * Don't read the arch caps MSR on xpv where we lack the
2164 2177 * on_trap().
2165 2178 */
2166 2179 #ifndef __xpv
2167 2180 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_ARCH_CAPS) {
2168 2181 on_trap_data_t otd;
2169 2182
2170 2183 /*
2171 2184 * Be paranoid and assume we'll get a #GP.
2172 2185 */
2173 2186 if (!on_trap(&otd, OT_DATA_ACCESS)) {
2174 2187 uint64_t reg;
2175 2188
2176 2189 reg = rdmsr(MSR_IA32_ARCH_CAPABILITIES);
2177 2190 if (reg & IA32_ARCH_CAP_RDCL_NO) {
2178 2191 add_x86_feature(featureset,
2179 2192 X86FSET_RDCL_NO);
2180 2193 }
2181 2194 if (reg & IA32_ARCH_CAP_IBRS_ALL) {
2182 2195 add_x86_feature(featureset,
2183 2196 X86FSET_IBRS_ALL);
2184 2197 }
2185 2198 if (reg & IA32_ARCH_CAP_RSBA) {
2186 2199 add_x86_feature(featureset,
2187 2200 X86FSET_RSBA);
2188 2201 }
2189 2202 if (reg & IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY) {
2190 2203 add_x86_feature(featureset,
2191 2204 X86FSET_L1D_VM_NO);
2192 2205 }
2193 2206 if (reg & IA32_ARCH_CAP_SSB_NO) {
2194 2207 add_x86_feature(featureset,
2195 2208 X86FSET_SSB_NO);
2196 2209 }
2197 2210 }
2198 2211 no_trap();
2199 2212 }
2200 2213 #endif /* !__xpv */
2201 2214
2202 2215 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_SSBD)
2203 2216 add_x86_feature(featureset, X86FSET_SSBD);
2204 2217
2205 2218 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_FLUSH_CMD)
2206 2219 add_x86_feature(featureset, X86FSET_FLUSH_CMD);
2207 2220 }
2221 +
2222 + if (cpu->cpu_id != 0)
2223 + return;
2224 +
2225 + /*
2226 + * We're the boot CPU, so let's figure out our L1TF status.
2227 + *
2228 + * First, if this is a RDCL_NO CPU, then we are not vulnerable: we don't
2229 + * need to exclude with ht_acquire(), and we don't need to flush.
2230 + */
2231 + if (is_x86_feature(featureset, X86FSET_RDCL_NO)) {
2232 + extern int ht_exclusion;
2233 + ht_exclusion = 0;
2234 + spec_l1d_flush = spec_l1d_flush_noop;
2235 + membar_producer();
2236 + return;
2237 + }
2238 +
2239 + /*
2240 + * If HT is enabled, we will need HT exclusion, as well as the flush on
2241 + * VM entry. If HT isn't enabled, we still need at least the flush for
2242 + * the L1TF sequential case.
2243 + *
2244 + * However, if X86FSET_L1D_VM_NO is set, we're most likely running
2245 + * inside a VM ourselves, and we don't need the flush.
2246 + *
2247 + * If we don't have the FLUSH_CMD available at all, we'd better just
2248 + * hope HT is disabled.
2249 + */
2250 + if (is_x86_feature(featureset, X86FSET_FLUSH_CMD) &&
2251 + !is_x86_feature(featureset, X86FSET_L1D_VM_NO)) {
2252 + spec_l1d_flush = spec_l1d_flush_msr;
2253 + } else {
2254 + spec_l1d_flush = spec_l1d_flush_noop;
2255 + }
2256 +
2257 + membar_producer();
2208 2258 }
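/*
 * Usage sketch (an assumption about the caller, not established by this
 * file): a virtual machine monitor's guest-entry path is expected to
 * invoke the hook selected above, i.e. (*spec_l1d_flush)(), immediately
 * before resuming the guest, so that the L1D is flushed via the MSR when
 * the hardware requires it and the call degenerates to a no-op otherwise.
 */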
2209 2259
2210 2260 /*
2211 2261 * Setup XFeature_Enabled_Mask register. Required by xsave feature.
2212 2262 */
2213 2263 void
2214 2264 setup_xfem(void)
2215 2265 {
2216 2266 uint64_t flags = XFEATURE_LEGACY_FP;
2217 2267
2218 2268 ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
2219 2269
2220 2270 if (is_x86_feature(x86_featureset, X86FSET_SSE))
2221 2271 flags |= XFEATURE_SSE;
2222 2272
2223 2273 if (is_x86_feature(x86_featureset, X86FSET_AVX))
2224 2274 flags |= XFEATURE_AVX;
2225 2275
2226 2276 if (is_x86_feature(x86_featureset, X86FSET_AVX512F))
2227 2277 flags |= XFEATURE_AVX512;
2228 2278
2229 2279 set_xcr(XFEATURE_ENABLED_MASK, flags);
2230 2280
2231 2281 xsave_bv_all = flags;
2232 2282 }
2233 2283
2234 2284 static void
2235 2285 cpuid_pass1_topology(cpu_t *cpu, uchar_t *featureset)
2236 2286 {
2237 2287 struct cpuid_info *cpi;
2238 2288
2239 2289 cpi = cpu->cpu_m.mcpu_cpi;
2240 2290
2241 2291 if (cpi->cpi_vendor == X86_VENDOR_AMD) {
2242 2292 cpuid_gather_amd_topology_leaves(cpu);
2243 2293 }
2244 2294
2245 2295 cpi->cpi_apicid = cpuid_gather_apicid(cpi);
2246 2296
2247 2297 /*
2248 2298 * Before we can calculate the IDs that we should assign to this
2249 2299 * processor, we need to understand how many cores and threads it has.
2250 2300 */
2251 2301 switch (cpi->cpi_vendor) {
2252 2302 case X86_VENDOR_Intel:
2253 2303 cpuid_intel_ncores(cpi, &cpi->cpi_ncpu_per_chip,
2254 2304 &cpi->cpi_ncore_per_chip);
2255 2305 break;
2256 2306 case X86_VENDOR_AMD:
2257 2307 cpuid_amd_ncores(cpi, &cpi->cpi_ncpu_per_chip,
2258 2308 &cpi->cpi_ncore_per_chip);
2259 2309 break;
2260 2310 default:
2261 2311 /*
2262 2312 		 * If we have some other x86-compatible chip, it's not clear how
2263 2313 		 * it would behave. The most common case is virtualization
2264 2314 * today, though there are also 64-bit VIA chips. Assume that
2265 2315 * all we can get is the basic Leaf 1 HTT information.
2266 2316 */
2267 2317 if ((cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_HTT) != 0) {
2268 2318 cpi->cpi_ncore_per_chip = 1;
2269 2319 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi);
2270 2320 }
2271 2321 break;
2272 2322 }
2273 2323
2274 2324 /*
2275 2325 * Based on the calculated number of threads and cores, potentially
2276 2326 	 * assign the HTT and CMP features.
2277 2327 */
2278 2328 if (cpi->cpi_ncore_per_chip > 1) {
2279 2329 add_x86_feature(featureset, X86FSET_CMP);
2280 2330 }
2281 2331
2282 2332 if (cpi->cpi_ncpu_per_chip > 1 &&
2283 2333 cpi->cpi_ncpu_per_chip != cpi->cpi_ncore_per_chip) {
2284 2334 add_x86_feature(featureset, X86FSET_HTT);
2285 2335 }
2286 2336
2287 2337 /*
2288 2338 	 * Now that this has been set up, we need to go through and calculate all of
2289 2339 * the rest of the parameters that exist. If we think the CPU doesn't
2290 2340 * have either SMT (HTT) or CMP, then we basically go through and fake
2291 2341 * up information in some way. The most likely case for this is
2292 2342 * virtualization where we have a lot of partial topology information.
2293 2343 */
2294 2344 if (!is_x86_feature(featureset, X86FSET_HTT) &&
2295 2345 !is_x86_feature(featureset, X86FSET_CMP)) {
2296 2346 /*
2297 2347 * This is a single core, single-threaded processor.
2298 2348 */
2299 2349 cpi->cpi_procnodes_per_pkg = 1;
2300 2350 cpi->cpi_cores_per_compunit = 1;
2301 2351 cpi->cpi_compunitid = 0;
2302 2352 cpi->cpi_chipid = -1;
2303 2353 cpi->cpi_clogid = 0;
2304 2354 cpi->cpi_coreid = cpu->cpu_id;
2305 2355 cpi->cpi_pkgcoreid = 0;
2306 2356 if (cpi->cpi_vendor == X86_VENDOR_AMD) {
2307 2357 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 3, 0);
2308 2358 } else {
2309 2359 cpi->cpi_procnodeid = cpi->cpi_chipid;
2310 2360 }
2311 2361 } else {
2312 2362 switch (cpi->cpi_vendor) {
2313 2363 case X86_VENDOR_Intel:
2314 2364 cpuid_intel_getids(cpu, featureset);
2315 2365 break;
2316 2366 case X86_VENDOR_AMD:
2317 2367 cpuid_amd_getids(cpu, featureset);
2318 2368 break;
2319 2369 default:
2320 2370 /*
2321 2371 * In this case, it's hard to say what we should do.
2322 2372 * We're going to model them to the OS as single core
2323 2373 * threads. We don't have a good identifier for them, so
2324 2374 * we're just going to use the cpu id all on a single
2325 2375 * chip.
2326 2376 *
2327 2377 * This case has historically been different from the
2328 2378 * case above where we don't have HTT or CMP. While they
2329 2379 * could be combined, we've opted to keep it separate to
2330 2380 * minimize the risk of topology changes in weird cases.
2331 2381 */
2332 2382 cpi->cpi_procnodes_per_pkg = 1;
2333 2383 cpi->cpi_cores_per_compunit = 1;
2334 2384 cpi->cpi_chipid = 0;
2335 2385 cpi->cpi_coreid = cpu->cpu_id;
2336 2386 cpi->cpi_clogid = cpu->cpu_id;
2337 2387 cpi->cpi_pkgcoreid = cpu->cpu_id;
2338 2388 cpi->cpi_procnodeid = cpi->cpi_chipid;
2339 2389 cpi->cpi_compunitid = cpi->cpi_coreid;
2340 2390 break;
2341 2391 }
2342 2392 }
2343 2393 }
2344 2394
2345 2395 void
2346 2396 cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
2347 2397 {
2348 2398 uint32_t mask_ecx, mask_edx;
2349 2399 struct cpuid_info *cpi;
2350 2400 struct cpuid_regs *cp;
2351 2401 int xcpuid;
2352 2402 #if !defined(__xpv)
2353 2403 extern int idle_cpu_prefer_mwait;
2354 2404 #endif
2355 2405
2356 2406 /*
2357 2407 * Space statically allocated for BSP, ensure pointer is set
2358 2408 */
2359 2409 if (cpu->cpu_id == 0) {
2360 2410 if (cpu->cpu_m.mcpu_cpi == NULL)
2361 2411 cpu->cpu_m.mcpu_cpi = &cpuid_info0;
2362 2412 }
2363 2413
2364 2414 add_x86_feature(featureset, X86FSET_CPUID);
2365 2415
2366 2416 cpi = cpu->cpu_m.mcpu_cpi;
2367 2417 ASSERT(cpi != NULL);
2368 2418 cp = &cpi->cpi_std[0];
2369 2419 cp->cp_eax = 0;
2370 2420 cpi->cpi_maxeax = __cpuid_insn(cp);
2371 2421 {
2372 2422 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr;
2373 2423 *iptr++ = cp->cp_ebx;
2374 2424 *iptr++ = cp->cp_edx;
2375 2425 *iptr++ = cp->cp_ecx;
2376 2426 *(char *)&cpi->cpi_vendorstr[12] = '\0';
2377 2427 }
2378 2428
2379 2429 cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr);
2380 2430 x86_vendor = cpi->cpi_vendor; /* for compatibility */
2381 2431
2382 2432 /*
2383 2433 * Limit the range in case of weird hardware
2384 2434 */
2385 2435 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX)
2386 2436 cpi->cpi_maxeax = CPI_MAXEAX_MAX;
2387 2437 if (cpi->cpi_maxeax < 1)
2388 2438 goto pass1_done;
2389 2439
2390 2440 cp = &cpi->cpi_std[1];
2391 2441 cp->cp_eax = 1;
2392 2442 (void) __cpuid_insn(cp);
2393 2443
2394 2444 /*
2395 2445 * Extract identifying constants for easy access.
2396 2446 */
2397 2447 cpi->cpi_model = CPI_MODEL(cpi);
2398 2448 cpi->cpi_family = CPI_FAMILY(cpi);
2399 2449
2400 2450 if (cpi->cpi_family == 0xf)
2401 2451 cpi->cpi_family += CPI_FAMILY_XTD(cpi);
2402 2452
2403 2453 /*
2404 2454 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf.
2405 2455 * Intel, and presumably everyone else, uses model == 0xf, as
2406 2456 * one would expect (max value means possible overflow). Sigh.
2407 2457 */
2408 2458
2409 2459 switch (cpi->cpi_vendor) {
2410 2460 case X86_VENDOR_Intel:
2411 2461 if (IS_EXTENDED_MODEL_INTEL(cpi))
2412 2462 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
2413 2463 break;
2414 2464 case X86_VENDOR_AMD:
2415 2465 if (CPI_FAMILY(cpi) == 0xf)
2416 2466 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
2417 2467 break;
2418 2468 default:
2419 2469 if (cpi->cpi_model == 0xf)
2420 2470 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
2421 2471 break;
2422 2472 }
2423 2473
2424 2474 cpi->cpi_step = CPI_STEP(cpi);
2425 2475 cpi->cpi_brandid = CPI_BRANDID(cpi);
2426 2476
2427 2477 /*
2428 2478 * *default* assumptions:
2429 2479 * - believe %edx feature word
2430 2480 * - ignore %ecx feature word
2431 2481 * - 32-bit virtual and physical addressing
2432 2482 */
2433 2483 mask_edx = 0xffffffff;
2434 2484 mask_ecx = 0;
2435 2485
2436 2486 cpi->cpi_pabits = cpi->cpi_vabits = 32;
2437 2487
2438 2488 switch (cpi->cpi_vendor) {
2439 2489 case X86_VENDOR_Intel:
2440 2490 if (cpi->cpi_family == 5)
2441 2491 x86_type = X86_TYPE_P5;
2442 2492 else if (IS_LEGACY_P6(cpi)) {
2443 2493 x86_type = X86_TYPE_P6;
2444 2494 pentiumpro_bug4046376 = 1;
2445 2495 /*
2446 2496 * Clear the SEP bit when it was set erroneously
2447 2497 */
2448 2498 if (cpi->cpi_model < 3 && cpi->cpi_step < 3)
2449 2499 cp->cp_edx &= ~CPUID_INTC_EDX_SEP;
2450 2500 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) {
2451 2501 x86_type = X86_TYPE_P4;
2452 2502 /*
2453 2503 * We don't currently depend on any of the %ecx
2454 2504 * features until Prescott, so we'll only check
2455 2505 * this from P4 onwards. We might want to revisit
2456 2506 * that idea later.
2457 2507 */
2458 2508 mask_ecx = 0xffffffff;
2459 2509 } else if (cpi->cpi_family > 0xf)
2460 2510 mask_ecx = 0xffffffff;
2461 2511 /*
2462 2512 * We don't support MONITOR/MWAIT if leaf 5 is not available
2463 2513 * to obtain the monitor linesize.
2464 2514 */
2465 2515 if (cpi->cpi_maxeax < 5)
2466 2516 mask_ecx &= ~CPUID_INTC_ECX_MON;
2467 2517 break;
2468 2518 case X86_VENDOR_IntelClone:
2469 2519 default:
2470 2520 break;
2471 2521 case X86_VENDOR_AMD:
2472 2522 #if defined(OPTERON_ERRATUM_108)
2473 2523 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) {
2474 2524 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0;
2475 2525 cpi->cpi_model = 0xc;
2476 2526 } else
2477 2527 #endif
2478 2528 if (cpi->cpi_family == 5) {
2479 2529 /*
2480 2530 * AMD K5 and K6
2481 2531 *
2482 2532 * These CPUs have an incomplete implementation
2483 2533 * of MCA/MCE which we mask away.
2484 2534 */
2485 2535 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA);
2486 2536
2487 2537 /*
2488 2538 * Model 0 uses the wrong (APIC) bit
2489 2539 * to indicate PGE. Fix it here.
2490 2540 */
2491 2541 if (cpi->cpi_model == 0) {
2492 2542 if (cp->cp_edx & 0x200) {
2493 2543 cp->cp_edx &= ~0x200;
2494 2544 cp->cp_edx |= CPUID_INTC_EDX_PGE;
2495 2545 }
2496 2546 }
2497 2547
2498 2548 /*
2499 2549 * Early models had problems w/ MMX; disable.
2500 2550 */
2501 2551 if (cpi->cpi_model < 6)
2502 2552 mask_edx &= ~CPUID_INTC_EDX_MMX;
2503 2553 }
2504 2554
2505 2555 /*
2506 2556 * For newer families, SSE3 and CX16, at least, are valid;
2507 2557 * enable all
2508 2558 */
2509 2559 if (cpi->cpi_family >= 0xf)
2510 2560 mask_ecx = 0xffffffff;
2511 2561 /*
2512 2562 * We don't support MONITOR/MWAIT if leaf 5 is not available
2513 2563 * to obtain the monitor linesize.
2514 2564 */
2515 2565 if (cpi->cpi_maxeax < 5)
2516 2566 mask_ecx &= ~CPUID_INTC_ECX_MON;
2517 2567
2518 2568 #if !defined(__xpv)
2519 2569 /*
2520 2570 * AMD has not historically used MWAIT in the CPU's idle loop.
2521 2571 * Pre-family-10h Opterons do not have the MWAIT instruction. We
2522 2572 * know for certain that in at least family 17h, per AMD, mwait
2523 2573 * is preferred. Families in-between are less certain.
2524 2574 */
2525 2575 if (cpi->cpi_family < 0x17) {
2526 2576 idle_cpu_prefer_mwait = 0;
2527 2577 }
2528 2578 #endif
2529 2579
2530 2580 break;
2531 2581 case X86_VENDOR_TM:
2532 2582 /*
2533 2583 * workaround the NT workaround in CMS 4.1
2534 2584 */
2535 2585 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 &&
2536 2586 (cpi->cpi_step == 2 || cpi->cpi_step == 3))
2537 2587 cp->cp_edx |= CPUID_INTC_EDX_CX8;
2538 2588 break;
2539 2589 case X86_VENDOR_Centaur:
2540 2590 /*
2541 2591 * workaround the NT workarounds again
2542 2592 */
2543 2593 if (cpi->cpi_family == 6)
2544 2594 cp->cp_edx |= CPUID_INTC_EDX_CX8;
2545 2595 break;
2546 2596 case X86_VENDOR_Cyrix:
2547 2597 /*
2548 2598 * We rely heavily on the probing in locore
2549 2599 * to actually figure out what parts, if any,
2550 2600 * of the Cyrix cpuid instruction to believe.
2551 2601 */
2552 2602 switch (x86_type) {
2553 2603 case X86_TYPE_CYRIX_486:
2554 2604 mask_edx = 0;
2555 2605 break;
2556 2606 case X86_TYPE_CYRIX_6x86:
2557 2607 mask_edx = 0;
2558 2608 break;
2559 2609 case X86_TYPE_CYRIX_6x86L:
2560 2610 mask_edx =
2561 2611 CPUID_INTC_EDX_DE |
2562 2612 CPUID_INTC_EDX_CX8;
2563 2613 break;
2564 2614 case X86_TYPE_CYRIX_6x86MX:
2565 2615 mask_edx =
2566 2616 CPUID_INTC_EDX_DE |
2567 2617 CPUID_INTC_EDX_MSR |
2568 2618 CPUID_INTC_EDX_CX8 |
2569 2619 CPUID_INTC_EDX_PGE |
2570 2620 CPUID_INTC_EDX_CMOV |
2571 2621 CPUID_INTC_EDX_MMX;
2572 2622 break;
2573 2623 case X86_TYPE_CYRIX_GXm:
2574 2624 mask_edx =
2575 2625 CPUID_INTC_EDX_MSR |
2576 2626 CPUID_INTC_EDX_CX8 |
2577 2627 CPUID_INTC_EDX_CMOV |
2578 2628 CPUID_INTC_EDX_MMX;
2579 2629 break;
2580 2630 case X86_TYPE_CYRIX_MediaGX:
2581 2631 break;
2582 2632 case X86_TYPE_CYRIX_MII:
2583 2633 case X86_TYPE_VIA_CYRIX_III:
2584 2634 mask_edx =
2585 2635 CPUID_INTC_EDX_DE |
2586 2636 CPUID_INTC_EDX_TSC |
2587 2637 CPUID_INTC_EDX_MSR |
2588 2638 CPUID_INTC_EDX_CX8 |
2589 2639 CPUID_INTC_EDX_PGE |
2590 2640 CPUID_INTC_EDX_CMOV |
2591 2641 CPUID_INTC_EDX_MMX;
2592 2642 break;
2593 2643 default:
2594 2644 break;
2595 2645 }
2596 2646 break;
2597 2647 }
2598 2648
2599 2649 #if defined(__xpv)
2600 2650 /*
2601 2651 * Do not support MONITOR/MWAIT under a hypervisor
2602 2652 */
2603 2653 mask_ecx &= ~CPUID_INTC_ECX_MON;
2604 2654 /*
2605 2655 * Do not support XSAVE under a hypervisor for now
2606 2656 */
2607 2657 xsave_force_disable = B_TRUE;
2608 2658
2609 2659 #endif /* __xpv */
2610 2660
2611 2661 if (xsave_force_disable) {
2612 2662 mask_ecx &= ~CPUID_INTC_ECX_XSAVE;
2613 2663 mask_ecx &= ~CPUID_INTC_ECX_AVX;
2614 2664 mask_ecx &= ~CPUID_INTC_ECX_F16C;
2615 2665 mask_ecx &= ~CPUID_INTC_ECX_FMA;
2616 2666 }
2617 2667
2618 2668 /*
2619 2669 * Now we've figured out the masks that determine
2620 2670 * which bits we choose to believe, apply the masks
2621 2671 * to the feature words, then map the kernel's view
2622 2672 * of these feature words into its feature word.
2623 2673 */
2624 2674 cp->cp_edx &= mask_edx;
2625 2675 cp->cp_ecx &= mask_ecx;
2626 2676
2627 2677 /*
2628 2678 * apply any platform restrictions (we don't call this
2629 2679 * immediately after __cpuid_insn here, because we need the
2630 2680 * workarounds applied above first)
2631 2681 */
2632 2682 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp);
2633 2683
2634 2684 /*
2635 2685 * In addition to ecx and edx, Intel and AMD are storing a bunch of
2636 2686 * instruction set extensions in leaf 7's ebx, ecx, and edx.
2637 2687 */
2638 2688 if (cpi->cpi_maxeax >= 7) {
2639 2689 struct cpuid_regs *ecp;
2640 2690 ecp = &cpi->cpi_std[7];
2641 2691 ecp->cp_eax = 7;
2642 2692 ecp->cp_ecx = 0;
2643 2693 (void) __cpuid_insn(ecp);
2644 2694
2645 2695 /*
2646 2696 * If XSAVE has been disabled, just ignore all of the
2647 2697 * extended-save-area dependent flags here.
2648 2698 */
2649 2699 if (xsave_force_disable) {
2650 2700 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_BMI1;
2651 2701 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_BMI2;
2652 2702 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_AVX2;
2653 2703 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_MPX;
2654 2704 ecp->cp_ebx &= ~CPUID_INTC_EBX_7_0_ALL_AVX512;
2655 2705 ecp->cp_ecx &= ~CPUID_INTC_ECX_7_0_ALL_AVX512;
2656 2706 ecp->cp_edx &= ~CPUID_INTC_EDX_7_0_ALL_AVX512;
2657 2707 }
2658 2708
2659 2709 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_SMEP)
2660 2710 add_x86_feature(featureset, X86FSET_SMEP);
2661 2711
2662 2712 /*
2663 2713 * We check disable_smap here in addition to in startup_smap()
2664 2714 * to ensure CPUs that aren't the boot CPU don't accidentally
2665 2715 * include it in the feature set and thus generate a mismatched
2666 2716 * x86 feature set across CPUs.
2667 2717 */
2668 2718 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_SMAP &&
2669 2719 disable_smap == 0)
2670 2720 add_x86_feature(featureset, X86FSET_SMAP);
2671 2721
2672 2722 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_RDSEED)
2673 2723 add_x86_feature(featureset, X86FSET_RDSEED);
2674 2724
2675 2725 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_ADX)
2676 2726 add_x86_feature(featureset, X86FSET_ADX);
2677 2727
2678 2728 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_FSGSBASE)
2679 2729 add_x86_feature(featureset, X86FSET_FSGSBASE);
2680 2730
2681 2731 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_CLFLUSHOPT)
2682 2732 add_x86_feature(featureset, X86FSET_CLFLUSHOPT);
2683 2733
2684 2734 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
2685 2735 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_INVPCID)
2686 2736 add_x86_feature(featureset, X86FSET_INVPCID);
2687 2737
2688 2738 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_MPX)
2689 2739 add_x86_feature(featureset, X86FSET_MPX);
2690 2740
2691 2741 if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_CLWB)
2692 2742 add_x86_feature(featureset, X86FSET_CLWB);
2693 2743 }
2694 2744 }
2695 2745
2696 2746 /*
2697 2747 * fold in overrides from the "eeprom" mechanism
2698 2748 */
2699 2749 cp->cp_edx |= cpuid_feature_edx_include;
2700 2750 cp->cp_edx &= ~cpuid_feature_edx_exclude;
2701 2751
2702 2752 cp->cp_ecx |= cpuid_feature_ecx_include;
2703 2753 cp->cp_ecx &= ~cpuid_feature_ecx_exclude;
2704 2754
2705 2755 if (cp->cp_edx & CPUID_INTC_EDX_PSE) {
2706 2756 add_x86_feature(featureset, X86FSET_LARGEPAGE);
2707 2757 }
2708 2758 if (cp->cp_edx & CPUID_INTC_EDX_TSC) {
2709 2759 add_x86_feature(featureset, X86FSET_TSC);
2710 2760 }
2711 2761 if (cp->cp_edx & CPUID_INTC_EDX_MSR) {
2712 2762 add_x86_feature(featureset, X86FSET_MSR);
2713 2763 }
2714 2764 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) {
2715 2765 add_x86_feature(featureset, X86FSET_MTRR);
2716 2766 }
2717 2767 if (cp->cp_edx & CPUID_INTC_EDX_PGE) {
2718 2768 add_x86_feature(featureset, X86FSET_PGE);
2719 2769 }
2720 2770 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) {
2721 2771 add_x86_feature(featureset, X86FSET_CMOV);
2722 2772 }
2723 2773 if (cp->cp_edx & CPUID_INTC_EDX_MMX) {
2724 2774 add_x86_feature(featureset, X86FSET_MMX);
2725 2775 }
2726 2776 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 &&
2727 2777 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) {
2728 2778 add_x86_feature(featureset, X86FSET_MCA);
2729 2779 }
2730 2780 if (cp->cp_edx & CPUID_INTC_EDX_PAE) {
2731 2781 add_x86_feature(featureset, X86FSET_PAE);
2732 2782 }
2733 2783 if (cp->cp_edx & CPUID_INTC_EDX_CX8) {
2734 2784 add_x86_feature(featureset, X86FSET_CX8);
2735 2785 }
2736 2786 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) {
2737 2787 add_x86_feature(featureset, X86FSET_CX16);
2738 2788 }
2739 2789 if (cp->cp_edx & CPUID_INTC_EDX_PAT) {
2740 2790 add_x86_feature(featureset, X86FSET_PAT);
2741 2791 }
2742 2792 if (cp->cp_edx & CPUID_INTC_EDX_SEP) {
2743 2793 add_x86_feature(featureset, X86FSET_SEP);
2744 2794 }
2745 2795 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) {
2746 2796 /*
2747 2797 * In our implementation, fxsave/fxrstor
2748 2798 * are prerequisites before we'll even
2749 2799 * try and do SSE things.
2750 2800 */
2751 2801 if (cp->cp_edx & CPUID_INTC_EDX_SSE) {
2752 2802 add_x86_feature(featureset, X86FSET_SSE);
2753 2803 }
2754 2804 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) {
2755 2805 add_x86_feature(featureset, X86FSET_SSE2);
2756 2806 }
2757 2807 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) {
2758 2808 add_x86_feature(featureset, X86FSET_SSE3);
2759 2809 }
2760 2810 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) {
2761 2811 add_x86_feature(featureset, X86FSET_SSSE3);
2762 2812 }
2763 2813 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) {
2764 2814 add_x86_feature(featureset, X86FSET_SSE4_1);
2765 2815 }
2766 2816 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) {
2767 2817 add_x86_feature(featureset, X86FSET_SSE4_2);
2768 2818 }
2769 2819 if (cp->cp_ecx & CPUID_INTC_ECX_AES) {
2770 2820 add_x86_feature(featureset, X86FSET_AES);
2771 2821 }
2772 2822 if (cp->cp_ecx & CPUID_INTC_ECX_PCLMULQDQ) {
2773 2823 add_x86_feature(featureset, X86FSET_PCLMULQDQ);
2774 2824 }
2775 2825
2776 2826 if (cpi->cpi_std[7].cp_ebx & CPUID_INTC_EBX_7_0_SHA)
2777 2827 add_x86_feature(featureset, X86FSET_SHA);
2778 2828
2779 2829 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_UMIP)
2780 2830 add_x86_feature(featureset, X86FSET_UMIP);
2781 2831 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_PKU)
2782 2832 add_x86_feature(featureset, X86FSET_PKU);
2783 2833 if (cpi->cpi_std[7].cp_ecx & CPUID_INTC_ECX_7_0_OSPKE)
2784 2834 add_x86_feature(featureset, X86FSET_OSPKE);
2785 2835
2786 2836 if (cp->cp_ecx & CPUID_INTC_ECX_XSAVE) {
2787 2837 add_x86_feature(featureset, X86FSET_XSAVE);
2788 2838
2789 2839 /* We only test AVX & AVX512 when there is XSAVE */
2790 2840
2791 2841 if (cp->cp_ecx & CPUID_INTC_ECX_AVX) {
2792 2842 add_x86_feature(featureset,
2793 2843 X86FSET_AVX);
2794 2844
2795 2845 /*
2796 2846 * Intel says we can't check these without also
2797 2847 * checking AVX.
2798 2848 */
2799 2849 if (cp->cp_ecx & CPUID_INTC_ECX_F16C)
2800 2850 add_x86_feature(featureset,
2801 2851 X86FSET_F16C);
2802 2852
2803 2853 if (cp->cp_ecx & CPUID_INTC_ECX_FMA)
2804 2854 add_x86_feature(featureset,
2805 2855 X86FSET_FMA);
2806 2856
2807 2857 if (cpi->cpi_std[7].cp_ebx &
2808 2858 CPUID_INTC_EBX_7_0_BMI1)
2809 2859 add_x86_feature(featureset,
2810 2860 X86FSET_BMI1);
2811 2861
2812 2862 if (cpi->cpi_std[7].cp_ebx &
2813 2863 CPUID_INTC_EBX_7_0_BMI2)
2814 2864 add_x86_feature(featureset,
2815 2865 X86FSET_BMI2);
2816 2866
2817 2867 if (cpi->cpi_std[7].cp_ebx &
2818 2868 CPUID_INTC_EBX_7_0_AVX2)
2819 2869 add_x86_feature(featureset,
2820 2870 X86FSET_AVX2);
2821 2871 }
2822 2872
2823 2873 if (cpi->cpi_vendor == X86_VENDOR_Intel &&
2824 2874 (cpi->cpi_std[7].cp_ebx &
2825 2875 CPUID_INTC_EBX_7_0_AVX512F) != 0) {
2826 2876 add_x86_feature(featureset, X86FSET_AVX512F);
2827 2877
2828 2878 if (cpi->cpi_std[7].cp_ebx &
2829 2879 CPUID_INTC_EBX_7_0_AVX512DQ)
2830 2880 add_x86_feature(featureset,
2831 2881 X86FSET_AVX512DQ);
2832 2882 if (cpi->cpi_std[7].cp_ebx &
2833 2883 CPUID_INTC_EBX_7_0_AVX512IFMA)
2834 2884 add_x86_feature(featureset,
2835 2885 X86FSET_AVX512FMA);
2836 2886 if (cpi->cpi_std[7].cp_ebx &
2837 2887 CPUID_INTC_EBX_7_0_AVX512PF)
2838 2888 add_x86_feature(featureset,
2839 2889 X86FSET_AVX512PF);
2840 2890 if (cpi->cpi_std[7].cp_ebx &
2841 2891 CPUID_INTC_EBX_7_0_AVX512ER)
2842 2892 add_x86_feature(featureset,
2843 2893 X86FSET_AVX512ER);
2844 2894 if (cpi->cpi_std[7].cp_ebx &
2845 2895 CPUID_INTC_EBX_7_0_AVX512CD)
2846 2896 add_x86_feature(featureset,
2847 2897 X86FSET_AVX512CD);
2848 2898 if (cpi->cpi_std[7].cp_ebx &
2849 2899 CPUID_INTC_EBX_7_0_AVX512BW)
2850 2900 add_x86_feature(featureset,
2851 2901 X86FSET_AVX512BW);
2852 2902 if (cpi->cpi_std[7].cp_ebx &
2853 2903 CPUID_INTC_EBX_7_0_AVX512VL)
2854 2904 add_x86_feature(featureset,
2855 2905 X86FSET_AVX512VL);
2856 2906
2857 2907 if (cpi->cpi_std[7].cp_ecx &
2858 2908 CPUID_INTC_ECX_7_0_AVX512VBMI)
2859 2909 add_x86_feature(featureset,
2860 2910 X86FSET_AVX512VBMI);
2861 2911 if (cpi->cpi_std[7].cp_ecx &
2862 2912 CPUID_INTC_ECX_7_0_AVX512VNNI)
2863 2913 add_x86_feature(featureset,
2864 2914 X86FSET_AVX512VNNI);
2865 2915 if (cpi->cpi_std[7].cp_ecx &
2866 2916 CPUID_INTC_ECX_7_0_AVX512VPOPCDQ)
2867 2917 add_x86_feature(featureset,
2868 2918 X86FSET_AVX512VPOPCDQ);
2869 2919
2870 2920 if (cpi->cpi_std[7].cp_edx &
2871 2921 CPUID_INTC_EDX_7_0_AVX5124NNIW)
2872 2922 add_x86_feature(featureset,
2873 2923 X86FSET_AVX512NNIW);
2874 2924 if (cpi->cpi_std[7].cp_edx &
2875 2925 CPUID_INTC_EDX_7_0_AVX5124FMAPS)
2876 2926 add_x86_feature(featureset,
2877 2927 X86FSET_AVX512FMAPS);
2878 2928 }
2879 2929 }
2880 2930 }
2881 2931
2882 2932 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
2883 2933 if (cp->cp_ecx & CPUID_INTC_ECX_PCID) {
2884 2934 add_x86_feature(featureset, X86FSET_PCID);
2885 2935 }
2886 2936 }
2887 2937
2888 2938 if (cp->cp_ecx & CPUID_INTC_ECX_X2APIC) {
2889 2939 add_x86_feature(featureset, X86FSET_X2APIC);
2890 2940 }
2891 2941 if (cp->cp_edx & CPUID_INTC_EDX_DE) {
2892 2942 add_x86_feature(featureset, X86FSET_DE);
2893 2943 }
2894 2944 #if !defined(__xpv)
2895 2945 if (cp->cp_ecx & CPUID_INTC_ECX_MON) {
2896 2946
2897 2947 /*
2898 2948 * We require the CLFLUSH instruction for the erratum workaround
2899 2949 * needed to use MONITOR/MWAIT.
2900 2950 */
2901 2951 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
2902 2952 cpi->cpi_mwait.support |= MWAIT_SUPPORT;
2903 2953 add_x86_feature(featureset, X86FSET_MWAIT);
2904 2954 } else {
2905 2955 extern int idle_cpu_assert_cflush_monitor;
2906 2956
2907 2957 /*
2908 2958 * All processors we are aware of which have
2909 2959 * MONITOR/MWAIT also have CLFLUSH.
2910 2960 */
2911 2961 if (idle_cpu_assert_cflush_monitor) {
2912 2962 ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) &&
2913 2963 (cp->cp_edx & CPUID_INTC_EDX_CLFSH));
2914 2964 }
2915 2965 }
2916 2966 }
2917 2967 #endif /* __xpv */
2918 2968
2919 2969 if (cp->cp_ecx & CPUID_INTC_ECX_VMX) {
2920 2970 add_x86_feature(featureset, X86FSET_VMX);
2921 2971 }
2922 2972
2923 2973 if (cp->cp_ecx & CPUID_INTC_ECX_RDRAND)
2924 2974 add_x86_feature(featureset, X86FSET_RDRAND);
2925 2975
2926 2976 /*
2927 2977 * We only need to capture this the first time; the rest of the
2928 2978 * cpus would follow suit. We only capture this for the boot cpu.
2929 2979 */
2930 2980 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
2931 2981 add_x86_feature(featureset, X86FSET_CLFSH);
2932 2982 x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8);
2933 2983 }
2934 2984 if (is_x86_feature(featureset, X86FSET_PAE))
2935 2985 cpi->cpi_pabits = 36;
2936 2986
2937 2987 if (cpi->cpi_maxeax >= 0xD && !xsave_force_disable) {
2938 2988 struct cpuid_regs r, *ecp;
2939 2989
2940 2990 ecp = &r;
2941 2991 ecp->cp_eax = 0xD;
2942 2992 ecp->cp_ecx = 1;
2943 2993 ecp->cp_edx = ecp->cp_ebx = 0;
2944 2994 (void) __cpuid_insn(ecp);
2945 2995
2946 2996 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVEOPT)
2947 2997 add_x86_feature(featureset, X86FSET_XSAVEOPT);
2948 2998 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVEC)
2949 2999 add_x86_feature(featureset, X86FSET_XSAVEC);
2950 3000 if (ecp->cp_eax & CPUID_INTC_EAX_D_1_XSAVES)
2951 3001 add_x86_feature(featureset, X86FSET_XSAVES);
2952 3002 }
2953 3003
2954 3004 /*
2955 3005 * Work on the "extended" feature information, doing
2956 3006 * some basic initialization for cpuid_pass2()
2957 3007 */
2958 3008 xcpuid = 0;
2959 3009 switch (cpi->cpi_vendor) {
2960 3010 case X86_VENDOR_Intel:
2961 3011 /*
2962 3012 * On KVM we know we will have proper support for extended
2963 3013 * cpuid.
2964 3014 */
2965 3015 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf ||
2966 3016 (get_hwenv() == HW_KVM && cpi->cpi_family == 6 &&
2967 3017 (cpi->cpi_model == 6 || cpi->cpi_model == 2)))
2968 3018 xcpuid++;
2969 3019 break;
2970 3020 case X86_VENDOR_AMD:
2971 3021 if (cpi->cpi_family > 5 ||
2972 3022 (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
2973 3023 xcpuid++;
2974 3024 break;
2975 3025 case X86_VENDOR_Cyrix:
2976 3026 /*
2977 3027 * Only these Cyrix CPUs are -known- to support
2978 3028 * extended cpuid operations.
2979 3029 */
2980 3030 if (x86_type == X86_TYPE_VIA_CYRIX_III ||
2981 3031 x86_type == X86_TYPE_CYRIX_GXm)
2982 3032 xcpuid++;
2983 3033 break;
2984 3034 case X86_VENDOR_Centaur:
2985 3035 case X86_VENDOR_TM:
2986 3036 default:
2987 3037 xcpuid++;
2988 3038 break;
2989 3039 }
2990 3040
2991 3041 if (xcpuid) {
2992 3042 cp = &cpi->cpi_extd[0];
2993 3043 cp->cp_eax = CPUID_LEAF_EXT_0;
2994 3044 cpi->cpi_xmaxeax = __cpuid_insn(cp);
2995 3045 }
2996 3046
2997 3047 if (cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) {
2998 3048
2999 3049 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX)
3000 3050 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX;
3001 3051
3002 3052 switch (cpi->cpi_vendor) {
3003 3053 case X86_VENDOR_Intel:
3004 3054 case X86_VENDOR_AMD:
3005 3055 if (cpi->cpi_xmaxeax < 0x80000001)
3006 3056 break;
3007 3057 cp = &cpi->cpi_extd[1];
3008 3058 cp->cp_eax = 0x80000001;
3009 3059 (void) __cpuid_insn(cp);
3010 3060
3011 3061 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
3012 3062 cpi->cpi_family == 5 &&
3013 3063 cpi->cpi_model == 6 &&
3014 3064 cpi->cpi_step == 6) {
3015 3065 /*
3016 3066 * K6 model 6 uses bit 10 to indicate SYSC
3017 3067 * Later models use bit 11. Fix it here.
3018 3068 */
3019 3069 if (cp->cp_edx & 0x400) {
3020 3070 cp->cp_edx &= ~0x400;
3021 3071 cp->cp_edx |= CPUID_AMD_EDX_SYSC;
3022 3072 }
3023 3073 }
3024 3074
3025 3075 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp);
3026 3076
3027 3077 /*
3028 3078 * Compute the additions to the kernel's feature word.
3029 3079 */
3030 3080 if (cp->cp_edx & CPUID_AMD_EDX_NX) {
3031 3081 add_x86_feature(featureset, X86FSET_NX);
3032 3082 }
3033 3083
3034 3084 /*
3035 3085 * Regardless of whether or not we boot 64-bit,
3036 3086 * we should have a way to identify whether
3037 3087 * the CPU is capable of running 64-bit.
3038 3088 */
3039 3089 if (cp->cp_edx & CPUID_AMD_EDX_LM) {
3040 3090 add_x86_feature(featureset, X86FSET_64);
3041 3091 }
3042 3092
3043 3093 /* 1 GB large page - enable only for 64 bit kernel */
3044 3094 if (cp->cp_edx & CPUID_AMD_EDX_1GPG) {
3045 3095 add_x86_feature(featureset, X86FSET_1GPG);
3046 3096 }
3047 3097
3048 3098 if ((cpi->cpi_vendor == X86_VENDOR_AMD) &&
3049 3099 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) &&
3050 3100 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) {
3051 3101 add_x86_feature(featureset, X86FSET_SSE4A);
3052 3102 }
3053 3103
3054 3104 /*
3055 3105 * It's really tricky to support syscall/sysret in
3056 3106 * the i386 kernel; we rely on sysenter/sysexit
3057 3107 * instead. In the amd64 kernel, things are -way-
3058 3108 * better.
3059 3109 */
3060 3110 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) {
3061 3111 add_x86_feature(featureset, X86FSET_ASYSC);
3062 3112 }
3063 3113
3064 3114 /*
3065 3115 * While we're thinking about system calls, note
3066 3116 * that AMD processors don't support sysenter
3067 3117 * in long mode at all, so don't try to program them.
3068 3118 */
3069 3119 if (x86_vendor == X86_VENDOR_AMD) {
3070 3120 remove_x86_feature(featureset, X86FSET_SEP);
3071 3121 }
3072 3122
3073 3123 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) {
3074 3124 add_x86_feature(featureset, X86FSET_TSCP);
3075 3125 }
3076 3126
3077 3127 if (cp->cp_ecx & CPUID_AMD_ECX_SVM) {
3078 3128 add_x86_feature(featureset, X86FSET_SVM);
3079 3129 }
3080 3130
3081 3131 if (cp->cp_ecx & CPUID_AMD_ECX_TOPOEXT) {
3082 3132 add_x86_feature(featureset, X86FSET_TOPOEXT);
3083 3133 }
3084 3134
3085 3135 if (cp->cp_ecx & CPUID_AMD_ECX_PCEC) {
3086 3136 add_x86_feature(featureset, X86FSET_AMD_PCEC);
3087 3137 }
3088 3138
3089 3139 if (cp->cp_ecx & CPUID_AMD_ECX_XOP) {
3090 3140 add_x86_feature(featureset, X86FSET_XOP);
3091 3141 }
3092 3142
3093 3143 if (cp->cp_ecx & CPUID_AMD_ECX_FMA4) {
3094 3144 add_x86_feature(featureset, X86FSET_FMA4);
3095 3145 }
3096 3146
3097 3147 if (cp->cp_ecx & CPUID_AMD_ECX_TBM) {
3098 3148 add_x86_feature(featureset, X86FSET_TBM);
3099 3149 }
3100 3150
3101 3151 if (cp->cp_ecx & CPUID_AMD_ECX_MONITORX) {
3102 3152 add_x86_feature(featureset, X86FSET_MONITORX);
3103 3153 }
3104 3154 break;
3105 3155 default:
3106 3156 break;
3107 3157 }
3108 3158
3109 3159 /*
3110 3160 * Get CPUID data about processor cores and hyperthreads.
3111 3161 */
3112 3162 switch (cpi->cpi_vendor) {
3113 3163 case X86_VENDOR_Intel:
3114 3164 if (cpi->cpi_maxeax >= 4) {
3115 3165 cp = &cpi->cpi_std[4];
3116 3166 cp->cp_eax = 4;
3117 3167 cp->cp_ecx = 0;
3118 3168 (void) __cpuid_insn(cp);
3119 3169 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp);
3120 3170 }
3121 3171 /*FALLTHROUGH*/
3122 3172 case X86_VENDOR_AMD:
3123 3173 if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8)
3124 3174 break;
3125 3175 cp = &cpi->cpi_extd[8];
3126 3176 cp->cp_eax = CPUID_LEAF_EXT_8;
3127 3177 (void) __cpuid_insn(cp);
3128 3178 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8,
3129 3179 cp);
3130 3180
3131 3181 /*
3132 3182 * AMD uses ebx for some extended functions.
3133 3183 */
3134 3184 if (cpi->cpi_vendor == X86_VENDOR_AMD) {
3135 3185 /*
3136 3186 * While we're here, check for the AMD "Error
3137 3187 * Pointer Zero/Restore" feature. This can be
3139 3189 * used to set up the FP save handlers
3139 3189 * appropriately.
3140 3190 */
3141 3191 if (cp->cp_ebx & CPUID_AMD_EBX_ERR_PTR_ZERO) {
3142 3192 cpi->cpi_fp_amd_save = 0;
3143 3193 } else {
3144 3194 cpi->cpi_fp_amd_save = 1;
3145 3195 }
3146 3196
3147 3197 if (cp->cp_ebx & CPUID_AMD_EBX_CLZERO) {
3148 3198 add_x86_feature(featureset,
3149 3199 X86FSET_CLZERO);
3150 3200 }
3151 3201 }
3152 3202
3153 3203 /*
3154 3204 * Virtual and physical address limits from
3155 3205 * cpuid override previously guessed values.
3156 3206 */
3157 3207 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0);
3158 3208 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8);
3159 3209 break;
3160 3210 default:
3161 3211 break;
3162 3212 }
3163 3213
3164 3214 /*
3165 3215 * Get CPUID data about TSC Invariance in Deep C-State.
3166 3216 */
3167 3217 switch (cpi->cpi_vendor) {
3168 3218 case X86_VENDOR_Intel:
3169 3219 case X86_VENDOR_AMD:
3170 3220 if (cpi->cpi_maxeax >= 7) {
3171 3221 cp = &cpi->cpi_extd[7];
3172 3222 cp->cp_eax = 0x80000007;
3173 3223 cp->cp_ecx = 0;
3174 3224 (void) __cpuid_insn(cp);
3175 3225 }
3176 3226 break;
3177 3227 default:
3178 3228 break;
3179 3229 }
3180 3230 }
3181 3231
3182 3232 cpuid_pass1_topology(cpu, featureset);
3183 3233
3184 3234 /*
3185 3235 * Synthesize chip "revision" and socket type
3186 3236 */
3187 3237 cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family,
3188 3238 cpi->cpi_model, cpi->cpi_step);
3189 3239 cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor,
3190 3240 cpi->cpi_family, cpi->cpi_model, cpi->cpi_step);
3191 3241 cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family,
3192 3242 cpi->cpi_model, cpi->cpi_step);
3193 3243
3194 3244 if (cpi->cpi_vendor == X86_VENDOR_AMD) {
3195 3245 if (cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8 &&
3196 3246 cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_ERR_PTR_ZERO) {
3197 3247 /* Special handling for AMD FP not necessary. */
3198 3248 cpi->cpi_fp_amd_save = 0;
3199 3249 } else {
3200 3250 cpi->cpi_fp_amd_save = 1;
3201 3251 }
3202 3252 }
3203 3253
3204 3254 /*
3205 3255 * Check the processor leaves that are used for security features.
3206 3256 */
3207 3257 cpuid_scan_security(cpu, featureset);
3208 3258
3209 3259 pass1_done:
3210 3260 cpi->cpi_pass = 1;
3211 3261 }
3212 3262
3213 3263 /*
3214 3264 * Make copies of the cpuid table entries we depend on, in
3215 3265 * part for ease of parsing now, in part so that we have only
3216 3266 * one place to correct any of it, in part for ease of
3217 3267 * later export to userland, and in part so we can look at
3218 3268 * this stuff in a crash dump.
3219 3269 */
3220 3270
3221 3271 /*ARGSUSED*/
3222 3272 void
3223 3273 cpuid_pass2(cpu_t *cpu)
3224 3274 {
3225 3275 uint_t n, nmax;
3226 3276 int i;
3227 3277 struct cpuid_regs *cp;
3228 3278 uint8_t *dp;
3229 3279 uint32_t *iptr;
3230 3280 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
3231 3281
3232 3282 ASSERT(cpi->cpi_pass == 1);
3233 3283
3234 3284 if (cpi->cpi_maxeax < 1)
3235 3285 goto pass2_done;
3236 3286
3237 3287 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD)
3238 3288 nmax = NMAX_CPI_STD;
3239 3289 /*
3240 3290 * (We already handled n == 0 and n == 1 in pass 1)
3241 3291 */
3242 3292 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) {
3243 3293 cp->cp_eax = n;
3244 3294
3245 3295 /*
3246 3296 * n == 7 was handled in pass 1
3247 3297 */
3248 3298 if (n == 7)
3249 3299 continue;
3250 3300
3251 3301 /*
3252 3302 * CPUID function 4 expects %ecx to be initialized
3253 3303 * with an index which indicates which cache to return
3254 3304 * information about. The OS is expected to call function 4
3255 3305 * with %ecx set to 0, 1, 2, ... until it returns with
3256 3306 * EAX[4:0] set to 0, which indicates there are no more
3257 3307 * caches.
3258 3308 *
3259 3309 * Here, populate cpi_std[4] with the information returned by
3260 3310 * function 4 when %ecx == 0, and do the rest in cpuid_pass3()
3261 3311 * when dynamic memory allocation becomes available.
3262 3312 *
3263 3313 * Note: we need to explicitly initialize %ecx here, since
3264 3314 * function 4 may have been previously invoked.
3265 3315 */
3266 3316 if (n == 4)
3267 3317 cp->cp_ecx = 0;
3268 3318
3269 3319 (void) __cpuid_insn(cp);
3270 3320 platform_cpuid_mangle(cpi->cpi_vendor, n, cp);
3271 3321 switch (n) {
3272 3322 case 2:
3273 3323 /*
3274 3324 * "the lower 8 bits of the %eax register
3275 3325 * contain a value that identifies the number
3276 3326 * of times the cpuid [instruction] has to be
3277 3327 * executed to obtain a complete image of the
3278 3328 * processor's caching systems."
3279 3329 *
3280 3330 * How *do* they make this stuff up?
3281 3331 */
3282 3332 cpi->cpi_ncache = sizeof (*cp) *
3283 3333 BITX(cp->cp_eax, 7, 0);
3284 3334 if (cpi->cpi_ncache == 0)
3285 3335 break;
3286 3336 cpi->cpi_ncache--; /* skip count byte */
3287 3337
3288 3338 /*
3289 3339 * Well, for now, rather than attempt to implement
3290 3340 * this slightly dubious algorithm, we just look
3291 3341 * at the first 15 ..
3292 3342 */
3293 3343 if (cpi->cpi_ncache > (sizeof (*cp) - 1))
3294 3344 cpi->cpi_ncache = sizeof (*cp) - 1;
3295 3345
3296 3346 dp = cpi->cpi_cacheinfo;
3297 3347 if (BITX(cp->cp_eax, 31, 31) == 0) {
3298 3348 uint8_t *p = (void *)&cp->cp_eax;
3299 3349 for (i = 1; i < 4; i++)
3300 3350 if (p[i] != 0)
3301 3351 *dp++ = p[i];
3302 3352 }
3303 3353 if (BITX(cp->cp_ebx, 31, 31) == 0) {
3304 3354 uint8_t *p = (void *)&cp->cp_ebx;
3305 3355 for (i = 0; i < 4; i++)
3306 3356 if (p[i] != 0)
3307 3357 *dp++ = p[i];
3308 3358 }
3309 3359 if (BITX(cp->cp_ecx, 31, 31) == 0) {
3310 3360 uint8_t *p = (void *)&cp->cp_ecx;
3311 3361 for (i = 0; i < 4; i++)
3312 3362 if (p[i] != 0)
3313 3363 *dp++ = p[i];
3314 3364 }
3315 3365 if (BITX(cp->cp_edx, 31, 31) == 0) {
3316 3366 uint8_t *p = (void *)&cp->cp_edx;
3317 3367 for (i = 0; i < 4; i++)
3318 3368 if (p[i] != 0)
3319 3369 *dp++ = p[i];
3320 3370 }
3321 3371 break;
3322 3372
3323 3373 case 3: /* Processor serial number, if PSN supported */
3324 3374 break;
3325 3375
3326 3376 case 4: /* Deterministic cache parameters */
3327 3377 break;
3328 3378
3329 3379 case 5: /* Monitor/Mwait parameters */
3330 3380 {
3331 3381 size_t mwait_size;
3332 3382
3333 3383 /*
3334 3384 * check cpi_mwait.support which was set in cpuid_pass1
3335 3385 */
3336 3386 if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT))
3337 3387 break;
3338 3388
3339 3389 /*
3340 3390 * Protect ourselves from an insane mwait line size.
3341 3391 * Workaround for incomplete hardware emulator(s).
3342 3392 */
3343 3393 mwait_size = (size_t)MWAIT_SIZE_MAX(cpi);
3344 3394 if (mwait_size < sizeof (uint32_t) ||
3345 3395 !ISP2(mwait_size)) {
3346 3396 #if DEBUG
3347 3397 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait "
3348 3398 "size %ld", cpu->cpu_id, (long)mwait_size);
3349 3399 #endif
3350 3400 break;
3351 3401 }
3352 3402
3353 3403 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi);
3354 3404 cpi->cpi_mwait.mon_max = mwait_size;
3355 3405 if (MWAIT_EXTENSION(cpi)) {
3356 3406 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS;
3357 3407 if (MWAIT_INT_ENABLE(cpi))
3358 3408 cpi->cpi_mwait.support |=
3359 3409 MWAIT_ECX_INT_ENABLE;
3360 3410 }
3361 3411 break;
3362 3412 }
3363 3413 default:
3364 3414 break;
3365 3415 }
3366 3416 }
3367 3417
3368 3418 /*
3369 3419 * XSAVE enumeration
3370 3420 */
3371 3421 if (cpi->cpi_maxeax >= 0xD) {
3372 3422 struct cpuid_regs regs;
3373 3423 boolean_t cpuid_d_valid = B_TRUE;
3374 3424
3375 3425 cp = &regs;
3376 3426 cp->cp_eax = 0xD;
3377 3427 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
3378 3428
3379 3429 (void) __cpuid_insn(cp);
3380 3430
3381 3431 /*
3382 3432 * Sanity checks for debug
3383 3433 */
3384 3434 if ((cp->cp_eax & XFEATURE_LEGACY_FP) == 0 ||
3385 3435 (cp->cp_eax & XFEATURE_SSE) == 0) {
3386 3436 cpuid_d_valid = B_FALSE;
3387 3437 }
3388 3438
3389 3439 cpi->cpi_xsave.xsav_hw_features_low = cp->cp_eax;
3390 3440 cpi->cpi_xsave.xsav_hw_features_high = cp->cp_edx;
3391 3441 cpi->cpi_xsave.xsav_max_size = cp->cp_ecx;
3392 3442
3393 3443 /*
3394 3444 * If the hw supports AVX, get the size and offset in the save
3395 3445 * area for the ymm state.
3396 3446 */
3397 3447 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX) {
3398 3448 cp->cp_eax = 0xD;
3399 3449 cp->cp_ecx = 2;
3400 3450 cp->cp_edx = cp->cp_ebx = 0;
3401 3451
3402 3452 (void) __cpuid_insn(cp);
3403 3453
3404 3454 if (cp->cp_ebx != CPUID_LEAFD_2_YMM_OFFSET ||
3405 3455 cp->cp_eax != CPUID_LEAFD_2_YMM_SIZE) {
3406 3456 cpuid_d_valid = B_FALSE;
3407 3457 }
3408 3458
3409 3459 cpi->cpi_xsave.ymm_size = cp->cp_eax;
3410 3460 cpi->cpi_xsave.ymm_offset = cp->cp_ebx;
3411 3461 }
3412 3462
3413 3463 /*
3414 3464 * If the hw supports MPX, get the size and offset in the
3415 3465 * save area for BNDREGS and BNDCSR.
3416 3466 */
3417 3467 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_MPX) {
3418 3468 cp->cp_eax = 0xD;
3419 3469 cp->cp_ecx = 3;
3420 3470 cp->cp_edx = cp->cp_ebx = 0;
3421 3471
3422 3472 (void) __cpuid_insn(cp);
3423 3473
3424 3474 cpi->cpi_xsave.bndregs_size = cp->cp_eax;
3425 3475 cpi->cpi_xsave.bndregs_offset = cp->cp_ebx;
3426 3476
3427 3477 cp->cp_eax = 0xD;
3428 3478 cp->cp_ecx = 4;
3429 3479 cp->cp_edx = cp->cp_ebx = 0;
3430 3480
3431 3481 (void) __cpuid_insn(cp);
3432 3482
3433 3483 cpi->cpi_xsave.bndcsr_size = cp->cp_eax;
3434 3484 cpi->cpi_xsave.bndcsr_offset = cp->cp_ebx;
3435 3485 }
3436 3486
3437 3487 /*
3438 3488 * If the hw supports AVX512, get the size and offset in the
3439 3489 * save area for the opmask registers and zmm state.
3440 3490 */
3441 3491 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX512) {
3442 3492 cp->cp_eax = 0xD;
3443 3493 cp->cp_ecx = 5;
3444 3494 cp->cp_edx = cp->cp_ebx = 0;
3445 3495
3446 3496 (void) __cpuid_insn(cp);
3447 3497
3448 3498 cpi->cpi_xsave.opmask_size = cp->cp_eax;
3449 3499 cpi->cpi_xsave.opmask_offset = cp->cp_ebx;
3450 3500
3451 3501 cp->cp_eax = 0xD;
3452 3502 cp->cp_ecx = 6;
3453 3503 cp->cp_edx = cp->cp_ebx = 0;
3454 3504
3455 3505 (void) __cpuid_insn(cp);
3456 3506
3457 3507 cpi->cpi_xsave.zmmlo_size = cp->cp_eax;
3458 3508 cpi->cpi_xsave.zmmlo_offset = cp->cp_ebx;
3459 3509
3460 3510 cp->cp_eax = 0xD;
3461 3511 cp->cp_ecx = 7;
3462 3512 cp->cp_edx = cp->cp_ebx = 0;
3463 3513
3464 3514 (void) __cpuid_insn(cp);
3465 3515
3466 3516 cpi->cpi_xsave.zmmhi_size = cp->cp_eax;
3467 3517 cpi->cpi_xsave.zmmhi_offset = cp->cp_ebx;
3468 3518 }
3469 3519
3470 3520 if (is_x86_feature(x86_featureset, X86FSET_XSAVE)) {
3471 3521 xsave_state_size = 0;
3472 3522 } else if (cpuid_d_valid) {
3473 3523 xsave_state_size = cpi->cpi_xsave.xsav_max_size;
3474 3524 } else {
3475 3525 /* Broken CPUID 0xD, probably in HVM */
3476 3526 cmn_err(CE_WARN, "cpu%d: CPUID.0xD returns invalid "
3477 3527 "value: hw_low = %d, hw_high = %d, xsave_size = %d"
3478 3528 ", ymm_size = %d, ymm_offset = %d\n",
3479 3529 cpu->cpu_id, cpi->cpi_xsave.xsav_hw_features_low,
3480 3530 cpi->cpi_xsave.xsav_hw_features_high,
3481 3531 (int)cpi->cpi_xsave.xsav_max_size,
3482 3532 (int)cpi->cpi_xsave.ymm_size,
3483 3533 (int)cpi->cpi_xsave.ymm_offset);
3484 3534
3485 3535 if (xsave_state_size != 0) {
3486 3536 /*
3487 3537 * This must be a non-boot CPU. We cannot
3488 3538 * continue, because boot cpu has already
3489 3539 * enabled XSAVE.
3490 3540 */
3491 3541 ASSERT(cpu->cpu_id != 0);
3492 3542 cmn_err(CE_PANIC, "cpu%d: we have already "
3493 3543 "enabled XSAVE on boot cpu, cannot "
3494 3544 "continue.", cpu->cpu_id);
3495 3545 } else {
3496 3546 /*
3497 3547 * If we reached here on the boot CPU, it's also
3498 3548 * almost certain that we'll reach here on the
3499 3549 * non-boot CPUs. When we're here on the boot CPU
3500 3550 * we should disable the feature; on a non-boot
3501 3551 * CPU we need to confirm that we already have.
3502 3552 */
3503 3553 if (cpu->cpu_id == 0) {
3504 3554 remove_x86_feature(x86_featureset,
3505 3555 X86FSET_XSAVE);
3506 3556 remove_x86_feature(x86_featureset,
3507 3557 X86FSET_AVX);
3508 3558 remove_x86_feature(x86_featureset,
3509 3559 X86FSET_F16C);
3510 3560 remove_x86_feature(x86_featureset,
3511 3561 X86FSET_BMI1);
3512 3562 remove_x86_feature(x86_featureset,
3513 3563 X86FSET_BMI2);
3514 3564 remove_x86_feature(x86_featureset,
3515 3565 X86FSET_FMA);
3516 3566 remove_x86_feature(x86_featureset,
3517 3567 X86FSET_AVX2);
3518 3568 remove_x86_feature(x86_featureset,
3519 3569 X86FSET_MPX);
3520 3570 remove_x86_feature(x86_featureset,
3521 3571 X86FSET_AVX512F);
3522 3572 remove_x86_feature(x86_featureset,
3523 3573 X86FSET_AVX512DQ);
3524 3574 remove_x86_feature(x86_featureset,
3525 3575 X86FSET_AVX512PF);
3526 3576 remove_x86_feature(x86_featureset,
3527 3577 X86FSET_AVX512ER);
3528 3578 remove_x86_feature(x86_featureset,
3529 3579 X86FSET_AVX512CD);
3530 3580 remove_x86_feature(x86_featureset,
3531 3581 X86FSET_AVX512BW);
3532 3582 remove_x86_feature(x86_featureset,
3533 3583 X86FSET_AVX512VL);
3534 3584 remove_x86_feature(x86_featureset,
3535 3585 X86FSET_AVX512FMA);
3536 3586 remove_x86_feature(x86_featureset,
3537 3587 X86FSET_AVX512VBMI);
3538 3588 remove_x86_feature(x86_featureset,
3539 3589 X86FSET_AVX512VNNI);
3540 3590 remove_x86_feature(x86_featureset,
3541 3591 X86FSET_AVX512VPOPCDQ);
3542 3592 remove_x86_feature(x86_featureset,
3543 3593 X86FSET_AVX512NNIW);
3544 3594 remove_x86_feature(x86_featureset,
3545 3595 X86FSET_AVX512FMAPS);
3546 3596
3547 3597 CPI_FEATURES_ECX(cpi) &=
3548 3598 ~CPUID_INTC_ECX_XSAVE;
3549 3599 CPI_FEATURES_ECX(cpi) &=
3550 3600 ~CPUID_INTC_ECX_AVX;
3551 3601 CPI_FEATURES_ECX(cpi) &=
3552 3602 ~CPUID_INTC_ECX_F16C;
3553 3603 CPI_FEATURES_ECX(cpi) &=
3554 3604 ~CPUID_INTC_ECX_FMA;
3555 3605 CPI_FEATURES_7_0_EBX(cpi) &=
3556 3606 ~CPUID_INTC_EBX_7_0_BMI1;
3557 3607 CPI_FEATURES_7_0_EBX(cpi) &=
3558 3608 ~CPUID_INTC_EBX_7_0_BMI2;
3559 3609 CPI_FEATURES_7_0_EBX(cpi) &=
3560 3610 ~CPUID_INTC_EBX_7_0_AVX2;
3561 3611 CPI_FEATURES_7_0_EBX(cpi) &=
3562 3612 ~CPUID_INTC_EBX_7_0_MPX;
3563 3613 CPI_FEATURES_7_0_EBX(cpi) &=
3564 3614 ~CPUID_INTC_EBX_7_0_ALL_AVX512;
3565 3615
3566 3616 CPI_FEATURES_7_0_ECX(cpi) &=
3567 3617 ~CPUID_INTC_ECX_7_0_ALL_AVX512;
3568 3618
3569 3619 CPI_FEATURES_7_0_EDX(cpi) &=
3570 3620 ~CPUID_INTC_EDX_7_0_ALL_AVX512;
3571 3621
3572 3622 xsave_force_disable = B_TRUE;
3573 3623 } else {
3574 3624 VERIFY(is_x86_feature(x86_featureset,
3575 3625 X86FSET_XSAVE) == B_FALSE);
3576 3626 }
3577 3627 }
3578 3628 }
3579 3629 }
3580 3630
3581 3631
3582 3632 if ((cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) == 0)
3583 3633 goto pass2_done;
3584 3634
3585 3635 if ((nmax = cpi->cpi_xmaxeax - CPUID_LEAF_EXT_0 + 1) > NMAX_CPI_EXTD)
3586 3636 nmax = NMAX_CPI_EXTD;
3587 3637 /*
3588 3638 * Copy the extended properties, fixing them as we go.
3589 3639 * (We already handled n == 0 and n == 1 in pass 1)
3590 3640 */
3591 3641 iptr = (void *)cpi->cpi_brandstr;
3592 3642 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) {
3593 3643 cp->cp_eax = CPUID_LEAF_EXT_0 + n;
3594 3644 (void) __cpuid_insn(cp);
3595 3645 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_0 + n,
3596 3646 cp);
3597 3647 switch (n) {
3598 3648 case 2:
3599 3649 case 3:
3600 3650 case 4:
3601 3651 /*
3602 3652 * Extract the brand string
3603 3653 */
3604 3654 *iptr++ = cp->cp_eax;
3605 3655 *iptr++ = cp->cp_ebx;
3606 3656 *iptr++ = cp->cp_ecx;
3607 3657 *iptr++ = cp->cp_edx;
3608 3658 break;
3609 3659 case 5:
3610 3660 switch (cpi->cpi_vendor) {
3611 3661 case X86_VENDOR_AMD:
3612 3662 /*
3613 3663 * The Athlon and Duron were the first
3614 3664 * parts to report the sizes of the
3615 3665 * TLB for large pages. Before then,
3616 3666 * we don't trust the data.
3617 3667 */
3618 3668 if (cpi->cpi_family < 6 ||
3619 3669 (cpi->cpi_family == 6 &&
3620 3670 cpi->cpi_model < 1))
3621 3671 cp->cp_eax = 0;
3622 3672 break;
3623 3673 default:
3624 3674 break;
3625 3675 }
3626 3676 break;
3627 3677 case 6:
3628 3678 switch (cpi->cpi_vendor) {
3629 3679 case X86_VENDOR_AMD:
3630 3680 /*
3631 3681 * The Athlon and Duron were the first
3632 3682 * AMD parts with L2 TLBs.
3633 3683 * Before then, don't trust the data.
3634 3684 */
3635 3685 if (cpi->cpi_family < 6 ||
3636 3686 cpi->cpi_family == 6 &&
3637 3687 cpi->cpi_model < 1)
3638 3688 cp->cp_eax = cp->cp_ebx = 0;
3639 3689 /*
3640 3690 * AMD Duron rev A0 reports L2
3641 3691 * cache size incorrectly as 1K
3642 3692 * when it is really 64K
3643 3693 */
3644 3694 if (cpi->cpi_family == 6 &&
3645 3695 cpi->cpi_model == 3 &&
3646 3696 cpi->cpi_step == 0) {
3647 3697 cp->cp_ecx &= 0xffff;
3648 3698 cp->cp_ecx |= 0x400000;
3649 3699 }
3650 3700 break;
3651 3701 case X86_VENDOR_Cyrix: /* VIA C3 */
3652 3702 /*
3653 3703 * VIA C3 processors are a bit messed
3654 3704 * up w.r.t. encoding cache sizes in %ecx
3655 3705 */
3656 3706 if (cpi->cpi_family != 6)
3657 3707 break;
3658 3708 /*
3659 3709 * models 7 and 8 were incorrectly encoded
3660 3710 *
3661 3711 * xxx is model 8 really broken?
3662 3712 */
3663 3713 if (cpi->cpi_model == 7 ||
3664 3714 cpi->cpi_model == 8)
3665 3715 cp->cp_ecx =
3666 3716 BITX(cp->cp_ecx, 31, 24) << 16 |
3667 3717 BITX(cp->cp_ecx, 23, 16) << 12 |
3668 3718 BITX(cp->cp_ecx, 15, 8) << 8 |
3669 3719 BITX(cp->cp_ecx, 7, 0);
3670 3720 /*
3671 3721 * model 9 stepping 1 has wrong associativity
3672 3722 */
3673 3723 if (cpi->cpi_model == 9 && cpi->cpi_step == 1)
3674 3724 cp->cp_ecx |= 8 << 12;
3675 3725 break;
3676 3726 case X86_VENDOR_Intel:
3677 3727 /*
3678 3728 * Extended L2 Cache features function.
3679 3729 * First appeared on Prescott.
3680 3730 */
3681 3731 default:
3682 3732 break;
3683 3733 }
3684 3734 break;
3685 3735 default:
3686 3736 break;
3687 3737 }
3688 3738 }
3689 3739
3690 3740 pass2_done:
3691 3741 cpi->cpi_pass = 2;
3692 3742 }
3693 3743
3694 3744 static const char *
3695 3745 intel_cpubrand(const struct cpuid_info *cpi)
3696 3746 {
3697 3747 int i;
3698 3748
3699 3749 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
3700 3750 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
3701 3751 return ("i486");
3702 3752
3703 3753 switch (cpi->cpi_family) {
3704 3754 case 5:
3705 3755 return ("Intel Pentium(r)");
3706 3756 case 6:
3707 3757 switch (cpi->cpi_model) {
3708 3758 uint_t celeron, xeon;
3709 3759 const struct cpuid_regs *cp;
3710 3760 case 0:
3711 3761 case 1:
3712 3762 case 2:
3713 3763 return ("Intel Pentium(r) Pro");
3714 3764 case 3:
3715 3765 case 4:
3716 3766 return ("Intel Pentium(r) II");
3717 3767 case 6:
3718 3768 return ("Intel Celeron(r)");
3719 3769 case 5:
3720 3770 case 7:
3721 3771 celeron = xeon = 0;
3722 3772 cp = &cpi->cpi_std[2]; /* cache info */
3723 3773
3724 3774 for (i = 1; i < 4; i++) {
3725 3775 uint_t tmp;
3726 3776
3727 3777 tmp = (cp->cp_eax >> (8 * i)) & 0xff;
3728 3778 if (tmp == 0x40)
3729 3779 celeron++;
3730 3780 if (tmp >= 0x44 && tmp <= 0x45)
3731 3781 xeon++;
3732 3782 }
3733 3783
3734 3784 for (i = 0; i < 2; i++) {
3735 3785 uint_t tmp;
3736 3786
3737 3787 tmp = (cp->cp_ebx >> (8 * i)) & 0xff;
3738 3788 if (tmp == 0x40)
3739 3789 celeron++;
3740 3790 else if (tmp >= 0x44 && tmp <= 0x45)
3741 3791 xeon++;
3742 3792 }
3743 3793
3744 3794 for (i = 0; i < 4; i++) {
3745 3795 uint_t tmp;
3746 3796
3747 3797 tmp = (cp->cp_ecx >> (8 * i)) & 0xff;
3748 3798 if (tmp == 0x40)
3749 3799 celeron++;
3750 3800 else if (tmp >= 0x44 && tmp <= 0x45)
3751 3801 xeon++;
3752 3802 }
3753 3803
3754 3804 for (i = 0; i < 4; i++) {
3755 3805 uint_t tmp;
3756 3806
3757 3807 tmp = (cp->cp_edx >> (8 * i)) & 0xff;
3758 3808 if (tmp == 0x40)
3759 3809 celeron++;
3760 3810 else if (tmp >= 0x44 && tmp <= 0x45)
3761 3811 xeon++;
3762 3812 }
3763 3813
3764 3814 if (celeron)
3765 3815 return ("Intel Celeron(r)");
3766 3816 if (xeon)
3767 3817 return (cpi->cpi_model == 5 ?
3768 3818 "Intel Pentium(r) II Xeon(tm)" :
3769 3819 "Intel Pentium(r) III Xeon(tm)");
3770 3820 return (cpi->cpi_model == 5 ?
3771 3821 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" :
3772 3822 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)");
3773 3823 default:
3774 3824 break;
3775 3825 }
3776 3826 default:
3777 3827 break;
3778 3828 }
3779 3829
3780 3830 /* BrandID is present if the field is nonzero */
3781 3831 if (cpi->cpi_brandid != 0) {
3782 3832 static const struct {
3783 3833 uint_t bt_bid;
3784 3834 const char *bt_str;
3785 3835 } brand_tbl[] = {
3786 3836 { 0x1, "Intel(r) Celeron(r)" },
3787 3837 { 0x2, "Intel(r) Pentium(r) III" },
3788 3838 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" },
3789 3839 { 0x4, "Intel(r) Pentium(r) III" },
3790 3840 { 0x6, "Mobile Intel(r) Pentium(r) III" },
3791 3841 { 0x7, "Mobile Intel(r) Celeron(r)" },
3792 3842 { 0x8, "Intel(r) Pentium(r) 4" },
3793 3843 { 0x9, "Intel(r) Pentium(r) 4" },
3794 3844 { 0xa, "Intel(r) Celeron(r)" },
3795 3845 { 0xb, "Intel(r) Xeon(tm)" },
3796 3846 { 0xc, "Intel(r) Xeon(tm) MP" },
3797 3847 { 0xe, "Mobile Intel(r) Pentium(r) 4" },
3798 3848 { 0xf, "Mobile Intel(r) Celeron(r)" },
3799 3849 { 0x11, "Mobile Genuine Intel(r)" },
3800 3850 { 0x12, "Intel(r) Celeron(r) M" },
3801 3851 { 0x13, "Mobile Intel(r) Celeron(r)" },
3802 3852 { 0x14, "Intel(r) Celeron(r)" },
3803 3853 { 0x15, "Mobile Genuine Intel(r)" },
3804 3854 { 0x16, "Intel(r) Pentium(r) M" },
3805 3855 { 0x17, "Mobile Intel(r) Celeron(r)" }
3806 3856 };
3807 3857 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]);
3808 3858 uint_t sgn;
3809 3859
3810 3860 sgn = (cpi->cpi_family << 8) |
3811 3861 (cpi->cpi_model << 4) | cpi->cpi_step;
3812 3862
3813 3863 for (i = 0; i < btblmax; i++)
3814 3864 if (brand_tbl[i].bt_bid == cpi->cpi_brandid)
3815 3865 break;
3816 3866 if (i < btblmax) {
3817 3867 if (sgn == 0x6b1 && cpi->cpi_brandid == 3)
3818 3868 return ("Intel(r) Celeron(r)");
3819 3869 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb)
3820 3870 return ("Intel(r) Xeon(tm) MP");
3821 3871 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe)
3822 3872 return ("Intel(r) Xeon(tm)");
3823 3873 return (brand_tbl[i].bt_str);
3824 3874 }
3825 3875 }
3826 3876
3827 3877 return (NULL);
3828 3878 }
3829 3879
3830 3880 static const char *
3831 3881 amd_cpubrand(const struct cpuid_info *cpi)
3832 3882 {
3833 3883 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
3834 3884 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
3835 3885 return ("i486 compatible");
3836 3886
3837 3887 switch (cpi->cpi_family) {
3838 3888 case 5:
3839 3889 switch (cpi->cpi_model) {
3840 3890 case 0:
3841 3891 case 1:
3842 3892 case 2:
3843 3893 case 3:
3844 3894 case 4:
3845 3895 case 5:
3846 3896 return ("AMD-K5(r)");
3847 3897 case 6:
3848 3898 case 7:
3849 3899 return ("AMD-K6(r)");
3850 3900 case 8:
3851 3901 return ("AMD-K6(r)-2");
3852 3902 case 9:
3853 3903 return ("AMD-K6(r)-III");
3854 3904 default:
3855 3905 return ("AMD (family 5)");
3856 3906 }
3857 3907 case 6:
3858 3908 switch (cpi->cpi_model) {
3859 3909 case 1:
3860 3910 return ("AMD-K7(tm)");
3861 3911 case 0:
3862 3912 case 2:
3863 3913 case 4:
3864 3914 return ("AMD Athlon(tm)");
3865 3915 case 3:
3866 3916 case 7:
3867 3917 return ("AMD Duron(tm)");
3868 3918 case 6:
3869 3919 case 8:
3870 3920 case 10:
3871 3921 /*
3872 3922 * Use the L2 cache size to distinguish
3873 3923 */
3874 3924 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ?
3875 3925 "AMD Athlon(tm)" : "AMD Duron(tm)");
3876 3926 default:
3877 3927 return ("AMD (family 6)");
3878 3928 }
3879 3929 default:
3880 3930 break;
3881 3931 }
3882 3932
3883 3933 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 &&
3884 3934 cpi->cpi_brandid != 0) {
3885 3935 switch (BITX(cpi->cpi_brandid, 7, 5)) {
3886 3936 case 3:
3887 3937 return ("AMD Opteron(tm) UP 1xx");
3888 3938 case 4:
3889 3939 return ("AMD Opteron(tm) DP 2xx");
3890 3940 case 5:
3891 3941 return ("AMD Opteron(tm) MP 8xx");
3892 3942 default:
3893 3943 return ("AMD Opteron(tm)");
3894 3944 }
3895 3945 }
3896 3946
3897 3947 return (NULL);
3898 3948 }
3899 3949
3900 3950 static const char *
3901 3951 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type)
3902 3952 {
3903 3953 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
3904 3954 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 ||
3905 3955 type == X86_TYPE_CYRIX_486)
3906 3956 return ("i486 compatible");
3907 3957
3908 3958 switch (type) {
3909 3959 case X86_TYPE_CYRIX_6x86:
3910 3960 return ("Cyrix 6x86");
3911 3961 case X86_TYPE_CYRIX_6x86L:
3912 3962 return ("Cyrix 6x86L");
3913 3963 case X86_TYPE_CYRIX_6x86MX:
3914 3964 return ("Cyrix 6x86MX");
3915 3965 case X86_TYPE_CYRIX_GXm:
3916 3966 return ("Cyrix GXm");
3917 3967 case X86_TYPE_CYRIX_MediaGX:
3918 3968 return ("Cyrix MediaGX");
3919 3969 case X86_TYPE_CYRIX_MII:
3920 3970 return ("Cyrix M2");
3921 3971 case X86_TYPE_VIA_CYRIX_III:
3922 3972 return ("VIA Cyrix M3");
3923 3973 default:
3924 3974 /*
3925 3975 * Have another wild guess ..
3926 3976 */
3927 3977 if (cpi->cpi_family == 4 && cpi->cpi_model == 9)
3928 3978 return ("Cyrix 5x86");
3929 3979 else if (cpi->cpi_family == 5) {
3930 3980 switch (cpi->cpi_model) {
3931 3981 case 2:
3932 3982 return ("Cyrix 6x86"); /* Cyrix M1 */
3933 3983 case 4:
3934 3984 return ("Cyrix MediaGX");
3935 3985 default:
3936 3986 break;
3937 3987 }
3938 3988 } else if (cpi->cpi_family == 6) {
3939 3989 switch (cpi->cpi_model) {
3940 3990 case 0:
3941 3991 return ("Cyrix 6x86MX"); /* Cyrix M2? */
3942 3992 case 5:
3943 3993 case 6:
3944 3994 case 7:
3945 3995 case 8:
3946 3996 case 9:
3947 3997 return ("VIA C3");
3948 3998 default:
3949 3999 break;
3950 4000 }
3951 4001 }
3952 4002 break;
3953 4003 }
3954 4004 return (NULL);
3955 4005 }
3956 4006
3957 4007 /*
3958 4008 * This only gets called in the case that the CPU extended
3959 4009 * feature brand string leaves (0x80000002, 0x80000003, 0x80000004)
3960 4010 * aren't available, or contain null bytes for some reason.
3961 4011 */
3962 4012 static void
3963 4013 fabricate_brandstr(struct cpuid_info *cpi)
3964 4014 {
3965 4015 const char *brand = NULL;
3966 4016
3967 4017 switch (cpi->cpi_vendor) {
3968 4018 case X86_VENDOR_Intel:
3969 4019 brand = intel_cpubrand(cpi);
3970 4020 break;
3971 4021 case X86_VENDOR_AMD:
3972 4022 brand = amd_cpubrand(cpi);
3973 4023 break;
3974 4024 case X86_VENDOR_Cyrix:
3975 4025 brand = cyrix_cpubrand(cpi, x86_type);
3976 4026 break;
3977 4027 case X86_VENDOR_NexGen:
3978 4028 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
3979 4029 brand = "NexGen Nx586";
3980 4030 break;
3981 4031 case X86_VENDOR_Centaur:
3982 4032 if (cpi->cpi_family == 5)
3983 4033 switch (cpi->cpi_model) {
3984 4034 case 4:
3985 4035 brand = "Centaur C6";
3986 4036 break;
3987 4037 case 8:
3988 4038 brand = "Centaur C2";
3989 4039 break;
3990 4040 case 9:
3991 4041 brand = "Centaur C3";
3992 4042 break;
3993 4043 default:
3994 4044 break;
3995 4045 }
3996 4046 break;
3997 4047 case X86_VENDOR_Rise:
3998 4048 if (cpi->cpi_family == 5 &&
3999 4049 (cpi->cpi_model == 0 || cpi->cpi_model == 2))
4000 4050 brand = "Rise mP6";
4001 4051 break;
4002 4052 case X86_VENDOR_SiS:
4003 4053 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
4004 4054 brand = "SiS 55x";
4005 4055 break;
4006 4056 case X86_VENDOR_TM:
4007 4057 if (cpi->cpi_family == 5 && cpi->cpi_model == 4)
4008 4058 brand = "Transmeta Crusoe TM3x00 or TM5x00";
4009 4059 break;
4010 4060 case X86_VENDOR_NSC:
4011 4061 case X86_VENDOR_UMC:
4012 4062 default:
4013 4063 break;
4014 4064 }
4015 4065 if (brand) {
4016 4066 (void) strcpy((char *)cpi->cpi_brandstr, brand);
4017 4067 return;
4018 4068 }
4019 4069
4020 4070 /*
4021 4071 * If all else fails ...
4022 4072 */
4023 4073 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr),
4024 4074 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family,
4025 4075 cpi->cpi_model, cpi->cpi_step);
4026 4076 }
4027 4077
4028 4078 /*
4029 4079 * This routine is called just after kernel memory allocation
4030 4080 * becomes available on cpu0, and as part of mp_startup() on
4031 4081 * the other cpus.
4032 4082 *
4033 4083 * Fix up the brand string, and collect any information from cpuid
4034 4084 * that requires dynamically allocated storage to represent.
4035 4085 */
4036 4086 /*ARGSUSED*/
4037 4087 void
4038 4088 cpuid_pass3(cpu_t *cpu)
4039 4089 {
4040 4090 int i, max, shft, level, size;
4041 4091 struct cpuid_regs regs;
4042 4092 struct cpuid_regs *cp;
4043 4093 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4044 4094
4045 4095 ASSERT(cpi->cpi_pass == 2);
4046 4096
4047 4097 /*
4048 4098 * Deterministic cache parameters
4049 4099 *
4050 4100 * Intel uses leaf 0x4 for this, while AMD uses leaf 0x8000001d. The
4051 4101 * values that are present are currently defined to be the same. This
4052 4102 * means we can use the same logic to parse it as long as we use the
4053 4103 * appropriate leaf to get the data. If you're updating this, make sure
4054 4104 * you're careful about which vendor supports which aspect.
4055 4105 *
4056 4106 * Take this opportunity to detect the number of threads sharing the
4057 4107 * last level cache, and construct a corresponding cache id. The
4058 4108 * respective cpuid_info members are initialized to the default case of
4059 4109 * "no last level cache sharing".
4060 4110 */
4061 4111 cpi->cpi_ncpu_shr_last_cache = 1;
4062 4112 cpi->cpi_last_lvl_cacheid = cpu->cpu_id;
4063 4113
4064 4114 if ((cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) ||
4065 4115 (cpi->cpi_vendor == X86_VENDOR_AMD &&
4066 4116 cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_1d &&
4067 4117 is_x86_feature(x86_featureset, X86FSET_TOPOEXT))) {
4068 4118 uint32_t leaf;
4069 4119
4070 4120 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
4071 4121 leaf = 4;
4072 4122 } else {
4073 4123 leaf = CPUID_LEAF_EXT_1d;
4074 4124 }
4075 4125
4076 4126 /*
4077 4127 * Find the # of elements (size) returned by the leaf and along
4078 4128 * the way detect last level cache sharing details.
4079 4129 */
4080 4130 bzero(&regs, sizeof (regs));
4081 4131 cp = &regs;
4082 4132 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) {
4083 4133 cp->cp_eax = leaf;
4084 4134 cp->cp_ecx = i;
4085 4135
4086 4136 (void) __cpuid_insn(cp);
4087 4137
4088 4138 if (CPI_CACHE_TYPE(cp) == 0)
4089 4139 break;
4090 4140 level = CPI_CACHE_LVL(cp);
4091 4141 if (level > max) {
4092 4142 max = level;
4093 4143 cpi->cpi_ncpu_shr_last_cache =
4094 4144 CPI_NTHR_SHR_CACHE(cp) + 1;
4095 4145 }
4096 4146 }
4097 4147 cpi->cpi_cache_leaf_size = size = i;
4098 4148
4099 4149 /*
4100 4150 * Allocate the cpi_cache_leaves array. The first element
4101 4151 * references the regs for the corresponding leaf with %ecx set
4102 4152 * to 0. This was gathered in cpuid_pass2().
4103 4153 */
4104 4154 if (size > 0) {
4105 4155 cpi->cpi_cache_leaves =
4106 4156 kmem_alloc(size * sizeof (cp), KM_SLEEP);
4107 4157 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
4108 4158 cpi->cpi_cache_leaves[0] = &cpi->cpi_std[4];
4109 4159 } else {
4110 4160 cpi->cpi_cache_leaves[0] = &cpi->cpi_extd[0x1d];
4111 4161 }
4112 4162
4113 4163 /*
4114 4164 * Allocate storage to hold the additional regs
4115 4165 * for the leaf, %ecx == 1 .. cpi_cache_leaf_size.
4116 4166 *
4117 4167 * The regs for the leaf, %ecx == 0 has already
4118 4168 * been allocated as indicated above.
4119 4169 */
4120 4170 for (i = 1; i < size; i++) {
4121 4171 cp = cpi->cpi_cache_leaves[i] =
4122 4172 kmem_zalloc(sizeof (regs), KM_SLEEP);
4123 4173 cp->cp_eax = leaf;
4124 4174 cp->cp_ecx = i;
4125 4175
4126 4176 (void) __cpuid_insn(cp);
4127 4177 }
4128 4178 }
4129 4179 /*
4130 4180 * Determine the number of bits needed to represent
4131 4181 * the number of CPUs sharing the last level cache.
4132 4182 *
4133 4183 * Shift off that number of bits from the APIC id to
4134 4184 * derive the cache id.
4135 4185 */
4136 4186 shft = 0;
4137 4187 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1)
4138 4188 shft++;
4139 4189 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft;
4140 4190 }
4141 4191
4142 4192 /*
4143 4193 * Now fix up the brand string
4144 4194 */
4145 4195 if ((cpi->cpi_xmaxeax & CPUID_LEAF_EXT_0) == 0) {
4146 4196 fabricate_brandstr(cpi);
4147 4197 } else {
4148 4198
4149 4199 /*
4150 4200 * If we successfully extracted a brand string from the cpuid
4151 4201 * instruction, clean it up by removing leading spaces and
4152 4202 * similar junk.
4153 4203 */
4154 4204 if (cpi->cpi_brandstr[0]) {
4155 4205 size_t maxlen = sizeof (cpi->cpi_brandstr);
4156 4206 char *src, *dst;
4157 4207
4158 4208 dst = src = (char *)cpi->cpi_brandstr;
4159 4209 src[maxlen - 1] = '\0';
4160 4210 /*
4161 4211 * strip leading spaces
4162 4212 */
4163 4213 while (*src == ' ')
4164 4214 src++;
4165 4215 /*
4166 4216 * Remove any 'Genuine' or "Authentic" prefixes
4167 4217 */
4168 4218 if (strncmp(src, "Genuine ", 8) == 0)
4169 4219 src += 8;
4170 4220 if (strncmp(src, "Authentic ", 10) == 0)
4171 4221 src += 10;
4172 4222
4173 4223 /*
4174 4224 * Now do an in-place copy.
4175 4225 * Map (R) to (r) and (TM) to (tm).
4176 4226 * The era of teletypes is long gone, and there's
4177 4227 * -really- no need to shout.
4178 4228 */
4179 4229 while (*src != '\0') {
4180 4230 if (src[0] == '(') {
4181 4231 if (strncmp(src + 1, "R)", 2) == 0) {
4182 4232 (void) strncpy(dst, "(r)", 3);
4183 4233 src += 3;
4184 4234 dst += 3;
4185 4235 continue;
4186 4236 }
4187 4237 if (strncmp(src + 1, "TM)", 3) == 0) {
4188 4238 (void) strncpy(dst, "(tm)", 4);
4189 4239 src += 4;
4190 4240 dst += 4;
4191 4241 continue;
4192 4242 }
4193 4243 }
4194 4244 *dst++ = *src++;
4195 4245 }
4196 4246 *dst = '\0';
4197 4247
4198 4248 /*
4199 4249 * Finally, remove any trailing spaces
4200 4250 */
4201 4251 while (--dst > cpi->cpi_brandstr)
4202 4252 if (*dst == ' ')
4203 4253 *dst = '\0';
4204 4254 else
4205 4255 break;
4206 4256 } else
4207 4257 fabricate_brandstr(cpi);
4208 4258 }
4209 4259 cpi->cpi_pass = 3;
4210 4260 }
4211 4261
4212 4262 /*
4213 4263 * This routine is called out of bind_hwcap() much later in the life
4214 4264 * of the kernel (post_startup()). The job of this routine is to resolve
4215 4265 * the hardware feature support and kernel support for those features into
4216 4266 * what we're actually going to tell applications via the aux vector.
4217 4267 */
4218 4268 void
4219 4269 cpuid_pass4(cpu_t *cpu, uint_t *hwcap_out)
4220 4270 {
4221 4271 struct cpuid_info *cpi;
4222 4272 uint_t hwcap_flags = 0, hwcap_flags_2 = 0;
4223 4273
4224 4274 if (cpu == NULL)
4225 4275 cpu = CPU;
4226 4276 cpi = cpu->cpu_m.mcpu_cpi;
4227 4277
4228 4278 ASSERT(cpi->cpi_pass == 3);
4229 4279
4230 4280 if (cpi->cpi_maxeax >= 1) {
4231 4281 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES];
4232 4282 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES];
4233 4283 uint32_t *ebx = &cpi->cpi_support[STD_EBX_FEATURES];
4234 4284
4235 4285 *edx = CPI_FEATURES_EDX(cpi);
4236 4286 *ecx = CPI_FEATURES_ECX(cpi);
4237 4287 *ebx = CPI_FEATURES_7_0_EBX(cpi);
4238 4288
4239 4289 /*
4240 4290 * [these require explicit kernel support]
4241 4291 */
4242 4292 if (!is_x86_feature(x86_featureset, X86FSET_SEP))
4243 4293 *edx &= ~CPUID_INTC_EDX_SEP;
4244 4294
4245 4295 if (!is_x86_feature(x86_featureset, X86FSET_SSE))
4246 4296 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE);
4247 4297 if (!is_x86_feature(x86_featureset, X86FSET_SSE2))
4248 4298 *edx &= ~CPUID_INTC_EDX_SSE2;
4249 4299
4250 4300 if (!is_x86_feature(x86_featureset, X86FSET_HTT))
4251 4301 *edx &= ~CPUID_INTC_EDX_HTT;
4252 4302
4253 4303 if (!is_x86_feature(x86_featureset, X86FSET_SSE3))
4254 4304 *ecx &= ~CPUID_INTC_ECX_SSE3;
4255 4305
4256 4306 if (!is_x86_feature(x86_featureset, X86FSET_SSSE3))
4257 4307 *ecx &= ~CPUID_INTC_ECX_SSSE3;
4258 4308 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_1))
4259 4309 *ecx &= ~CPUID_INTC_ECX_SSE4_1;
4260 4310 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_2))
4261 4311 *ecx &= ~CPUID_INTC_ECX_SSE4_2;
4262 4312 if (!is_x86_feature(x86_featureset, X86FSET_AES))
4263 4313 *ecx &= ~CPUID_INTC_ECX_AES;
4264 4314 if (!is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ))
4265 4315 *ecx &= ~CPUID_INTC_ECX_PCLMULQDQ;
4266 4316 if (!is_x86_feature(x86_featureset, X86FSET_XSAVE))
4267 4317 *ecx &= ~(CPUID_INTC_ECX_XSAVE |
4268 4318 CPUID_INTC_ECX_OSXSAVE);
4269 4319 if (!is_x86_feature(x86_featureset, X86FSET_AVX))
4270 4320 *ecx &= ~CPUID_INTC_ECX_AVX;
4271 4321 if (!is_x86_feature(x86_featureset, X86FSET_F16C))
4272 4322 *ecx &= ~CPUID_INTC_ECX_F16C;
4273 4323 if (!is_x86_feature(x86_featureset, X86FSET_FMA))
4274 4324 *ecx &= ~CPUID_INTC_ECX_FMA;
4275 4325 if (!is_x86_feature(x86_featureset, X86FSET_BMI1))
4276 4326 *ebx &= ~CPUID_INTC_EBX_7_0_BMI1;
4277 4327 if (!is_x86_feature(x86_featureset, X86FSET_BMI2))
4278 4328 *ebx &= ~CPUID_INTC_EBX_7_0_BMI2;
4279 4329 if (!is_x86_feature(x86_featureset, X86FSET_AVX2))
4280 4330 *ebx &= ~CPUID_INTC_EBX_7_0_AVX2;
4281 4331 if (!is_x86_feature(x86_featureset, X86FSET_RDSEED))
4282 4332 *ebx &= ~CPUID_INTC_EBX_7_0_RDSEED;
4283 4333 if (!is_x86_feature(x86_featureset, X86FSET_ADX))
4284 4334 *ebx &= ~CPUID_INTC_EBX_7_0_ADX;
4285 4335
4286 4336 /*
4287 4337 * [no explicit support required beyond x87 fp context]
4288 4338 */
4289 4339 if (!fpu_exists)
4290 4340 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX);
4291 4341
4292 4342 /*
4293 4343 * Now map the supported feature vector to things that we
4294 4344 * think userland will care about.
4295 4345 */
4296 4346 if (*edx & CPUID_INTC_EDX_SEP)
4297 4347 hwcap_flags |= AV_386_SEP;
4298 4348 if (*edx & CPUID_INTC_EDX_SSE)
4299 4349 hwcap_flags |= AV_386_FXSR | AV_386_SSE;
4300 4350 if (*edx & CPUID_INTC_EDX_SSE2)
4301 4351 hwcap_flags |= AV_386_SSE2;
4302 4352 if (*ecx & CPUID_INTC_ECX_SSE3)
4303 4353 hwcap_flags |= AV_386_SSE3;
4304 4354 if (*ecx & CPUID_INTC_ECX_SSSE3)
4305 4355 hwcap_flags |= AV_386_SSSE3;
4306 4356 if (*ecx & CPUID_INTC_ECX_SSE4_1)
4307 4357 hwcap_flags |= AV_386_SSE4_1;
4308 4358 if (*ecx & CPUID_INTC_ECX_SSE4_2)
4309 4359 hwcap_flags |= AV_386_SSE4_2;
4310 4360 if (*ecx & CPUID_INTC_ECX_MOVBE)
4311 4361 hwcap_flags |= AV_386_MOVBE;
4312 4362 if (*ecx & CPUID_INTC_ECX_AES)
4313 4363 hwcap_flags |= AV_386_AES;
4314 4364 if (*ecx & CPUID_INTC_ECX_PCLMULQDQ)
4315 4365 hwcap_flags |= AV_386_PCLMULQDQ;
4316 4366 if ((*ecx & CPUID_INTC_ECX_XSAVE) &&
4317 4367 (*ecx & CPUID_INTC_ECX_OSXSAVE)) {
4318 4368 hwcap_flags |= AV_386_XSAVE;
4319 4369
4320 4370 if (*ecx & CPUID_INTC_ECX_AVX) {
4321 4371 uint32_t *ecx_7 = &CPI_FEATURES_7_0_ECX(cpi);
4322 4372 uint32_t *edx_7 = &CPI_FEATURES_7_0_EDX(cpi);
4323 4373
4324 4374 hwcap_flags |= AV_386_AVX;
4325 4375 if (*ecx & CPUID_INTC_ECX_F16C)
4326 4376 hwcap_flags_2 |= AV_386_2_F16C;
4327 4377 if (*ecx & CPUID_INTC_ECX_FMA)
4328 4378 hwcap_flags_2 |= AV_386_2_FMA;
4329 4379
4330 4380 if (*ebx & CPUID_INTC_EBX_7_0_BMI1)
4331 4381 hwcap_flags_2 |= AV_386_2_BMI1;
4332 4382 if (*ebx & CPUID_INTC_EBX_7_0_BMI2)
4333 4383 hwcap_flags_2 |= AV_386_2_BMI2;
4334 4384 if (*ebx & CPUID_INTC_EBX_7_0_AVX2)
4335 4385 hwcap_flags_2 |= AV_386_2_AVX2;
4336 4386 if (*ebx & CPUID_INTC_EBX_7_0_AVX512F)
4337 4387 hwcap_flags_2 |= AV_386_2_AVX512F;
4338 4388 if (*ebx & CPUID_INTC_EBX_7_0_AVX512DQ)
4339 4389 hwcap_flags_2 |= AV_386_2_AVX512DQ;
4340 4390 if (*ebx & CPUID_INTC_EBX_7_0_AVX512IFMA)
4341 4391 hwcap_flags_2 |= AV_386_2_AVX512IFMA;
4342 4392 if (*ebx & CPUID_INTC_EBX_7_0_AVX512PF)
4343 4393 hwcap_flags_2 |= AV_386_2_AVX512PF;
4344 4394 if (*ebx & CPUID_INTC_EBX_7_0_AVX512ER)
4345 4395 hwcap_flags_2 |= AV_386_2_AVX512ER;
4346 4396 if (*ebx & CPUID_INTC_EBX_7_0_AVX512CD)
4347 4397 hwcap_flags_2 |= AV_386_2_AVX512CD;
4348 4398 if (*ebx & CPUID_INTC_EBX_7_0_AVX512BW)
4349 4399 hwcap_flags_2 |= AV_386_2_AVX512BW;
4350 4400 if (*ebx & CPUID_INTC_EBX_7_0_AVX512VL)
4351 4401 hwcap_flags_2 |= AV_386_2_AVX512VL;
4352 4402
4353 4403 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VBMI)
4354 4404 hwcap_flags_2 |= AV_386_2_AVX512VBMI;
4355 4405 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VNNI)
4356 4406 hwcap_flags_2 |= AV_386_2_AVX512_VNNI;
4357 4407 if (*ecx_7 & CPUID_INTC_ECX_7_0_AVX512VPOPCDQ)
4358 4408 hwcap_flags_2 |= AV_386_2_AVX512VPOPCDQ;
4359 4409
4360 4410 if (*edx_7 & CPUID_INTC_EDX_7_0_AVX5124NNIW)
4361 4411 hwcap_flags_2 |= AV_386_2_AVX512_4NNIW;
4362 4412 if (*edx_7 & CPUID_INTC_EDX_7_0_AVX5124FMAPS)
4363 4413 hwcap_flags_2 |= AV_386_2_AVX512_4FMAPS;
4364 4414 }
4365 4415 }
4366 4416 if (*ecx & CPUID_INTC_ECX_VMX)
4367 4417 hwcap_flags |= AV_386_VMX;
4368 4418 if (*ecx & CPUID_INTC_ECX_POPCNT)
4369 4419 hwcap_flags |= AV_386_POPCNT;
4370 4420 if (*edx & CPUID_INTC_EDX_FPU)
4371 4421 hwcap_flags |= AV_386_FPU;
4372 4422 if (*edx & CPUID_INTC_EDX_MMX)
4373 4423 hwcap_flags |= AV_386_MMX;
4374 4424
4375 4425 if (*edx & CPUID_INTC_EDX_TSC)
4376 4426 hwcap_flags |= AV_386_TSC;
4377 4427 if (*edx & CPUID_INTC_EDX_CX8)
4378 4428 hwcap_flags |= AV_386_CX8;
4379 4429 if (*edx & CPUID_INTC_EDX_CMOV)
4380 4430 hwcap_flags |= AV_386_CMOV;
4381 4431 if (*ecx & CPUID_INTC_ECX_CX16)
4382 4432 hwcap_flags |= AV_386_CX16;
4383 4433
4384 4434 if (*ecx & CPUID_INTC_ECX_RDRAND)
4385 4435 hwcap_flags_2 |= AV_386_2_RDRAND;
4386 4436 if (*ebx & CPUID_INTC_EBX_7_0_ADX)
4387 4437 hwcap_flags_2 |= AV_386_2_ADX;
4388 4438 if (*ebx & CPUID_INTC_EBX_7_0_RDSEED)
4389 4439 hwcap_flags_2 |= AV_386_2_RDSEED;
4390 4440 if (*ebx & CPUID_INTC_EBX_7_0_SHA)
4391 4441 hwcap_flags_2 |= AV_386_2_SHA;
4392 4442 if (*ebx & CPUID_INTC_EBX_7_0_FSGSBASE)
4393 4443 hwcap_flags_2 |= AV_386_2_FSGSBASE;
4394 4444 if (*ebx & CPUID_INTC_EBX_7_0_CLWB)
4395 4445 hwcap_flags_2 |= AV_386_2_CLWB;
4396 4446 if (*ebx & CPUID_INTC_EBX_7_0_CLFLUSHOPT)
4397 4447 hwcap_flags_2 |= AV_386_2_CLFLUSHOPT;
4398 4448
4399 4449 }
4400 4450 /*
4401 4451 	 * Check a few miscellaneous features.
4402 4452 */
4403 4453 if (is_x86_feature(x86_featureset, X86FSET_CLZERO))
4404 4454 hwcap_flags_2 |= AV_386_2_CLZERO;
4405 4455
4406 4456 if (cpi->cpi_xmaxeax < 0x80000001)
4407 4457 goto pass4_done;
4408 4458
4409 4459 switch (cpi->cpi_vendor) {
4410 4460 struct cpuid_regs cp;
4411 4461 uint32_t *edx, *ecx;
4412 4462
4413 4463 case X86_VENDOR_Intel:
4414 4464 /*
4415 4465 		 * Seems like Intel duplicated what was necessary
4416 4466 		 * here to make the initial crop of 64-bit OSes work.
4417 4467 * Hopefully, those are the only "extended" bits
4418 4468 * they'll add.
4419 4469 */
4420 4470 /*FALLTHROUGH*/
4421 4471
4422 4472 case X86_VENDOR_AMD:
4423 4473 edx = &cpi->cpi_support[AMD_EDX_FEATURES];
4424 4474 ecx = &cpi->cpi_support[AMD_ECX_FEATURES];
4425 4475
4426 4476 *edx = CPI_FEATURES_XTD_EDX(cpi);
4427 4477 *ecx = CPI_FEATURES_XTD_ECX(cpi);
4428 4478
4429 4479 /*
4430 4480 * [these features require explicit kernel support]
4431 4481 */
4432 4482 switch (cpi->cpi_vendor) {
4433 4483 case X86_VENDOR_Intel:
4434 4484 if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
4435 4485 *edx &= ~CPUID_AMD_EDX_TSCP;
4436 4486 break;
4437 4487
4438 4488 case X86_VENDOR_AMD:
4439 4489 if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
4440 4490 *edx &= ~CPUID_AMD_EDX_TSCP;
4441 4491 if (!is_x86_feature(x86_featureset, X86FSET_SSE4A))
4442 4492 *ecx &= ~CPUID_AMD_ECX_SSE4A;
4443 4493 break;
4444 4494
4445 4495 default:
4446 4496 break;
4447 4497 }
4448 4498
4449 4499 /*
4450 4500 * [no explicit support required beyond
4451 4501 * x87 fp context and exception handlers]
4452 4502 */
4453 4503 if (!fpu_exists)
4454 4504 *edx &= ~(CPUID_AMD_EDX_MMXamd |
4455 4505 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx);
4456 4506
4457 4507 if (!is_x86_feature(x86_featureset, X86FSET_NX))
4458 4508 *edx &= ~CPUID_AMD_EDX_NX;
4459 4509 #if !defined(__amd64)
4460 4510 *edx &= ~CPUID_AMD_EDX_LM;
4461 4511 #endif
4462 4512 /*
4463 4513 * Now map the supported feature vector to
4464 4514 * things that we think userland will care about.
4465 4515 */
4466 4516 #if defined(__amd64)
4467 4517 if (*edx & CPUID_AMD_EDX_SYSC)
4468 4518 hwcap_flags |= AV_386_AMD_SYSC;
4469 4519 #endif
4470 4520 if (*edx & CPUID_AMD_EDX_MMXamd)
4471 4521 hwcap_flags |= AV_386_AMD_MMX;
4472 4522 if (*edx & CPUID_AMD_EDX_3DNow)
4473 4523 hwcap_flags |= AV_386_AMD_3DNow;
4474 4524 if (*edx & CPUID_AMD_EDX_3DNowx)
4475 4525 hwcap_flags |= AV_386_AMD_3DNowx;
4476 4526 if (*ecx & CPUID_AMD_ECX_SVM)
4477 4527 hwcap_flags |= AV_386_AMD_SVM;
4478 4528
4479 4529 switch (cpi->cpi_vendor) {
4480 4530 case X86_VENDOR_AMD:
4481 4531 if (*edx & CPUID_AMD_EDX_TSCP)
4482 4532 hwcap_flags |= AV_386_TSCP;
4483 4533 if (*ecx & CPUID_AMD_ECX_AHF64)
4484 4534 hwcap_flags |= AV_386_AHF;
4485 4535 if (*ecx & CPUID_AMD_ECX_SSE4A)
4486 4536 hwcap_flags |= AV_386_AMD_SSE4A;
4487 4537 if (*ecx & CPUID_AMD_ECX_LZCNT)
4488 4538 hwcap_flags |= AV_386_AMD_LZCNT;
4489 4539 if (*ecx & CPUID_AMD_ECX_MONITORX)
4490 4540 hwcap_flags_2 |= AV_386_2_MONITORX;
4491 4541 break;
4492 4542
4493 4543 case X86_VENDOR_Intel:
4494 4544 if (*edx & CPUID_AMD_EDX_TSCP)
4495 4545 hwcap_flags |= AV_386_TSCP;
4496 4546 if (*ecx & CPUID_AMD_ECX_LZCNT)
4497 4547 hwcap_flags |= AV_386_AMD_LZCNT;
4498 4548 /*
4499 4549 * Aarrgh.
4500 4550 * Intel uses a different bit in the same word.
4501 4551 */
4502 4552 if (*ecx & CPUID_INTC_ECX_AHF64)
4503 4553 hwcap_flags |= AV_386_AHF;
4504 4554 break;
4505 4555
4506 4556 default:
4507 4557 break;
4508 4558 }
4509 4559 break;
4510 4560
4511 4561 case X86_VENDOR_TM:
4512 4562 cp.cp_eax = 0x80860001;
4513 4563 (void) __cpuid_insn(&cp);
4514 4564 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx;
4515 4565 break;
4516 4566
4517 4567 default:
4518 4568 break;
4519 4569 }
4520 4570
4521 4571 pass4_done:
4522 4572 cpi->cpi_pass = 4;
4523 4573 if (hwcap_out != NULL) {
4524 4574 hwcap_out[0] = hwcap_flags;
4525 4575 hwcap_out[1] = hwcap_flags_2;
4526 4576 }
4527 4577 }
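/*
 * Illustrative consumer view (hypothetical caller, not part of this change):
 * once this pass completes, hwcap_out (when non-NULL) holds two words of
 * AV_386_* and AV_386_2_* flags.  A caller could test, for example,
 * (hwcap[1] & AV_386_2_RDSEED) to decide whether RDSEED support should be
 * reported onward.
 */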
4528 4578
4529 4579
4530 4580 /*
4531 4581 * Simulate the cpuid instruction using the data we previously
4532 4582 * captured about this CPU. We try our best to return the truth
4533 4583 * about the hardware, independently of kernel support.
4534 4584 */
4535 4585 uint32_t
4536 4586 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp)
4537 4587 {
4538 4588 struct cpuid_info *cpi;
4539 4589 struct cpuid_regs *xcp;
4540 4590
4541 4591 if (cpu == NULL)
4542 4592 cpu = CPU;
4543 4593 cpi = cpu->cpu_m.mcpu_cpi;
4544 4594
4545 4595 ASSERT(cpuid_checkpass(cpu, 3));
4546 4596
4547 4597 /*
4548 4598 * CPUID data is cached in two separate places: cpi_std for standard
4549 4599 	 * CPUID leaves, and cpi_extd for extended CPUID leaves.
4550 4600 */
4551 4601 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD) {
4552 4602 xcp = &cpi->cpi_std[cp->cp_eax];
4553 4603 } else if (cp->cp_eax >= CPUID_LEAF_EXT_0 &&
4554 4604 cp->cp_eax <= cpi->cpi_xmaxeax &&
4555 4605 cp->cp_eax < CPUID_LEAF_EXT_0 + NMAX_CPI_EXTD) {
4556 4606 xcp = &cpi->cpi_extd[cp->cp_eax - CPUID_LEAF_EXT_0];
4557 4607 } else {
4558 4608 /*
4559 4609 * The caller is asking for data from an input parameter which
4560 4610 * the kernel has not cached. In this case we go fetch from
4561 4611 * the hardware and return the data directly to the user.
4562 4612 */
4563 4613 return (__cpuid_insn(cp));
4564 4614 }
4565 4615
4566 4616 cp->cp_eax = xcp->cp_eax;
4567 4617 cp->cp_ebx = xcp->cp_ebx;
4568 4618 cp->cp_ecx = xcp->cp_ecx;
4569 4619 cp->cp_edx = xcp->cp_edx;
4570 4620 return (cp->cp_eax);
4571 4621 }
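/*
 * Illustrative (hypothetical) usage of cpuid_insn(): to read the cached
 * copy of basic leaf 1 for the current CPU (assuming the processor
 * reports it), a caller could do:
 *
 *	struct cpuid_regs regs = { 0 };
 *
 *	regs.cp_eax = 1;
 *	(void) cpuid_insn(NULL, &regs);
 *
 * On return, regs holds the cached leaf 1 values; leaves outside the
 * cached standard/extended ranges fall through to the real cpuid
 * instruction via __cpuid_insn() as shown above.
 */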
4572 4622
4573 4623 int
4574 4624 cpuid_checkpass(cpu_t *cpu, int pass)
4575 4625 {
4576 4626 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL &&
4577 4627 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass);
4578 4628 }
4579 4629
4580 4630 int
4581 4631 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n)
4582 4632 {
4583 4633 ASSERT(cpuid_checkpass(cpu, 3));
4584 4634
4585 4635 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr));
4586 4636 }
4587 4637
4588 4638 int
4589 4639 cpuid_is_cmt(cpu_t *cpu)
4590 4640 {
4591 4641 if (cpu == NULL)
4592 4642 cpu = CPU;
4593 4643
4594 4644 ASSERT(cpuid_checkpass(cpu, 1));
4595 4645
4596 4646 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0);
4597 4647 }
4598 4648
4599 4649 /*
4600 4650 * AMD and Intel both implement the 64-bit variant of the syscall
4601 4651 * instruction (syscallq), so if there's -any- support for syscall,
4602 4652 * cpuid currently says "yes, we support this".
4603 4653 *
4604 4654 * However, Intel decided to -not- implement the 32-bit variant of the
4605 4655 * syscall instruction, so we provide a predicate to allow our caller
4606 4656 * to test that subtlety here.
4607 4657 *
4608 4658 * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor,
4609 4659 * even in the case where the hardware would in fact support it.
4610 4660 */
4611 4661 /*ARGSUSED*/
4612 4662 int
4613 4663 cpuid_syscall32_insn(cpu_t *cpu)
4614 4664 {
4615 4665 ASSERT(cpuid_checkpass((cpu == NULL ? CPU : cpu), 1));
4616 4666
4617 4667 #if !defined(__xpv)
4618 4668 if (cpu == NULL)
4619 4669 cpu = CPU;
4620 4670
4621 4671 /*CSTYLED*/
4622 4672 {
4623 4673 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4624 4674
4625 4675 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
4626 4676 cpi->cpi_xmaxeax >= 0x80000001 &&
4627 4677 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC))
4628 4678 return (1);
4629 4679 }
4630 4680 #endif
4631 4681 return (0);
4632 4682 }
4633 4683
4634 4684 int
4635 4685 cpuid_getidstr(cpu_t *cpu, char *s, size_t n)
4636 4686 {
4637 4687 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4638 4688
4639 4689 static const char fmt[] =
4640 4690 "x86 (%s %X family %d model %d step %d clock %d MHz)";
4641 4691 static const char fmt_ht[] =
4642 4692 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)";
4643 4693
4644 4694 ASSERT(cpuid_checkpass(cpu, 1));
4645 4695
4646 4696 if (cpuid_is_cmt(cpu))
4647 4697 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid,
4648 4698 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
4649 4699 cpi->cpi_family, cpi->cpi_model,
4650 4700 cpi->cpi_step, cpu->cpu_type_info.pi_clock));
4651 4701 return (snprintf(s, n, fmt,
4652 4702 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
4653 4703 cpi->cpi_family, cpi->cpi_model,
4654 4704 cpi->cpi_step, cpu->cpu_type_info.pi_clock));
4655 4705 }
4656 4706
4657 4707 const char *
4658 4708 cpuid_getvendorstr(cpu_t *cpu)
4659 4709 {
4660 4710 ASSERT(cpuid_checkpass(cpu, 1));
4661 4711 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr);
4662 4712 }
4663 4713
4664 4714 uint_t
4665 4715 cpuid_getvendor(cpu_t *cpu)
4666 4716 {
4667 4717 ASSERT(cpuid_checkpass(cpu, 1));
4668 4718 return (cpu->cpu_m.mcpu_cpi->cpi_vendor);
4669 4719 }
4670 4720
4671 4721 uint_t
4672 4722 cpuid_getfamily(cpu_t *cpu)
4673 4723 {
4674 4724 ASSERT(cpuid_checkpass(cpu, 1));
4675 4725 return (cpu->cpu_m.mcpu_cpi->cpi_family);
4676 4726 }
4677 4727
4678 4728 uint_t
4679 4729 cpuid_getmodel(cpu_t *cpu)
4680 4730 {
4681 4731 ASSERT(cpuid_checkpass(cpu, 1));
4682 4732 return (cpu->cpu_m.mcpu_cpi->cpi_model);
4683 4733 }
4684 4734
4685 4735 uint_t
4686 4736 cpuid_get_ncpu_per_chip(cpu_t *cpu)
4687 4737 {
4688 4738 ASSERT(cpuid_checkpass(cpu, 1));
4689 4739 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip);
4690 4740 }
4691 4741
4692 4742 uint_t
4693 4743 cpuid_get_ncore_per_chip(cpu_t *cpu)
4694 4744 {
4695 4745 ASSERT(cpuid_checkpass(cpu, 1));
4696 4746 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip);
4697 4747 }
4698 4748
4699 4749 uint_t
4700 4750 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu)
4701 4751 {
4702 4752 ASSERT(cpuid_checkpass(cpu, 2));
4703 4753 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache);
4704 4754 }
4705 4755
4706 4756 id_t
4707 4757 cpuid_get_last_lvl_cacheid(cpu_t *cpu)
4708 4758 {
4709 4759 ASSERT(cpuid_checkpass(cpu, 2));
4710 4760 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
4711 4761 }
4712 4762
4713 4763 uint_t
4714 4764 cpuid_getstep(cpu_t *cpu)
4715 4765 {
4716 4766 ASSERT(cpuid_checkpass(cpu, 1));
4717 4767 return (cpu->cpu_m.mcpu_cpi->cpi_step);
4718 4768 }
4719 4769
4720 4770 uint_t
4721 4771 cpuid_getsig(struct cpu *cpu)
4722 4772 {
4723 4773 ASSERT(cpuid_checkpass(cpu, 1));
4724 4774 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax);
4725 4775 }
4726 4776
4727 4777 uint32_t
4728 4778 cpuid_getchiprev(struct cpu *cpu)
4729 4779 {
4730 4780 ASSERT(cpuid_checkpass(cpu, 1));
4731 4781 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev);
4732 4782 }
4733 4783
4734 4784 const char *
4735 4785 cpuid_getchiprevstr(struct cpu *cpu)
4736 4786 {
4737 4787 ASSERT(cpuid_checkpass(cpu, 1));
4738 4788 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr);
4739 4789 }
4740 4790
4741 4791 uint32_t
4742 4792 cpuid_getsockettype(struct cpu *cpu)
4743 4793 {
4744 4794 ASSERT(cpuid_checkpass(cpu, 1));
4745 4795 return (cpu->cpu_m.mcpu_cpi->cpi_socket);
4746 4796 }
4747 4797
4748 4798 const char *
4749 4799 cpuid_getsocketstr(cpu_t *cpu)
4750 4800 {
4751 4801 static const char *socketstr = NULL;
4752 4802 struct cpuid_info *cpi;
4753 4803
4754 4804 ASSERT(cpuid_checkpass(cpu, 1));
4755 4805 cpi = cpu->cpu_m.mcpu_cpi;
4756 4806
4757 4807 /* Assume that socket types are the same across the system */
4758 4808 if (socketstr == NULL)
4759 4809 socketstr = _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family,
4760 4810 cpi->cpi_model, cpi->cpi_step);
4761 4811
4762 4812
4763 4813 return (socketstr);
4764 4814 }
4765 4815
4766 4816 int
4767 4817 cpuid_get_chipid(cpu_t *cpu)
4768 4818 {
4769 4819 ASSERT(cpuid_checkpass(cpu, 1));
4770 4820
4771 4821 if (cpuid_is_cmt(cpu))
4772 4822 return (cpu->cpu_m.mcpu_cpi->cpi_chipid);
4773 4823 return (cpu->cpu_id);
4774 4824 }
4775 4825
4776 4826 id_t
4777 4827 cpuid_get_coreid(cpu_t *cpu)
4778 4828 {
4779 4829 ASSERT(cpuid_checkpass(cpu, 1));
4780 4830 return (cpu->cpu_m.mcpu_cpi->cpi_coreid);
4781 4831 }
4782 4832
4783 4833 int
4784 4834 cpuid_get_pkgcoreid(cpu_t *cpu)
4785 4835 {
4786 4836 ASSERT(cpuid_checkpass(cpu, 1));
4787 4837 return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid);
4788 4838 }
4789 4839
4790 4840 int
4791 4841 cpuid_get_clogid(cpu_t *cpu)
4792 4842 {
4793 4843 ASSERT(cpuid_checkpass(cpu, 1));
4794 4844 return (cpu->cpu_m.mcpu_cpi->cpi_clogid);
4795 4845 }
4796 4846
4797 4847 int
4798 4848 cpuid_get_cacheid(cpu_t *cpu)
4799 4849 {
4800 4850 ASSERT(cpuid_checkpass(cpu, 1));
4801 4851 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
4802 4852 }
4803 4853
4804 4854 uint_t
4805 4855 cpuid_get_procnodeid(cpu_t *cpu)
4806 4856 {
4807 4857 ASSERT(cpuid_checkpass(cpu, 1));
4808 4858 return (cpu->cpu_m.mcpu_cpi->cpi_procnodeid);
4809 4859 }
4810 4860
4811 4861 uint_t
4812 4862 cpuid_get_procnodes_per_pkg(cpu_t *cpu)
4813 4863 {
4814 4864 ASSERT(cpuid_checkpass(cpu, 1));
4815 4865 return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg);
4816 4866 }
4817 4867
4818 4868 uint_t
4819 4869 cpuid_get_compunitid(cpu_t *cpu)
4820 4870 {
4821 4871 ASSERT(cpuid_checkpass(cpu, 1));
4822 4872 return (cpu->cpu_m.mcpu_cpi->cpi_compunitid);
4823 4873 }
4824 4874
4825 4875 uint_t
4826 4876 cpuid_get_cores_per_compunit(cpu_t *cpu)
4827 4877 {
4828 4878 ASSERT(cpuid_checkpass(cpu, 1));
4829 4879 return (cpu->cpu_m.mcpu_cpi->cpi_cores_per_compunit);
4830 4880 }
4831 4881
4832 4882 /*ARGSUSED*/
4833 4883 int
4834 4884 cpuid_have_cr8access(cpu_t *cpu)
4835 4885 {
4836 4886 #if defined(__amd64)
4837 4887 return (1);
4838 4888 #else
4839 4889 struct cpuid_info *cpi;
4840 4890
4841 4891 ASSERT(cpu != NULL);
4842 4892 cpi = cpu->cpu_m.mcpu_cpi;
4843 4893 if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 &&
4844 4894 (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0)
4845 4895 return (1);
4846 4896 return (0);
4847 4897 #endif
4848 4898 }
4849 4899
4850 4900 uint32_t
4851 4901 cpuid_get_apicid(cpu_t *cpu)
4852 4902 {
4853 4903 ASSERT(cpuid_checkpass(cpu, 1));
4854 4904 if (cpu->cpu_m.mcpu_cpi->cpi_maxeax < 1) {
4855 4905 return (UINT32_MAX);
4856 4906 } else {
4857 4907 return (cpu->cpu_m.mcpu_cpi->cpi_apicid);
4858 4908 }
4859 4909 }
4860 4910
4861 4911 void
4862 4912 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits)
4863 4913 {
4864 4914 struct cpuid_info *cpi;
4865 4915
4866 4916 if (cpu == NULL)
4867 4917 cpu = CPU;
4868 4918 cpi = cpu->cpu_m.mcpu_cpi;
4869 4919
4870 4920 ASSERT(cpuid_checkpass(cpu, 1));
4871 4921
4872 4922 if (pabits)
4873 4923 *pabits = cpi->cpi_pabits;
4874 4924 if (vabits)
4875 4925 *vabits = cpi->cpi_vabits;
4876 4926 }
4877 4927
4878 4928 size_t
4879 4929 cpuid_get_xsave_size()
4880 4930 {
4881 4931 return (MAX(cpuid_info0.cpi_xsave.xsav_max_size,
4882 4932 sizeof (struct xsave_state)));
4883 4933 }
4884 4934
4885 4935 /*
4886 4936 * Return true if the CPUs on this system require 'pointer clearing' for the
4887 4937 * floating point error pointer exception handling. In the past, this has been
4888 4938 * true for all AMD K7 & K8 CPUs, although newer AMD CPUs have been changed to
4889 4939 * behave the same as Intel. This is checked via the CPUID_AMD_EBX_ERR_PTR_ZERO
4890 4940 * feature bit and is reflected in the cpi_fp_amd_save member.
4891 4941 */
4892 4942 boolean_t
4893 4943 cpuid_need_fp_excp_handling()
4894 4944 {
4895 4945 return (cpuid_info0.cpi_vendor == X86_VENDOR_AMD &&
4896 4946 cpuid_info0.cpi_fp_amd_save != 0);
4897 4947 }
4898 4948
4899 4949 /*
4900 4950 * Returns the number of data TLB entries for a corresponding
4901 4951 * pagesize. If it can't be computed, or isn't known, the
4902 4952 * routine returns zero. If you ask about an architecturally
4903 4953 * impossible pagesize, the routine will panic (so that the
4904 4954 * hat implementor knows that things are inconsistent.)
4905 4955 */
4906 4956 uint_t
4907 4957 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize)
4908 4958 {
4909 4959 struct cpuid_info *cpi;
4910 4960 uint_t dtlb_nent = 0;
4911 4961
4912 4962 if (cpu == NULL)
4913 4963 cpu = CPU;
4914 4964 cpi = cpu->cpu_m.mcpu_cpi;
4915 4965
4916 4966 ASSERT(cpuid_checkpass(cpu, 1));
4917 4967
4918 4968 /*
4919 4969 * Check the L2 TLB info
4920 4970 */
4921 4971 if (cpi->cpi_xmaxeax >= 0x80000006) {
4922 4972 struct cpuid_regs *cp = &cpi->cpi_extd[6];
4923 4973
4924 4974 switch (pagesize) {
4925 4975
4926 4976 case 4 * 1024:
4927 4977 /*
4928 4978 * All zero in the top 16 bits of the register
4929 4979 * indicates a unified TLB. Size is in low 16 bits.
4930 4980 */
4931 4981 if ((cp->cp_ebx & 0xffff0000) == 0)
4932 4982 dtlb_nent = cp->cp_ebx & 0x0000ffff;
4933 4983 else
4934 4984 dtlb_nent = BITX(cp->cp_ebx, 27, 16);
4935 4985 break;
4936 4986
4937 4987 case 2 * 1024 * 1024:
4938 4988 if ((cp->cp_eax & 0xffff0000) == 0)
4939 4989 dtlb_nent = cp->cp_eax & 0x0000ffff;
4940 4990 else
4941 4991 dtlb_nent = BITX(cp->cp_eax, 27, 16);
4942 4992 break;
4943 4993
4944 4994 default:
4945 4995 panic("unknown L2 pagesize");
4946 4996 /*NOTREACHED*/
4947 4997 }
4948 4998 }
4949 4999
4950 5000 if (dtlb_nent != 0)
4951 5001 return (dtlb_nent);
4952 5002
4953 5003 /*
4954 5004 * No L2 TLB support for this size, try L1.
4955 5005 */
4956 5006 if (cpi->cpi_xmaxeax >= 0x80000005) {
4957 5007 struct cpuid_regs *cp = &cpi->cpi_extd[5];
4958 5008
4959 5009 switch (pagesize) {
4960 5010 case 4 * 1024:
4961 5011 dtlb_nent = BITX(cp->cp_ebx, 23, 16);
4962 5012 break;
4963 5013 case 2 * 1024 * 1024:
4964 5014 dtlb_nent = BITX(cp->cp_eax, 23, 16);
4965 5015 break;
4966 5016 default:
4967 5017 panic("unknown L1 d-TLB pagesize");
4968 5018 /*NOTREACHED*/
4969 5019 }
4970 5020 }
4971 5021
4972 5022 return (dtlb_nent);
4973 5023 }
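/*
 * Worked example (hypothetical register value): if extended leaf 0x80000006
 * reported %ebx == 0x10201020 for the 4K page case above, the upper 16 bits
 * are non-zero, so the L2 TLB is split and the data entry count is
 * BITX(0x10201020, 27, 16) == 0x020, i.e. 32 entries.
 */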
4974 5024
4975 5025 /*
4976 5026 * Return 0 if the erratum is not present or not applicable, positive
4977 5027 * if it is, and negative if the status of the erratum is unknown.
4978 5028 *
4979 5029 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm)
4980 5030 * Processors" #25759, Rev 3.57, August 2005
4981 5031 */
4982 5032 int
4983 5033 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum)
4984 5034 {
4985 5035 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4986 5036 uint_t eax;
4987 5037
4988 5038 /*
4989 5039 * Bail out if this CPU isn't an AMD CPU, or if it's
4990 5040 * a legacy (32-bit) AMD CPU.
4991 5041 */
4992 5042 if (cpi->cpi_vendor != X86_VENDOR_AMD ||
4993 5043 cpi->cpi_family == 4 || cpi->cpi_family == 5 ||
4994 5044 cpi->cpi_family == 6) {
4995 5045 return (0);
4996 5046 }
4997 5047
4998 5048 eax = cpi->cpi_std[1].cp_eax;
4999 5049
5000 5050 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50)
5001 5051 #define SH_B3(eax) (eax == 0xf51)
5002 5052 #define B(eax) (SH_B0(eax) || SH_B3(eax))
5003 5053
5004 5054 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58)
5005 5055
5006 5056 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a)
5007 5057 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0)
5008 5058 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2)
5009 5059 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax))
5010 5060
5011 5061 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70)
5012 5062 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0)
5013 5063 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0)
5014 5064 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax))
5015 5065
5016 5066 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70)
5017 5067 #define JH_E1(eax) (eax == 0x20f10) /* JH8_E0 had 0x20f30 */
5018 5068 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0)
5019 5069 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71)
5020 5070 #define BH_E4(eax) (eax == 0x20fb1)
5021 5071 #define SH_E5(eax) (eax == 0x20f42)
5022 5072 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2)
5023 5073 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32)
5024 5074 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \
5025 5075 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \
5026 5076 DH_E6(eax) || JH_E6(eax))
5027 5077
5028 5078 #define DR_AX(eax) (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02)
5029 5079 #define DR_B0(eax) (eax == 0x100f20)
5030 5080 #define DR_B1(eax) (eax == 0x100f21)
5031 5081 #define DR_BA(eax) (eax == 0x100f2a)
5032 5082 #define DR_B2(eax) (eax == 0x100f22)
5033 5083 #define DR_B3(eax) (eax == 0x100f23)
5034 5084 #define RB_C0(eax) (eax == 0x100f40)
5035 5085
5036 5086 switch (erratum) {
5037 5087 case 1:
5038 5088 return (cpi->cpi_family < 0x10);
5039 5089 case 51: /* what does the asterisk mean? */
5040 5090 return (B(eax) || SH_C0(eax) || CG(eax));
5041 5091 case 52:
5042 5092 return (B(eax));
5043 5093 case 57:
5044 5094 return (cpi->cpi_family <= 0x11);
5045 5095 case 58:
5046 5096 return (B(eax));
5047 5097 case 60:
5048 5098 return (cpi->cpi_family <= 0x11);
5049 5099 case 61:
5050 5100 case 62:
5051 5101 case 63:
5052 5102 case 64:
5053 5103 case 65:
5054 5104 case 66:
5055 5105 case 68:
5056 5106 case 69:
5057 5107 case 70:
5058 5108 case 71:
5059 5109 return (B(eax));
5060 5110 case 72:
5061 5111 return (SH_B0(eax));
5062 5112 case 74:
5063 5113 return (B(eax));
5064 5114 case 75:
5065 5115 return (cpi->cpi_family < 0x10);
5066 5116 case 76:
5067 5117 return (B(eax));
5068 5118 case 77:
5069 5119 return (cpi->cpi_family <= 0x11);
5070 5120 case 78:
5071 5121 return (B(eax) || SH_C0(eax));
5072 5122 case 79:
5073 5123 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
5074 5124 case 80:
5075 5125 case 81:
5076 5126 case 82:
5077 5127 return (B(eax));
5078 5128 case 83:
5079 5129 return (B(eax) || SH_C0(eax) || CG(eax));
5080 5130 case 85:
5081 5131 return (cpi->cpi_family < 0x10);
5082 5132 case 86:
5083 5133 return (SH_C0(eax) || CG(eax));
5084 5134 case 88:
5085 5135 #if !defined(__amd64)
5086 5136 return (0);
5087 5137 #else
5088 5138 return (B(eax) || SH_C0(eax));
5089 5139 #endif
5090 5140 case 89:
5091 5141 return (cpi->cpi_family < 0x10);
5092 5142 case 90:
5093 5143 return (B(eax) || SH_C0(eax) || CG(eax));
5094 5144 case 91:
5095 5145 case 92:
5096 5146 return (B(eax) || SH_C0(eax));
5097 5147 case 93:
5098 5148 return (SH_C0(eax));
5099 5149 case 94:
5100 5150 return (B(eax) || SH_C0(eax) || CG(eax));
5101 5151 case 95:
5102 5152 #if !defined(__amd64)
5103 5153 return (0);
5104 5154 #else
5105 5155 return (B(eax) || SH_C0(eax));
5106 5156 #endif
5107 5157 case 96:
5108 5158 return (B(eax) || SH_C0(eax) || CG(eax));
5109 5159 case 97:
5110 5160 case 98:
5111 5161 return (SH_C0(eax) || CG(eax));
5112 5162 case 99:
5113 5163 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5114 5164 case 100:
5115 5165 return (B(eax) || SH_C0(eax));
5116 5166 case 101:
5117 5167 case 103:
5118 5168 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5119 5169 case 104:
5120 5170 return (SH_C0(eax) || CG(eax) || D0(eax));
5121 5171 case 105:
5122 5172 case 106:
5123 5173 case 107:
5124 5174 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5125 5175 case 108:
5126 5176 return (DH_CG(eax));
5127 5177 case 109:
5128 5178 return (SH_C0(eax) || CG(eax) || D0(eax));
5129 5179 case 110:
5130 5180 return (D0(eax) || EX(eax));
5131 5181 case 111:
5132 5182 return (CG(eax));
5133 5183 case 112:
5134 5184 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
5135 5185 case 113:
5136 5186 return (eax == 0x20fc0);
5137 5187 case 114:
5138 5188 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
5139 5189 case 115:
5140 5190 return (SH_E0(eax) || JH_E1(eax));
5141 5191 case 116:
5142 5192 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
5143 5193 case 117:
5144 5194 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
5145 5195 case 118:
5146 5196 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) ||
5147 5197 JH_E6(eax));
5148 5198 case 121:
5149 5199 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
5150 5200 case 122:
5151 5201 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11);
5152 5202 case 123:
5153 5203 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax));
5154 5204 case 131:
5155 5205 return (cpi->cpi_family < 0x10);
5156 5206 case 6336786:
5157 5207
5158 5208 /*
5159 5209 * Test for AdvPowerMgmtInfo.TscPStateInvariant
5160 5210 * if this is a K8 family or newer processor. We're testing for
5161 5211 * this 'erratum' to determine whether or not we have a constant
5162 5212 * TSC.
5163 5213 *
5164 5214 * Our current fix for this is to disable the C1-Clock ramping.
5165 5215 * However, this doesn't work on newer processor families nor
5166 5216 * does it work when virtualized as those devices don't exist.
5167 5217 */
5168 5218 if (cpi->cpi_family >= 0x12 || get_hwenv() != HW_NATIVE) {
5169 5219 return (0);
5170 5220 }
5171 5221
5172 5222 if (CPI_FAMILY(cpi) == 0xf) {
5173 5223 struct cpuid_regs regs;
5174 5224 regs.cp_eax = 0x80000007;
5175 5225 			(void) __cpuid_insn(&regs);
5176 5226 return (!(regs.cp_edx & 0x100));
5177 5227 }
5178 5228 return (0);
5179 5229 case 6323525:
5180 5230 /*
5181 5231 * This erratum (K8 #147) is not present on family 10 and newer.
5182 5232 */
5183 5233 if (cpi->cpi_family >= 0x10) {
5184 5234 return (0);
5185 5235 }
5186 5236 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) |
5187 5237 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40);
5188 5238
5189 5239 case 6671130:
5190 5240 /*
5191 5241 * check for processors (pre-Shanghai) that do not provide
5192 5242 		 * optimal management of 1gb ptes in their tlb.
5193 5243 */
5194 5244 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4);
5195 5245
5196 5246 case 298:
5197 5247 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) ||
5198 5248 DR_B2(eax) || RB_C0(eax));
5199 5249
5200 5250 case 721:
5201 5251 #if defined(__amd64)
5202 5252 return (cpi->cpi_family == 0x10 || cpi->cpi_family == 0x12);
5203 5253 #else
5204 5254 return (0);
5205 5255 #endif
5206 5256
5207 5257 default:
5208 5258 return (-1);
5209 5259
5210 5260 }
5211 5261 }
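/*
 * Illustrative caller pattern (apply_workaround() is a hypothetical
 * stand-in for whatever action a real caller takes):
 *
 *	if (cpuid_opteron_erratum(CPU, 121) > 0)
 *		apply_workaround();
 *
 * A positive return means the erratum is present, zero means it is not
 * present or not applicable, and a negative value means its status is
 * unknown.
 */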
5212 5262
5213 5263 /*
5214 5264 * Determine if specified erratum is present via OSVW (OS Visible Workaround).
5215 5265 * Return 1 if erratum is present, 0 if not present and -1 if indeterminate.
5216 5266 */
5217 5267 int
5218 5268 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum)
5219 5269 {
5220 5270 struct cpuid_info *cpi;
5221 5271 uint_t osvwid;
5222 5272 static int osvwfeature = -1;
5223 5273 uint64_t osvwlength;
5224 5274
5225 5275
5226 5276 cpi = cpu->cpu_m.mcpu_cpi;
5227 5277
5228 5278 /* confirm OSVW supported */
5229 5279 if (osvwfeature == -1) {
5230 5280 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW;
5231 5281 } else {
5232 5282 /* assert that osvw feature setting is consistent on all cpus */
5233 5283 ASSERT(osvwfeature ==
5234 5284 (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW));
5235 5285 }
5236 5286 if (!osvwfeature)
5237 5287 return (-1);
5238 5288
5239 5289 osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK;
5240 5290
5241 5291 switch (erratum) {
5242 5292 case 298: /* osvwid is 0 */
5243 5293 osvwid = 0;
5244 5294 if (osvwlength <= (uint64_t)osvwid) {
5245 5295 /* osvwid 0 is unknown */
5246 5296 return (-1);
5247 5297 }
5248 5298
5249 5299 /*
5250 5300 * Check the OSVW STATUS MSR to determine the state
5251 5301 * of the erratum where:
5252 5302 * 0 - fixed by HW
5253 5303 * 1 - BIOS has applied the workaround when BIOS
5254 5304 * workaround is available. (Or for other errata,
5255 5305 * OS workaround is required.)
5256 5306 * For a value of 1, caller will confirm that the
5257 5307 * erratum 298 workaround has indeed been applied by BIOS.
5258 5308 *
5259 5309 * A 1 may be set in cpus that have a HW fix
5260 5310 * in a mixed cpu system. Regarding erratum 298:
5261 5311 * In a multiprocessor platform, the workaround above
5262 5312 * should be applied to all processors regardless of
5263 5313 * silicon revision when an affected processor is
5264 5314 * present.
5265 5315 */
5266 5316
5267 5317 return (rdmsr(MSR_AMD_OSVW_STATUS +
5268 5318 (osvwid / OSVW_ID_CNT_PER_MSR)) &
5269 5319 (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR)));
5270 5320
5271 5321 default:
5272 5322 return (-1);
5273 5323 }
5274 5324 }
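/*
 * Worked example for the erratum 298 case above, assuming
 * OSVW_ID_CNT_PER_MSR is 64 (one status bit per OSVW ID in each 64-bit
 * status MSR): osvwid 0 selects MSR_AMD_OSVW_STATUS + (0 / 64), i.e. the
 * first status MSR, and bit (0 % 64), i.e. bit 0.  A non-zero result from
 * the masked read means the erratum is flagged as requiring a workaround.
 */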
5275 5325
5276 5326 static const char assoc_str[] = "associativity";
5277 5327 static const char line_str[] = "line-size";
5278 5328 static const char size_str[] = "size";
5279 5329
5280 5330 static void
5281 5331 add_cache_prop(dev_info_t *devi, const char *label, const char *type,
5282 5332 uint32_t val)
5283 5333 {
5284 5334 char buf[128];
5285 5335
5286 5336 /*
5287 5337 * ndi_prop_update_int() is used because it is desirable for
5288 5338 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set.
5289 5339 */
5290 5340 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf))
5291 5341 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val);
5292 5342 }
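/*
 * For example (illustrative values), add_cache_prop(devi, "l2-cache",
 * "size", 512 * 1024) creates an integer property named "l2-cache-size"
 * with the value 524288 on the cpu devinfo node.
 */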
5293 5343
5294 5344 /*
5295 5345 * Intel-style cache/tlb description
5296 5346 *
5297 5347 * Standard cpuid level 2 gives a randomly ordered
5298 5348 * selection of tags that index into a table that describes
5299 5349 * cache and tlb properties.
5300 5350 */
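/*
 * For instance, if leaf 2 returns the descriptor byte 0x2c in one of its
 * register bytes, looking it up in intel_ctab below yields an 8-way,
 * 64-byte-line, 32KB l1-dcache entry.
 */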
5301 5351
5302 5352 static const char l1_icache_str[] = "l1-icache";
5303 5353 static const char l1_dcache_str[] = "l1-dcache";
5304 5354 static const char l2_cache_str[] = "l2-cache";
5305 5355 static const char l3_cache_str[] = "l3-cache";
5306 5356 static const char itlb4k_str[] = "itlb-4K";
5307 5357 static const char dtlb4k_str[] = "dtlb-4K";
5308 5358 static const char itlb2M_str[] = "itlb-2M";
5309 5359 static const char itlb4M_str[] = "itlb-4M";
5310 5360 static const char dtlb4M_str[] = "dtlb-4M";
5311 5361 static const char dtlb24_str[] = "dtlb0-2M-4M";
5312 5362 static const char itlb424_str[] = "itlb-4K-2M-4M";
5313 5363 static const char itlb24_str[] = "itlb-2M-4M";
5314 5364 static const char dtlb44_str[] = "dtlb-4K-4M";
5315 5365 static const char sl1_dcache_str[] = "sectored-l1-dcache";
5316 5366 static const char sl2_cache_str[] = "sectored-l2-cache";
5317 5367 static const char itrace_str[] = "itrace-cache";
5318 5368 static const char sl3_cache_str[] = "sectored-l3-cache";
5319 5369 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k";
5320 5370
5321 5371 static const struct cachetab {
5322 5372 uint8_t ct_code;
5323 5373 uint8_t ct_assoc;
5324 5374 uint16_t ct_line_size;
5325 5375 size_t ct_size;
5326 5376 const char *ct_label;
5327 5377 } intel_ctab[] = {
5328 5378 /*
5329 5379 * maintain descending order!
5330 5380 *
5331 5381 * Codes ignored - Reason
5332 5382 * ----------------------
5333 5383 * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache
5334 5384 * f0H/f1H - Currently we do not interpret prefetch size by design
5335 5385 */
5336 5386 { 0xe4, 16, 64, 8*1024*1024, l3_cache_str},
5337 5387 { 0xe3, 16, 64, 4*1024*1024, l3_cache_str},
5338 5388 { 0xe2, 16, 64, 2*1024*1024, l3_cache_str},
5339 5389 { 0xde, 12, 64, 6*1024*1024, l3_cache_str},
5340 5390 { 0xdd, 12, 64, 3*1024*1024, l3_cache_str},
5341 5391 { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str},
5342 5392 { 0xd8, 8, 64, 4*1024*1024, l3_cache_str},
5343 5393 { 0xd7, 8, 64, 2*1024*1024, l3_cache_str},
5344 5394 { 0xd6, 8, 64, 1*1024*1024, l3_cache_str},
5345 5395 { 0xd2, 4, 64, 2*1024*1024, l3_cache_str},
5346 5396 { 0xd1, 4, 64, 1*1024*1024, l3_cache_str},
5347 5397 { 0xd0, 4, 64, 512*1024, l3_cache_str},
5348 5398 { 0xca, 4, 0, 512, sh_l2_tlb4k_str},
5349 5399 { 0xc0, 4, 0, 8, dtlb44_str },
5350 5400 { 0xba, 4, 0, 64, dtlb4k_str },
5351 5401 { 0xb4, 4, 0, 256, dtlb4k_str },
5352 5402 { 0xb3, 4, 0, 128, dtlb4k_str },
5353 5403 { 0xb2, 4, 0, 64, itlb4k_str },
5354 5404 { 0xb0, 4, 0, 128, itlb4k_str },
5355 5405 { 0x87, 8, 64, 1024*1024, l2_cache_str},
5356 5406 { 0x86, 4, 64, 512*1024, l2_cache_str},
5357 5407 { 0x85, 8, 32, 2*1024*1024, l2_cache_str},
5358 5408 { 0x84, 8, 32, 1024*1024, l2_cache_str},
5359 5409 { 0x83, 8, 32, 512*1024, l2_cache_str},
5360 5410 { 0x82, 8, 32, 256*1024, l2_cache_str},
5361 5411 { 0x80, 8, 64, 512*1024, l2_cache_str},
5362 5412 { 0x7f, 2, 64, 512*1024, l2_cache_str},
5363 5413 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str},
5364 5414 { 0x7c, 8, 64, 1024*1024, sl2_cache_str},
5365 5415 { 0x7b, 8, 64, 512*1024, sl2_cache_str},
5366 5416 { 0x7a, 8, 64, 256*1024, sl2_cache_str},
5367 5417 { 0x79, 8, 64, 128*1024, sl2_cache_str},
5368 5418 { 0x78, 8, 64, 1024*1024, l2_cache_str},
5369 5419 { 0x73, 8, 0, 64*1024, itrace_str},
5370 5420 { 0x72, 8, 0, 32*1024, itrace_str},
5371 5421 { 0x71, 8, 0, 16*1024, itrace_str},
5372 5422 { 0x70, 8, 0, 12*1024, itrace_str},
5373 5423 { 0x68, 4, 64, 32*1024, sl1_dcache_str},
5374 5424 { 0x67, 4, 64, 16*1024, sl1_dcache_str},
5375 5425 { 0x66, 4, 64, 8*1024, sl1_dcache_str},
5376 5426 { 0x60, 8, 64, 16*1024, sl1_dcache_str},
5377 5427 { 0x5d, 0, 0, 256, dtlb44_str},
5378 5428 { 0x5c, 0, 0, 128, dtlb44_str},
5379 5429 { 0x5b, 0, 0, 64, dtlb44_str},
5380 5430 { 0x5a, 4, 0, 32, dtlb24_str},
5381 5431 { 0x59, 0, 0, 16, dtlb4k_str},
5382 5432 { 0x57, 4, 0, 16, dtlb4k_str},
5383 5433 { 0x56, 4, 0, 16, dtlb4M_str},
5384 5434 { 0x55, 0, 0, 7, itlb24_str},
5385 5435 { 0x52, 0, 0, 256, itlb424_str},
5386 5436 { 0x51, 0, 0, 128, itlb424_str},
5387 5437 { 0x50, 0, 0, 64, itlb424_str},
5388 5438 { 0x4f, 0, 0, 32, itlb4k_str},
5389 5439 { 0x4e, 24, 64, 6*1024*1024, l2_cache_str},
5390 5440 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str},
5391 5441 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str},
5392 5442 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str},
5393 5443 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str},
5394 5444 { 0x49, 16, 64, 4*1024*1024, l3_cache_str},
5395 5445 { 0x48, 12, 64, 3*1024*1024, l2_cache_str},
5396 5446 { 0x47, 8, 64, 8*1024*1024, l3_cache_str},
5397 5447 { 0x46, 4, 64, 4*1024*1024, l3_cache_str},
5398 5448 { 0x45, 4, 32, 2*1024*1024, l2_cache_str},
5399 5449 { 0x44, 4, 32, 1024*1024, l2_cache_str},
5400 5450 { 0x43, 4, 32, 512*1024, l2_cache_str},
5401 5451 { 0x42, 4, 32, 256*1024, l2_cache_str},
5402 5452 { 0x41, 4, 32, 128*1024, l2_cache_str},
5403 5453 { 0x3e, 4, 64, 512*1024, sl2_cache_str},
5404 5454 { 0x3d, 6, 64, 384*1024, sl2_cache_str},
5405 5455 { 0x3c, 4, 64, 256*1024, sl2_cache_str},
5406 5456 { 0x3b, 2, 64, 128*1024, sl2_cache_str},
5407 5457 { 0x3a, 6, 64, 192*1024, sl2_cache_str},
5408 5458 { 0x39, 4, 64, 128*1024, sl2_cache_str},
5409 5459 { 0x30, 8, 64, 32*1024, l1_icache_str},
5410 5460 { 0x2c, 8, 64, 32*1024, l1_dcache_str},
5411 5461 { 0x29, 8, 64, 4096*1024, sl3_cache_str},
5412 5462 { 0x25, 8, 64, 2048*1024, sl3_cache_str},
5413 5463 { 0x23, 8, 64, 1024*1024, sl3_cache_str},
5414 5464 { 0x22, 4, 64, 512*1024, sl3_cache_str},
5415 5465 { 0x0e, 6, 64, 24*1024, l1_dcache_str},
5416 5466 { 0x0d, 4, 32, 16*1024, l1_dcache_str},
5417 5467 { 0x0c, 4, 32, 16*1024, l1_dcache_str},
5418 5468 { 0x0b, 4, 0, 4, itlb4M_str},
5419 5469 { 0x0a, 2, 32, 8*1024, l1_dcache_str},
5420 5470 { 0x08, 4, 32, 16*1024, l1_icache_str},
5421 5471 { 0x06, 4, 32, 8*1024, l1_icache_str},
5422 5472 { 0x05, 4, 0, 32, dtlb4M_str},
5423 5473 { 0x04, 4, 0, 8, dtlb4M_str},
5424 5474 { 0x03, 4, 0, 64, dtlb4k_str},
5425 5475 { 0x02, 4, 0, 2, itlb4M_str},
5426 5476 { 0x01, 4, 0, 32, itlb4k_str},
5427 5477 { 0 }
5428 5478 };
5429 5479
5430 5480 static const struct cachetab cyrix_ctab[] = {
5431 5481 { 0x70, 4, 0, 32, "tlb-4K" },
5432 5482 { 0x80, 4, 16, 16*1024, "l1-cache" },
5433 5483 { 0 }
5434 5484 };
5435 5485
5436 5486 /*
5437 5487 * Search a cache table for a matching entry
5438 5488 */
5439 5489 static const struct cachetab *
5440 5490 find_cacheent(const struct cachetab *ct, uint_t code)
5441 5491 {
5442 5492 if (code != 0) {
5443 5493 for (; ct->ct_code != 0; ct++)
5444 5494 if (ct->ct_code <= code)
5445 5495 break;
5446 5496 if (ct->ct_code == code)
5447 5497 return (ct);
5448 5498 }
5449 5499 return (NULL);
5450 5500 }
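/*
 * Illustration of the descending-order search above: looking up 0x83
 * stops at the { 0x83, 8, 32, 512*1024, l2_cache_str } entry and returns
 * it, while looking up a code absent from the table (say 0x8f) stops at
 * the first smaller code (0x87) and, since the codes differ, returns NULL.
 */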
5451 5501
5452 5502 /*
5453 5503 * Populate cachetab entry with L2 or L3 cache-information using
5454 5504 * cpuid function 4. This function is called from intel_walk_cacheinfo()
5455 5505 * when descriptor 0x49 is encountered. It returns 0 if no such cache
5456 5506 * information is found.
5457 5507 */
5458 5508 static int
5459 5509 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi)
5460 5510 {
5461 5511 uint32_t level, i;
5462 5512 int ret = 0;
5463 5513
5464 5514 for (i = 0; i < cpi->cpi_cache_leaf_size; i++) {
5465 5515 level = CPI_CACHE_LVL(cpi->cpi_cache_leaves[i]);
5466 5516
5467 5517 if (level == 2 || level == 3) {
5468 5518 ct->ct_assoc =
5469 5519 CPI_CACHE_WAYS(cpi->cpi_cache_leaves[i]) + 1;
5470 5520 ct->ct_line_size =
5471 5521 CPI_CACHE_COH_LN_SZ(cpi->cpi_cache_leaves[i]) + 1;
5472 5522 ct->ct_size = ct->ct_assoc *
5473 5523 (CPI_CACHE_PARTS(cpi->cpi_cache_leaves[i]) + 1) *
5474 5524 ct->ct_line_size *
5475 5525 (cpi->cpi_cache_leaves[i]->cp_ecx + 1);
5476 5526
5477 5527 if (level == 2) {
5478 5528 ct->ct_label = l2_cache_str;
5479 5529 } else if (level == 3) {
5480 5530 ct->ct_label = l3_cache_str;
5481 5531 }
5482 5532 ret = 1;
5483 5533 }
5484 5534 }
5485 5535
5486 5536 return (ret);
5487 5537 }
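/*
 * The size computation above follows the leaf 4 encoding:
 *
 *	size = (ways + 1) * (partitions + 1) * (line size + 1) * (sets + 1)
 *
 * where %ecx of the cache leaf holds the set count minus one.  As a
 * hypothetical example, a leaf reporting 7, 0, 63 and 4095 for those four
 * (minus-one) fields describes an 8 * 1 * 64 * 4096 = 2MB cache.
 */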
5488 5538
5489 5539 /*
5490 5540  * Walk the cacheinfo descriptor list, applying 'func' to every valid element.
5491 5541 * The walk is terminated if the walker returns non-zero.
5492 5542 */
5493 5543 static void
5494 5544 intel_walk_cacheinfo(struct cpuid_info *cpi,
5495 5545 void *arg, int (*func)(void *, const struct cachetab *))
5496 5546 {
5497 5547 const struct cachetab *ct;
5498 5548 struct cachetab des_49_ct, des_b1_ct;
5499 5549 uint8_t *dp;
5500 5550 int i;
5501 5551
5502 5552 if ((dp = cpi->cpi_cacheinfo) == NULL)
5503 5553 return;
5504 5554 for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
5505 5555 /*
5506 5556 * For overloaded descriptor 0x49 we use cpuid function 4
5507 5557 * if supported by the current processor, to create
5508 5558 * cache information.
5509 5559 * For overloaded descriptor 0xb1 we use X86_PAE flag
5510 5560 * to disambiguate the cache information.
5511 5561 */
5512 5562 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 &&
5513 5563 intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) {
5514 5564 ct = &des_49_ct;
5515 5565 } else if (*dp == 0xb1) {
5516 5566 des_b1_ct.ct_code = 0xb1;
5517 5567 des_b1_ct.ct_assoc = 4;
5518 5568 des_b1_ct.ct_line_size = 0;
5519 5569 if (is_x86_feature(x86_featureset, X86FSET_PAE)) {
5520 5570 des_b1_ct.ct_size = 8;
5521 5571 des_b1_ct.ct_label = itlb2M_str;
5522 5572 } else {
5523 5573 des_b1_ct.ct_size = 4;
5524 5574 des_b1_ct.ct_label = itlb4M_str;
5525 5575 }
5526 5576 ct = &des_b1_ct;
5527 5577 } else {
5528 5578 if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) {
5529 5579 continue;
5530 5580 }
5531 5581 }
5532 5582
5533 5583 if (func(arg, ct) != 0) {
5534 5584 break;
5535 5585 }
5536 5586 }
5537 5587 }
5538 5588
5539 5589 /*
5540 5590 * (Like the Intel one, except for Cyrix CPUs)
5541 5591 */
5542 5592 static void
5543 5593 cyrix_walk_cacheinfo(struct cpuid_info *cpi,
5544 5594 void *arg, int (*func)(void *, const struct cachetab *))
5545 5595 {
5546 5596 const struct cachetab *ct;
5547 5597 uint8_t *dp;
5548 5598 int i;
5549 5599
5550 5600 if ((dp = cpi->cpi_cacheinfo) == NULL)
5551 5601 return;
5552 5602 for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
5553 5603 /*
5554 5604 * Search Cyrix-specific descriptor table first ..
5555 5605 */
5556 5606 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) {
5557 5607 if (func(arg, ct) != 0)
5558 5608 break;
5559 5609 continue;
5560 5610 }
5561 5611 /*
5562 5612 * .. else fall back to the Intel one
5563 5613 */
5564 5614 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) {
5565 5615 if (func(arg, ct) != 0)
5566 5616 break;
5567 5617 continue;
5568 5618 }
5569 5619 }
5570 5620 }
5571 5621
5572 5622 /*
5573 5623 * A cacheinfo walker that adds associativity, line-size, and size properties
5574 5624 * to the devinfo node it is passed as an argument.
5575 5625 */
5576 5626 static int
5577 5627 add_cacheent_props(void *arg, const struct cachetab *ct)
5578 5628 {
5579 5629 dev_info_t *devi = arg;
5580 5630
5581 5631 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc);
5582 5632 if (ct->ct_line_size != 0)
5583 5633 add_cache_prop(devi, ct->ct_label, line_str,
5584 5634 ct->ct_line_size);
5585 5635 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size);
5586 5636 return (0);
5587 5637 }
5588 5638
5589 5639
5590 5640 static const char fully_assoc[] = "fully-associative?";
5591 5641
5592 5642 /*
5593 5643 * AMD style cache/tlb description
5594 5644 *
5595 5645 * Extended functions 5 and 6 directly describe properties of
5596 5646 * tlbs and various cache levels.
5597 5647 */
5598 5648 static void
5599 5649 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc)
5600 5650 {
5601 5651 switch (assoc) {
5602 5652 case 0: /* reserved; ignore */
5603 5653 break;
5604 5654 default:
5605 5655 add_cache_prop(devi, label, assoc_str, assoc);
5606 5656 break;
5607 5657 case 0xff:
5608 5658 add_cache_prop(devi, label, fully_assoc, 1);
5609 5659 break;
5610 5660 }
5611 5661 }
5612 5662
5613 5663 static void
5614 5664 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
5615 5665 {
5616 5666 if (size == 0)
5617 5667 return;
5618 5668 add_cache_prop(devi, label, size_str, size);
5619 5669 add_amd_assoc(devi, label, assoc);
5620 5670 }
5621 5671
5622 5672 static void
5623 5673 add_amd_cache(dev_info_t *devi, const char *label,
5624 5674 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
5625 5675 {
5626 5676 if (size == 0 || line_size == 0)
5627 5677 return;
5628 5678 add_amd_assoc(devi, label, assoc);
5629 5679 /*
5630 5680 * Most AMD parts have a sectored cache. Multiple cache lines are
5631 5681 * associated with each tag. A sector consists of all cache lines
5632 5682 * associated with a tag. For example, the AMD K6-III has a sector
5633 5683 * size of 2 cache lines per tag.
5634 5684 */
5635 5685 if (lines_per_tag != 0)
5636 5686 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
5637 5687 add_cache_prop(devi, label, line_str, line_size);
5638 5688 add_cache_prop(devi, label, size_str, size * 1024);
5639 5689 }
5640 5690
5641 5691 static void
5642 5692 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc)
5643 5693 {
5644 5694 switch (assoc) {
5645 5695 case 0: /* off */
5646 5696 break;
5647 5697 case 1:
5648 5698 case 2:
5649 5699 case 4:
5650 5700 add_cache_prop(devi, label, assoc_str, assoc);
5651 5701 break;
5652 5702 case 6:
5653 5703 add_cache_prop(devi, label, assoc_str, 8);
5654 5704 break;
5655 5705 case 8:
5656 5706 add_cache_prop(devi, label, assoc_str, 16);
5657 5707 break;
5658 5708 case 0xf:
5659 5709 add_cache_prop(devi, label, fully_assoc, 1);
5660 5710 break;
5661 5711 default: /* reserved; ignore */
5662 5712 break;
5663 5713 }
5664 5714 }
5665 5715
5666 5716 static void
5667 5717 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
5668 5718 {
5669 5719 if (size == 0 || assoc == 0)
5670 5720 return;
5671 5721 add_amd_l2_assoc(devi, label, assoc);
5672 5722 add_cache_prop(devi, label, size_str, size);
5673 5723 }
5674 5724
5675 5725 static void
5676 5726 add_amd_l2_cache(dev_info_t *devi, const char *label,
5677 5727 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
5678 5728 {
5679 5729 if (size == 0 || assoc == 0 || line_size == 0)
5680 5730 return;
5681 5731 add_amd_l2_assoc(devi, label, assoc);
5682 5732 if (lines_per_tag != 0)
5683 5733 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
5684 5734 add_cache_prop(devi, label, line_str, line_size);
5685 5735 add_cache_prop(devi, label, size_str, size * 1024);
5686 5736 }
5687 5737
5688 5738 static void
5689 5739 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi)
5690 5740 {
5691 5741 struct cpuid_regs *cp;
5692 5742
5693 5743 if (cpi->cpi_xmaxeax < 0x80000005)
5694 5744 return;
5695 5745 cp = &cpi->cpi_extd[5];
5696 5746
5697 5747 /*
5698 5748 * 4M/2M L1 TLB configuration
5699 5749 *
5700 5750 * We report the size for 2M pages because AMD uses two
5701 5751 * TLB entries for one 4M page.
5702 5752 */
5703 5753 add_amd_tlb(devi, "dtlb-2M",
5704 5754 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16));
5705 5755 add_amd_tlb(devi, "itlb-2M",
5706 5756 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0));
5707 5757
5708 5758 /*
5709 5759 * 4K L1 TLB configuration
5710 5760 */
5711 5761
5712 5762 switch (cpi->cpi_vendor) {
5713 5763 uint_t nentries;
5714 5764 case X86_VENDOR_TM:
5715 5765 if (cpi->cpi_family >= 5) {
5716 5766 /*
5717 5767 * Crusoe processors have 256 TLB entries, but
5718 5768 * cpuid data format constrains them to only
5719 5769 * reporting 255 of them.
5720 5770 */
5721 5771 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255)
5722 5772 nentries = 256;
5723 5773 /*
5724 5774 * Crusoe processors also have a unified TLB
5725 5775 */
5726 5776 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24),
5727 5777 nentries);
5728 5778 break;
5729 5779 }
5730 5780 /*FALLTHROUGH*/
5731 5781 default:
5732 5782 add_amd_tlb(devi, itlb4k_str,
5733 5783 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16));
5734 5784 add_amd_tlb(devi, dtlb4k_str,
5735 5785 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0));
5736 5786 break;
5737 5787 }
5738 5788
5739 5789 /*
5740 5790 * data L1 cache configuration
5741 5791 */
5742 5792
5743 5793 add_amd_cache(devi, l1_dcache_str,
5744 5794 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16),
5745 5795 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0));
5746 5796
5747 5797 /*
5748 5798 * code L1 cache configuration
5749 5799 */
5750 5800
5751 5801 add_amd_cache(devi, l1_icache_str,
5752 5802 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16),
5753 5803 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0));
5754 5804
5755 5805 if (cpi->cpi_xmaxeax < 0x80000006)
5756 5806 return;
5757 5807 cp = &cpi->cpi_extd[6];
5758 5808
5759 5809 /* Check for a unified L2 TLB for large pages */
5760 5810
5761 5811 if (BITX(cp->cp_eax, 31, 16) == 0)
5762 5812 add_amd_l2_tlb(devi, "l2-tlb-2M",
5763 5813 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
5764 5814 else {
5765 5815 add_amd_l2_tlb(devi, "l2-dtlb-2M",
5766 5816 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
5767 5817 add_amd_l2_tlb(devi, "l2-itlb-2M",
5768 5818 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
5769 5819 }
5770 5820
5771 5821 /* Check for a unified L2 TLB for 4K pages */
5772 5822
5773 5823 if (BITX(cp->cp_ebx, 31, 16) == 0) {
5774 5824 add_amd_l2_tlb(devi, "l2-tlb-4K",
5775 5825 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
5776 5826 } else {
5777 5827 add_amd_l2_tlb(devi, "l2-dtlb-4K",
5778 5828 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
5779 5829 add_amd_l2_tlb(devi, "l2-itlb-4K",
5780 5830 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
5781 5831 }
5782 5832
5783 5833 add_amd_l2_cache(devi, l2_cache_str,
5784 5834 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12),
5785 5835 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0));
5786 5836 }
5787 5837
5788 5838 /*
5789 5839  * There are two basic ways that the x86 world describes its cache
5790 5840 * and tlb architecture - Intel's way and AMD's way.
5791 5841 *
5792 5842 * Return which flavor of cache architecture we should use
5793 5843 */
5794 5844 static int
5795 5845 x86_which_cacheinfo(struct cpuid_info *cpi)
5796 5846 {
5797 5847 switch (cpi->cpi_vendor) {
5798 5848 case X86_VENDOR_Intel:
5799 5849 if (cpi->cpi_maxeax >= 2)
5800 5850 return (X86_VENDOR_Intel);
5801 5851 break;
5802 5852 case X86_VENDOR_AMD:
5803 5853 /*
5804 5854 * The K5 model 1 was the first part from AMD that reported
5805 5855 * cache sizes via extended cpuid functions.
5806 5856 */
5807 5857 if (cpi->cpi_family > 5 ||
5808 5858 (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
5809 5859 return (X86_VENDOR_AMD);
5810 5860 break;
5811 5861 case X86_VENDOR_TM:
5812 5862 if (cpi->cpi_family >= 5)
5813 5863 return (X86_VENDOR_AMD);
5814 5864 /*FALLTHROUGH*/
5815 5865 default:
5816 5866 /*
5817 5867 * If they have extended CPU data for 0x80000005
5818 5868 * then we assume they have AMD-format cache
5819 5869 * information.
5820 5870 *
5821 5871 * If not, and the vendor happens to be Cyrix,
5822 5872 		 * then try our Cyrix-specific handler.
5823 5873 *
5824 5874 * If we're not Cyrix, then assume we're using Intel's
5825 5875 * table-driven format instead.
5826 5876 */
5827 5877 if (cpi->cpi_xmaxeax >= 0x80000005)
5828 5878 return (X86_VENDOR_AMD);
5829 5879 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix)
5830 5880 return (X86_VENDOR_Cyrix);
5831 5881 else if (cpi->cpi_maxeax >= 2)
5832 5882 return (X86_VENDOR_Intel);
5833 5883 break;
5834 5884 }
5835 5885 return (-1);
5836 5886 }
5837 5887
5838 5888 void
5839 5889 cpuid_set_cpu_properties(void *dip, processorid_t cpu_id,
5840 5890 struct cpuid_info *cpi)
5841 5891 {
5842 5892 dev_info_t *cpu_devi;
5843 5893 int create;
5844 5894
5845 5895 cpu_devi = (dev_info_t *)dip;
5846 5896
5847 5897 /* device_type */
5848 5898 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
5849 5899 "device_type", "cpu");
5850 5900
5851 5901 /* reg */
5852 5902 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5853 5903 "reg", cpu_id);
5854 5904
5855 5905 /* cpu-mhz, and clock-frequency */
5856 5906 if (cpu_freq > 0) {
5857 5907 long long mul;
5858 5908
5859 5909 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5860 5910 "cpu-mhz", cpu_freq);
5861 5911 if ((mul = cpu_freq * 1000000LL) <= INT_MAX)
5862 5912 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5863 5913 "clock-frequency", (int)mul);
5864 5914 }
5865 5915
5866 5916 if (!is_x86_feature(x86_featureset, X86FSET_CPUID)) {
5867 5917 return;
5868 5918 }
5869 5919
5870 5920 /* vendor-id */
5871 5921 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
5872 5922 "vendor-id", cpi->cpi_vendorstr);
5873 5923
5874 5924 if (cpi->cpi_maxeax == 0) {
5875 5925 return;
5876 5926 }
5877 5927
5878 5928 /*
5879 5929 * family, model, and step
5880 5930 */
5881 5931 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5882 5932 "family", CPI_FAMILY(cpi));
5883 5933 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5884 5934 "cpu-model", CPI_MODEL(cpi));
5885 5935 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5886 5936 "stepping-id", CPI_STEP(cpi));
5887 5937
5888 5938 /* type */
5889 5939 switch (cpi->cpi_vendor) {
5890 5940 case X86_VENDOR_Intel:
5891 5941 create = 1;
5892 5942 break;
5893 5943 default:
5894 5944 create = 0;
5895 5945 break;
5896 5946 }
5897 5947 if (create)
5898 5948 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5899 5949 "type", CPI_TYPE(cpi));
5900 5950
5901 5951 /* ext-family */
5902 5952 switch (cpi->cpi_vendor) {
5903 5953 case X86_VENDOR_Intel:
5904 5954 case X86_VENDOR_AMD:
5905 5955 create = cpi->cpi_family >= 0xf;
5906 5956 break;
5907 5957 default:
5908 5958 create = 0;
5909 5959 break;
5910 5960 }
5911 5961 if (create)
5912 5962 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5913 5963 "ext-family", CPI_FAMILY_XTD(cpi));
5914 5964
5915 5965 /* ext-model */
5916 5966 switch (cpi->cpi_vendor) {
5917 5967 case X86_VENDOR_Intel:
5918 5968 create = IS_EXTENDED_MODEL_INTEL(cpi);
5919 5969 break;
5920 5970 case X86_VENDOR_AMD:
5921 5971 create = CPI_FAMILY(cpi) == 0xf;
5922 5972 break;
5923 5973 default:
5924 5974 create = 0;
5925 5975 break;
5926 5976 }
5927 5977 if (create)
5928 5978 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5929 5979 "ext-model", CPI_MODEL_XTD(cpi));
5930 5980
5931 5981 /* generation */
5932 5982 switch (cpi->cpi_vendor) {
5933 5983 case X86_VENDOR_AMD:
5934 5984 /*
5935 5985 * AMD K5 model 1 was the first part to support this
5936 5986 */
5937 5987 create = cpi->cpi_xmaxeax >= 0x80000001;
5938 5988 break;
5939 5989 default:
5940 5990 create = 0;
5941 5991 break;
5942 5992 }
5943 5993 if (create)
5944 5994 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5945 5995 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8));
5946 5996
5947 5997 /* brand-id */
5948 5998 switch (cpi->cpi_vendor) {
5949 5999 case X86_VENDOR_Intel:
5950 6000 /*
5951 6001 		 * brand id first appeared on Pentium III Xeon model 8 and
5952 6002 		 * Celeron model 8 processors, and on Opteron
5953 6003 */
5954 6004 create = cpi->cpi_family > 6 ||
5955 6005 (cpi->cpi_family == 6 && cpi->cpi_model >= 8);
5956 6006 break;
5957 6007 case X86_VENDOR_AMD:
5958 6008 create = cpi->cpi_family >= 0xf;
5959 6009 break;
5960 6010 default:
5961 6011 create = 0;
5962 6012 break;
5963 6013 }
5964 6014 if (create && cpi->cpi_brandid != 0) {
5965 6015 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5966 6016 "brand-id", cpi->cpi_brandid);
5967 6017 }
5968 6018
5969 6019 /* chunks, and apic-id */
5970 6020 switch (cpi->cpi_vendor) {
5971 6021 /*
5972 6022 * first available on Pentium IV and Opteron (K8)
5973 6023 */
5974 6024 case X86_VENDOR_Intel:
5975 6025 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
5976 6026 break;
5977 6027 case X86_VENDOR_AMD:
5978 6028 create = cpi->cpi_family >= 0xf;
5979 6029 break;
5980 6030 default:
5981 6031 create = 0;
5982 6032 break;
5983 6033 }
5984 6034 if (create) {
5985 6035 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5986 6036 "chunks", CPI_CHUNKS(cpi));
5987 6037 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5988 6038 "apic-id", cpi->cpi_apicid);
5989 6039 if (cpi->cpi_chipid >= 0) {
5990 6040 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5991 6041 "chip#", cpi->cpi_chipid);
5992 6042 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5993 6043 "clog#", cpi->cpi_clogid);
5994 6044 }
5995 6045 }
5996 6046
5997 6047 /* cpuid-features */
5998 6048 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
5999 6049 "cpuid-features", CPI_FEATURES_EDX(cpi));
6000 6050
6001 6051
6002 6052 /* cpuid-features-ecx */
6003 6053 switch (cpi->cpi_vendor) {
6004 6054 case X86_VENDOR_Intel:
6005 6055 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
6006 6056 break;
6007 6057 case X86_VENDOR_AMD:
6008 6058 create = cpi->cpi_family >= 0xf;
6009 6059 break;
6010 6060 default:
6011 6061 create = 0;
6012 6062 break;
6013 6063 }
6014 6064 if (create)
6015 6065 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6016 6066 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi));
6017 6067
6018 6068 /* ext-cpuid-features */
6019 6069 switch (cpi->cpi_vendor) {
6020 6070 case X86_VENDOR_Intel:
6021 6071 case X86_VENDOR_AMD:
6022 6072 case X86_VENDOR_Cyrix:
6023 6073 case X86_VENDOR_TM:
6024 6074 case X86_VENDOR_Centaur:
6025 6075 create = cpi->cpi_xmaxeax >= 0x80000001;
6026 6076 break;
6027 6077 default:
6028 6078 create = 0;
6029 6079 break;
6030 6080 }
6031 6081 if (create) {
6032 6082 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6033 6083 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi));
6034 6084 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
6035 6085 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi));
6036 6086 }
6037 6087
6038 6088 /*
6039 6089 * Brand String first appeared in Intel Pentium IV, AMD K5
6040 6090 * model 1, and Cyrix GXm. On earlier models we try and
6041 6091 * simulate something similar .. so this string should always
6042 6092 	 * say -something- about the processor, however lame.
6043 6093 */
6044 6094 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
6045 6095 "brand-string", cpi->cpi_brandstr);
6046 6096
6047 6097 /*
6048 6098 * Finally, cache and tlb information
6049 6099 */
6050 6100 switch (x86_which_cacheinfo(cpi)) {
6051 6101 case X86_VENDOR_Intel:
6052 6102 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
6053 6103 break;
6054 6104 case X86_VENDOR_Cyrix:
6055 6105 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
6056 6106 break;
6057 6107 case X86_VENDOR_AMD:
6058 6108 amd_cache_info(cpi, cpu_devi);
6059 6109 break;
6060 6110 default:
6061 6111 break;
6062 6112 }
6063 6113 }
6064 6114
6065 6115 struct l2info {
6066 6116 int *l2i_csz;
6067 6117 int *l2i_lsz;
6068 6118 int *l2i_assoc;
6069 6119 int l2i_ret;
6070 6120 };
6071 6121
6072 6122 /*
6073 6123 * A cacheinfo walker that fetches the size, line-size and associativity
6074 6124 * of the L2 cache
6075 6125 */
6076 6126 static int
6077 6127 intel_l2cinfo(void *arg, const struct cachetab *ct)
6078 6128 {
6079 6129 struct l2info *l2i = arg;
6080 6130 int *ip;
6081 6131
6082 6132 if (ct->ct_label != l2_cache_str &&
6083 6133 ct->ct_label != sl2_cache_str)
6084 6134 return (0); /* not an L2 -- keep walking */
6085 6135
6086 6136 if ((ip = l2i->l2i_csz) != NULL)
6087 6137 *ip = ct->ct_size;
6088 6138 if ((ip = l2i->l2i_lsz) != NULL)
6089 6139 *ip = ct->ct_line_size;
6090 6140 if ((ip = l2i->l2i_assoc) != NULL)
6091 6141 *ip = ct->ct_assoc;
6092 6142 l2i->l2i_ret = ct->ct_size;
6093 6143 return (1); /* was an L2 -- terminate walk */
6094 6144 }
6095 6145
6096 6146 /*
6097 6147 * AMD L2/L3 Cache and TLB Associativity Field Definition:
6098 6148 *
6099 6149 * Unlike the associativity for the L1 cache and tlb where the 8 bit
6100 6150 * value is the associativity, the associativity for the L2 cache and
6101 6151 * tlb is encoded in the following table. The 4 bit L2 value serves as
6102 6152 * an index into the amd_afd[] array to determine the associativity.
6103 6153 * -1 is undefined. 0 is fully associative.
6104 6154 */
6105 6155
6106 6156 static int amd_afd[] =
6107 6157 {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0};
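/*
 * Illustrative decode (not taken from any particular part): if CPUID
 * function 0x80000006 reported an associativity field (%ecx bits 15:12)
 * of 0x6 and a size field (%ecx bits 31:16) of 0x200, amd_l2cacheinfo()
 * below would report an 8-way associative (amd_afd[6] == 8), 512 Kbyte L2.
 */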
6108 6158
6109 6159 static void
6110 6160 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i)
6111 6161 {
6112 6162 struct cpuid_regs *cp;
6113 6163 uint_t size, assoc;
6114 6164 int i;
6115 6165 int *ip;
6116 6166
6117 6167 if (cpi->cpi_xmaxeax < 0x80000006)
6118 6168 return;
6119 6169 cp = &cpi->cpi_extd[6];
6120 6170
6121 6171 if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 &&
6122 6172 (size = BITX(cp->cp_ecx, 31, 16)) != 0) {
6123 6173 uint_t cachesz = size * 1024;
6124 6174 assoc = amd_afd[i];
6125 6175
6126 6176 ASSERT(assoc != -1);
6127 6177
6128 6178 if ((ip = l2i->l2i_csz) != NULL)
6129 6179 *ip = cachesz;
6130 6180 if ((ip = l2i->l2i_lsz) != NULL)
6131 6181 *ip = BITX(cp->cp_ecx, 7, 0);
6132 6182 if ((ip = l2i->l2i_assoc) != NULL)
6133 6183 *ip = assoc;
6134 6184 l2i->l2i_ret = cachesz;
6135 6185 }
6136 6186 }
6137 6187
6138 6188 int
6139 6189 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc)
6140 6190 {
6141 6191 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
6142 6192 struct l2info __l2info, *l2i = &__l2info;
6143 6193
6144 6194 l2i->l2i_csz = csz;
6145 6195 l2i->l2i_lsz = lsz;
6146 6196 l2i->l2i_assoc = assoc;
6147 6197 l2i->l2i_ret = -1;
6148 6198
6149 6199 switch (x86_which_cacheinfo(cpi)) {
6150 6200 case X86_VENDOR_Intel:
6151 6201 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
6152 6202 break;
6153 6203 case X86_VENDOR_Cyrix:
6154 6204 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
6155 6205 break;
6156 6206 case X86_VENDOR_AMD:
6157 6207 amd_l2cacheinfo(cpi, l2i);
6158 6208 break;
6159 6209 default:
6160 6210 break;
6161 6211 }
6162 6212 return (l2i->l2i_ret);
6163 6213 }
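/*
 * Example use (illustrative only): a caller interested solely in the L2
 * size may pass NULL for the out-parameters it does not need, e.g.
 *	int l2size = getl2cacheinfo(CPU, NULL, NULL, NULL);
 * The return value reports the L2 size; -1 means no L2 information
 * could be determined.
 */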
6164 6214
6165 6215 #if !defined(__xpv)
6166 6216
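/*
 * Allocate a monitor/mwait line for this CPU. The buffer is sized from the
 * largest monitor-line size advertised by CPUID (cpi_mwait.mon_max) and is
 * returned aligned to that size; NULL is returned if the CPU advertised no
 * monitor-line size.
 */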
6167 6217 uint32_t *
6168 6218 cpuid_mwait_alloc(cpu_t *cpu)
6169 6219 {
6170 6220 uint32_t *ret;
6171 6221 size_t mwait_size;
6172 6222
6173 6223 ASSERT(cpuid_checkpass(CPU, 2));
6174 6224
6175 6225 mwait_size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max;
6176 6226 if (mwait_size == 0)
6177 6227 return (NULL);
6178 6228
6179 6229 /*
6180 6230 * kmem_alloc() returns cache-line-aligned data for mwait_size
6181 6231 * allocations, and mwait_size is currently cache line sized. Neither
6182 6232 * of these implementation details is guaranteed to be true in the
6183 6233 * future.
6184 6234 *
6185 6235 * First try allocating mwait_size, as kmem_alloc() currently returns
6186 6236 * correctly aligned memory. If it does not, allocate twice mwait_size
6187 6237 * and round the pointer up to a mwait_size boundary.
6188 6238 *
6189 6239 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we
6190 6240 * decide to free this memory.
6191 6241 */
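	/*
	 * Illustrative example: with a 64-byte mon_max, the first
	 * kmem_zalloc(64) is used as-is if it happens to be 64-byte
	 * aligned; otherwise it is freed, 128 bytes are allocated, and
	 * the returned pointer is rounded up to the next 64-byte
	 * boundary inside that larger buffer.
	 */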
6192 6242 ret = kmem_zalloc(mwait_size, KM_SLEEP);
6193 6243 if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) {
6194 6244 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
6195 6245 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size;
6196 6246 *ret = MWAIT_RUNNING;
6197 6247 return (ret);
6198 6248 } else {
6199 6249 kmem_free(ret, mwait_size);
6200 6250 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP);
6201 6251 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
6202 6252 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2;
6203 6253 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size);
6204 6254 *ret = MWAIT_RUNNING;
6205 6255 return (ret);
6206 6256 }
6207 6257 }
6208 6258
6209 6259 void
6210 6260 cpuid_mwait_free(cpu_t *cpu)
6211 6261 {
6212 6262 if (cpu->cpu_m.mcpu_cpi == NULL) {
6213 6263 return;
6214 6264 }
6215 6265
6216 6266 if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL &&
6217 6267 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) {
6218 6268 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual,
6219 6269 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual);
6220 6270 }
6221 6271
6222 6272 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL;
6223 6273 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0;
6224 6274 }
6225 6275
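/*
 * Patch the tsc_read() entry point with the implementation selected by
 * 'flag' (no RDTSC, RDTSC preceded by MFENCE or LFENCE, or RDTSCP) and
 * record the selection in tsc_type. Unknown flag values panic.
 */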
6226 6276 void
6227 6277 patch_tsc_read(int flag)
6228 6278 {
6229 6279 size_t cnt;
6230 6280
6231 6281 switch (flag) {
6232 6282 case TSC_NONE:
6233 6283 cnt = &_no_rdtsc_end - &_no_rdtsc_start;
6234 6284 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt);
6235 6285 break;
6236 6286 case TSC_RDTSC_MFENCE:
6237 6287 cnt = &_tsc_mfence_end - &_tsc_mfence_start;
6238 6288 (void) memcpy((void *)tsc_read,
6239 6289 (void *)&_tsc_mfence_start, cnt);
6240 6290 break;
6241 6291 case TSC_RDTSC_LFENCE:
6242 6292 cnt = &_tsc_lfence_end - &_tsc_lfence_start;
6243 6293 (void) memcpy((void *)tsc_read,
6244 6294 (void *)&_tsc_lfence_start, cnt);
6245 6295 break;
6246 6296 case TSC_TSCP:
6247 6297 cnt = &_tscp_end - &_tscp_start;
6248 6298 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt);
6249 6299 break;
6250 6300 default:
6251 6301 /* Bail for unexpected TSC types. (TSC_NONE covers 0) */
6252 6302 cmn_err(CE_PANIC, "Unrecognized TSC type: %d", flag);
6253 6303 break;
6254 6304 }
6255 6305 tsc_type = flag;
6256 6306 }
6257 6307
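/*
 * Determine whether deep ACPI C-states can be used: on Intel this requires
 * the invariant-TSC indication in CPUID leaf 0x80000007 %edx, so that the
 * TSC keeps counting at a constant rate across C-state transitions. Other
 * vendors are not currently supported here.
 */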
6258 6308 int
6259 6309 cpuid_deep_cstates_supported(void)
6260 6310 {
6261 6311 struct cpuid_info *cpi;
6262 6312 struct cpuid_regs regs;
6263 6313
6264 6314 ASSERT(cpuid_checkpass(CPU, 1));
6265 6315
6266 6316 cpi = CPU->cpu_m.mcpu_cpi;
6267 6317
6268 6318 if (!is_x86_feature(x86_featureset, X86FSET_CPUID))
6269 6319 return (0);
6270 6320
6271 6321 switch (cpi->cpi_vendor) {
6272 6322 case X86_VENDOR_Intel:
6273 6323 if (cpi->cpi_xmaxeax < 0x80000007)
6274 6324 return (0);
6275 6325
6276 6326 /*
6277 6327 * Does the TSC run at a constant rate in all ACPI C-states?
6278 6328 */
6279 6329 regs.cp_eax = 0x80000007;
6280 6330 (void) __cpuid_insn(&regs);
6281 6331 return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE);
6282 6332
6283 6333 default:
6284 6334 return (0);
6285 6335 }
6286 6336 }
6287 6337
6288 6338 #endif /* !__xpv */
6289 6339
6290 6340 void
6291 6341 post_startup_cpu_fixups(void)
6292 6342 {
6293 6343 #ifndef __xpv
6294 6344 /*
6295 6345 * Some AMD processors support C1E state. Entering this state will
6296 6346 * cause the local APIC timer to stop, which we can't deal with at
6297 6347 * this time.
6298 6348 */
6299 6349 if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) {
6300 6350 on_trap_data_t otd;
6301 6351 uint64_t reg;
6302 6352
6303 6353 if (!on_trap(&otd, OT_DATA_ACCESS)) {
6304 6354 reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT);
6305 6355 /* Disable C1E state if it is enabled by BIOS */
6306 6356 if ((reg >> AMD_ACTONCMPHALT_SHIFT) &
6307 6357 AMD_ACTONCMPHALT_MASK) {
6308 6358 reg &= ~(AMD_ACTONCMPHALT_MASK <<
6309 6359 AMD_ACTONCMPHALT_SHIFT);
6310 6360 wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg);
6311 6361 }
6312 6362 }
6313 6363 no_trap();
6314 6364 }
6315 6365 #endif /* !__xpv */
6316 6366 }
6317 6367
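/*
 * Enable PCID on this CPU if it is available: latch the x86_use_pcid and
 * x86_use_invpcid decisions from the feature set (unless they were already
 * set), verify that %cr3 carries no PCID bits, and then set CR4.PCIDE.
 */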
6318 6368 void
6319 6369 enable_pcid(void)
6320 6370 {
6321 6371 if (x86_use_pcid == -1)
6322 6372 x86_use_pcid = is_x86_feature(x86_featureset, X86FSET_PCID);
6323 6373
6324 6374 if (x86_use_invpcid == -1) {
6325 6375 x86_use_invpcid = is_x86_feature(x86_featureset,
6326 6376 X86FSET_INVPCID);
6327 6377 }
6328 6378
6329 6379 if (!x86_use_pcid)
6330 6380 return;
6331 6381
6332 6382 /*
6333 6383 * Intel says that on setting PCIDE, it immediately starts using the PCID
6334 6384 * bits; better make sure there's nothing there.
6335 6385 */
6336 6386 ASSERT((getcr3() & MMU_PAGEOFFSET) == PCID_NONE);
6337 6387
6338 6388 setcr4(getcr4() | CR4_PCIDE);
6339 6389 }
6340 6390
6341 6391 /*
6342 6392 * Set up the necessary registers to enable XSAVE on this processor.
6343 6393 * This function needs to be called early enough, so that no xsave/xrstor
6344 6394 * ops will execute on the processor before the MSRs are properly set up.
6345 6395 *
6346 6396 * The current implementation makes the following assumptions:
6347 6397 * - cpuid_pass1() is done, so that X86 features are known.
6348 6398 * - fpu_probe() is done, so that fp_save_mech is chosen.
6349 6399 */
6350 6400 void
6351 6401 xsave_setup_msr(cpu_t *cpu)
6352 6402 {
6353 6403 ASSERT(fp_save_mech == FP_XSAVE);
6354 6404 ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
6355 6405
6356 6406 /* Enable OSXSAVE in CR4. */
6357 6407 setcr4(getcr4() | CR4_OSXSAVE);
6358 6408 /*
6359 6409 * Update SW copy of ECX, so that /dev/cpu/self/cpuid will report
6360 6410 * correct value.
6361 6411 */
6362 6412 cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_ecx |= CPUID_INTC_ECX_OSXSAVE;
6363 6413 setup_xfem();
6364 6414 }
6365 6415
6366 6416 /*
6367 6417 * Starting with the Westmere processor, the local
6368 6418 * APIC timer will continue running in all C-states,
6369 6419 * including the deepest C-states.
6370 6420 */
6371 6421 int
6372 6422 cpuid_arat_supported(void)
6373 6423 {
6374 6424 struct cpuid_info *cpi;
6375 6425 struct cpuid_regs regs;
6376 6426
6377 6427 ASSERT(cpuid_checkpass(CPU, 1));
6378 6428 ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
6379 6429
6380 6430 cpi = CPU->cpu_m.mcpu_cpi;
6381 6431
6382 6432 switch (cpi->cpi_vendor) {
6383 6433 case X86_VENDOR_Intel:
6384 6434 /*
6385 6435 * Always-running Local APIC Timer is
6386 6436 * indicated by CPUID.6.EAX[2].
6387 6437 */
6388 6438 if (cpi->cpi_maxeax >= 6) {
6389 6439 regs.cp_eax = 6;
6390 6440 (void) cpuid_insn(NULL, &regs);
6391 6441 return (regs.cp_eax & CPUID_CSTATE_ARAT);
6392 6442 } else {
6393 6443 return (0);
6394 6444 }
6395 6445 default:
6396 6446 return (0);
6397 6447 }
6398 6448 }
6399 6449
6400 6450 /*
6401 6451 * Check support for Intel ENERGY_PERF_BIAS feature
6402 6452 */
6403 6453 int
6404 6454 cpuid_iepb_supported(struct cpu *cp)
6405 6455 {
6406 6456 struct cpuid_info *cpi = cp->cpu_m.mcpu_cpi;
6407 6457 struct cpuid_regs regs;
6408 6458
6409 6459 ASSERT(cpuid_checkpass(cp, 1));
6410 6460
6411 6461 if (!(is_x86_feature(x86_featureset, X86FSET_CPUID)) ||
6412 6462 !(is_x86_feature(x86_featureset, X86FSET_MSR))) {
6413 6463 return (0);
6414 6464 }
6415 6465
6416 6466 /*
6417 6467 * Intel ENERGY_PERF_BIAS MSR is indicated by
6418 6468 * capability bit CPUID.6.ECX.3
6419 6469 */
6420 6470 if ((cpi->cpi_vendor != X86_VENDOR_Intel) || (cpi->cpi_maxeax < 6))
6421 6471 return (0);
6422 6472
6423 6473 regs.cp_eax = 0x6;
6424 6474 (void) cpuid_insn(NULL, &regs);
6425 6475 return (regs.cp_ecx & CPUID_EPB_SUPPORT);
6426 6476 }
6427 6477
6428 6478 /*
6429 6479 * Check support for TSC deadline timer
6430 6480 *
6431 6481 * The TSC deadline timer provides a superior software programming
6432 6482 * model over the local APIC timer because it eliminates "time drift":
6433 6483 * instead of specifying a relative time, software specifies an
6434 6484 * absolute time as the target at which the processor should
6435 6485 * generate a timer event.
6436 6486 */
6437 6487 int
6438 6488 cpuid_deadline_tsc_supported(void)
6439 6489 {
6440 6490 struct cpuid_info *cpi = CPU->cpu_m.mcpu_cpi;
6441 6491 struct cpuid_regs regs;
6442 6492
6443 6493 ASSERT(cpuid_checkpass(CPU, 1));
6444 6494 ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
6445 6495
6446 6496 switch (cpi->cpi_vendor) {
6447 6497 case X86_VENDOR_Intel:
6448 6498 if (cpi->cpi_maxeax >= 1) {
6449 6499 regs.cp_eax = 1;
6450 6500 (void) cpuid_insn(NULL, &regs);
6451 6501 return (regs.cp_ecx & CPUID_DEADLINE_TSC);
6452 6502 } else {
6453 6503 return (0);
6454 6504 }
6455 6505 default:
6456 6506 return (0);
6457 6507 }
6458 6508 }
6459 6509
6460 6510 #if defined(__amd64) && !defined(__xpv)
6461 6511 /*
6462 6512 * Patch in versions of bcopy for high-performance Intel Nehalem (Nhm)
6463 6513 * processors and later.
6464 6514 */
6465 6515 void
6466 6516 patch_memops(uint_t vendor)
6467 6517 {
6468 6518 size_t cnt, i;
6469 6519 caddr_t to, from;
6470 6520
6471 6521 if ((vendor == X86_VENDOR_Intel) &&
6472 6522 is_x86_feature(x86_featureset, X86FSET_SSE4_2)) {
6473 6523 cnt = &bcopy_patch_end - &bcopy_patch_start;
6474 6524 to = &bcopy_ck_size;
6475 6525 from = &bcopy_patch_start;
6476 6526 for (i = 0; i < cnt; i++) {
6477 6527 *to++ = *from++;
6478 6528 }
6479 6529 }
6480 6530 }
6481 6531 #endif /* __amd64 && !__xpv */
6482 6532
6483 6533 /*
6484 6534 * We're being asked to tell the system how many bits are required to represent
6485 6535 * the various core and strand IDs. While it's tempting to derive this based
6486 6536 * on the values in cpi_ncore_per_chip and cpi_ncpu_per_chip, that isn't quite
6487 6537 * correct. Instead, this needs to be based on the number of bits that the APIC
6488 6538 * allows for these different configurations. We only update these to a larger
6489 6539 * value if we find one.
6490 6540 */
6491 6541 void
6492 6542 cpuid_get_ext_topo(cpu_t *cpu, uint_t *core_nbits, uint_t *strand_nbits)
6493 6543 {
6494 6544 struct cpuid_info *cpi;
6495 6545
6496 6546 VERIFY(cpuid_checkpass(CPU, 1));
6497 6547 cpi = cpu->cpu_m.mcpu_cpi;
6498 6548
6499 6549 if (cpi->cpi_ncore_bits > *core_nbits) {
6500 6550 *core_nbits = cpi->cpi_ncore_bits;
6501 6551 }
6502 6552
6503 6553 if (cpi->cpi_nthread_bits > *strand_nbits) {
6504 6554 *strand_nbits = cpi->cpi_nthread_bits;
6505 6555 }
6506 6556 }
6507 6557
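/*
 * Re-read the CPUID leaves that carry security-relevant bits after a
 * microcode update: standard leaf 7 on Intel and extended leaf 8 on AMD.
 * The results are fed to cpuid_scan_security() to populate the supplied
 * feature set; vendors without such leaves leave the set empty.
 */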
6508 6558 void
6509 6559 cpuid_pass_ucode(cpu_t *cpu, uchar_t *fset)
6510 6560 {
6511 6561 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
6512 6562 struct cpuid_regs cp;
6513 6563
6514 6564 /*
6515 6565 * Reread the CPUID portions that we need for various security
6516 6566 * information.
6517 6567 */
6518 6568 if (cpi->cpi_vendor == X86_VENDOR_Intel) {
6519 6569 /*
6520 6570 * Check if we now have leaf 7 available to us.
6521 6571 */
6522 6572 if (cpi->cpi_maxeax < 7) {
6523 6573 bzero(&cp, sizeof (cp));
6524 6574 cp.cp_eax = 0;
6525 6575 cpi->cpi_maxeax = __cpuid_insn(&cp);
6526 6576 if (cpi->cpi_maxeax < 7)
6527 6577 return;
6528 6578 }
6529 6579
6530 6580 bzero(&cp, sizeof (cp));
6531 6581 cp.cp_eax = 7;
6532 6582 cp.cp_ecx = 0;
6533 6583 (void) __cpuid_insn(&cp);
6534 6584 cpi->cpi_std[7] = cp;
6535 6585 } else if (cpi->cpi_vendor == X86_VENDOR_AMD) {
6536 6586 /* No xcpuid support */
6537 6587 if (cpi->cpi_family < 5 ||
6538 6588 (cpi->cpi_family == 5 && cpi->cpi_model < 1))
6539 6589 return;
6540 6590
6541 6591 if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8) {
6542 6592 bzero(&cp, sizeof (cp));
6543 6593 cp.cp_eax = CPUID_LEAF_EXT_0;
6544 6594 cpi->cpi_xmaxeax = __cpuid_insn(&cp);
6545 6595 if (cpi->cpi_xmaxeax < CPUID_LEAF_EXT_8) {
6546 6596 return;
6547 6597 }
6548 6598 }
6549 6599
6550 6600 bzero(&cp, sizeof (cp));
6551 6601 cp.cp_eax = CPUID_LEAF_EXT_8;
6552 6602 (void) __cpuid_insn(&cp);
6553 6603 platform_cpuid_mangle(cpi->cpi_vendor, CPUID_LEAF_EXT_8, &cp);
6554 6604 cpi->cpi_extd[8] = cp;
6555 6605 } else {
6556 6606 /*
6557 6607 * Nothing to do here. Return an empty set which has already
6558 6608 * been zeroed for us.
6559 6609 */
6560 6610 return;
6561 6611 }
6562 6612 cpuid_scan_security(cpu, fset);
6563 6613 }
6564 6614
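/*
 * Cross-call handler: each CPU rescans its security-relevant CPUID state
 * into its own slot of the shared feature-set array passed in arg0.
 */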
6565 6615 /* ARGSUSED */
6566 6616 static int
6567 6617 cpuid_post_ucodeadm_xc(xc_arg_t arg0, xc_arg_t arg1, xc_arg_t arg2)
6568 6618 {
6569 6619 uchar_t *fset;
6570 6620
6571 6621 fset = (uchar_t *)(arg0 + sizeof (x86_featureset) * CPU->cpu_id);
6572 6622 cpuid_pass_ucode(CPU, fset);
6573 6623
6574 6624 return (0);
6575 6625 }
6576 6626
6577 6627 /*
6578 6628 * After a microcode update where the version has changed, we need to
6579 6629 * rescan CPUID. To do this we check every CPU to make sure that they have the
6580 6630 * same microcode. Then we perform a cross call to all such CPUs. It's the
6581 6631 * caller's job to make sure that no one else can end up doing an update while
6582 6632 * this is going on.
6583 6633 *
6584 6634 * We assume that the system is microcode capable if we're called.
6585 6635 */
6586 6636 void
6587 6637 cpuid_post_ucodeadm(void)
6588 6638 {
6589 6639 uint32_t rev;
6590 6640 int i;
6591 6641 struct cpu *cpu;
6592 6642 cpuset_t cpuset;
6593 6643 void *argdata;
6594 6644 uchar_t *f0;
6595 6645
6596 6646 argdata = kmem_zalloc(sizeof (x86_featureset) * NCPU, KM_SLEEP);
6597 6647
6598 6648 mutex_enter(&cpu_lock);
6599 6649 cpu = cpu_get(0);
6600 6650 rev = cpu->cpu_m.mcpu_ucode_info->cui_rev;
6601 6651 CPUSET_ONLY(cpuset, 0);
6602 6652 for (i = 1; i < max_ncpus; i++) {
6603 6653 if ((cpu = cpu_get(i)) == NULL)
6604 6654 continue;
6605 6655
6606 6656 if (cpu->cpu_m.mcpu_ucode_info->cui_rev != rev) {
6607 6657 panic("post microcode update CPU %d has differing "
6608 6658 "microcode revision (%u) from CPU 0 (%u)",
6609 6659 i, cpu->cpu_m.mcpu_ucode_info->cui_rev, rev);
6610 6660 }
6611 6661 CPUSET_ADD(cpuset, i);
6612 6662 }
6613 6663
6614 6664 kpreempt_disable();
6615 6665 xc_sync((xc_arg_t)argdata, 0, 0, CPUSET2BV(cpuset),
6616 6666 cpuid_post_ucodeadm_xc);
6617 6667 kpreempt_enable();
6618 6668
6619 6669 /*
6620 6670 * OK, now look at each CPU and see if their feature sets are equal.
6621 6671 */
6622 6672 f0 = argdata;
6623 6673 for (i = 1; i < max_ncpus; i++) {
6624 6674 uchar_t *fset;
6625 6675 if (!CPU_IN_SET(cpuset, i))
6626 6676 continue;
6627 6677
6628 6678 fset = (uchar_t *)((uintptr_t)argdata +
6629 6679 sizeof (x86_featureset) * i);
6630 6680
6631 6681 if (!compare_x86_featureset(f0, fset)) {
6632 6682 panic("Post microcode update CPU %d has "
6633 6683 "differing security feature (%p) set from CPU 0 "
6634 6684 "(%p), not appending to feature set", i,
6635 6685 (void *)fset, (void *)f0);
6636 6686 }
6637 6687 }
6638 6688
6639 6689 mutex_exit(&cpu_lock);
6640 6690
6641 6691 for (i = 0; i < NUM_X86_FEATURES; i++) {
6642 6692 cmn_err(CE_CONT, "?post-ucode x86_feature: %s\n",
6643 6693 x86_feature_names[i]);
6644 6694 if (is_x86_feature(f0, i)) {
6645 6695 add_x86_feature(x86_featureset, i);
6646 6696 }
6647 6697 }
6648 6698 kmem_free(argdata, sizeof (x86_featureset) * NCPU);
6649 6699 }