 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
 * Copyright 2014 Josef "Jeff" Sipek <jeffpc@josefsipek.net>
 * Copyright 2020 Joyent, Inc.
 */
/*
 * Copyright (c) 2010, Intel Corporation.
 * All rights reserved.
 */
/*
 * Portions Copyright 2009 Advanced Micro Devices, Inc.
 */

/*
 * CPU Identification logic
 *
 * The purpose of this file and its companion, cpuid_subr.c, is to help deal
 * with the identification of CPUs, their features, and their topologies. More
 * specifically, this file helps drive the following:
 *
 * 1. Enumeration of features of the processor which are used by the kernel to
 *    determine what features to enable or disable. These may be instruction set
 *    enhancements or features that we use.
 *
 * 2. Enumeration of instruction set architecture (ISA) additions that userland
 *    will be told about through the auxiliary vector.
 *
 * 3. Understanding the physical topology of the CPU such as the number of
 *    caches, how many cores it has, whether or not it supports symmetric
 *    multi-processing (SMT), etc.
 *
 * ------------------------
 * flush the microarchitectural state before the CPU goes idle by calling hlt,
 * mwait, or another ACPI method. To perform these flushes, we call
 * x86_md_clear() at all of these transition points.
 *
 * If hardware enumerates RDCL_NO, indicating that it is not vulnerable to L1TF,
 * then we change the spec_uarch_flush() function to point to x86_md_clear(). If
 * MDS_NO has been set, then this is fully mitigated and x86_md_clear() becomes
 * a no-op. (A brief sketch of this selection follows this block comment.)
 *
 * Unfortunately, with this issue hyperthreading rears its ugly head. In
 * particular, everything we've discussed above is only valid for a single
 * thread executing on a core. In the case where you have hyper-threading
 * present, this attack can be performed between threads. The theoretical fix
 * for this is to ensure that both threads are always in the same security
 * domain. This means that they are executing in the same ring and mutually
 * trust each other. Practically speaking, this would mean that a system call
 * would have to issue an inter-processor interrupt (IPI) to the other thread.
 * Rather than implement this, we recommend that one disables hyper-threading
 * through the use of psradm -aS.
 *
 * TSX ASYNCHRONOUS ABORT
 *
 * TSX Asynchronous Abort (TAA) is another side-channel vulnerability that
 * behaves like MDS, but leverages Intel's transactional instructions as another
 * vector. Effectively, when a transaction hits one of these cases (unmapped
 * page, various cache snoop activity, etc.) then the same data can be exposed
 * as in the case of MDS. This means that you can attack your twin.
 *
 * Intel has described that there are two different ways that we can mitigate
 * this problem on affected processors:
 *
 * 1) We can use the same techniques used to deal with MDS. Flushing the
 *    microarchitectural buffers and disabling hyperthreading will mitigate
 *    this in the same way.
 *
 * 2) Using microcode to disable TSX.
 *
 * Now, most processors that are subject to MDS (as in they don't have MDS_NO in
 * the IA32_ARCH_CAPABILITIES MSR) will not receive microcode to disable TSX.
 * That's OK as we're already doing all such mitigations. On the other hand,
 * processors with MDS_NO are all supposed to receive microcode updates that
 * enumerate support for disabling TSX. In general, we'd rather use this method
 * when available as it doesn't require disabling hyperthreading to be
 * effective. Currently we basically are relying on microcode for processors
 * that enumerate MDS_NO.
 *
 * The microcode features are enumerated as part of the IA32_ARCH_CAPABILITIES.
 * When bit 7 (IA32_ARCH_CAP_TSX_CTRL) is present, then we are given two
 * different powers. The first allows us to cause all transactions to
 * immediately abort. The second gives us a means of disabling TSX completely,
 * which includes removing it from cpuid. If we have support for this in
 * microcode during the first cpuid pass, then we'll disable TSX completely such
 * that user land never has a chance to observe the bit. However, if we are late
 * loading the microcode, then we must use the functionality to cause
 * transactions to automatically abort. This is necessary for user land's sake.
 * Once a program sees a cpuid bit, it must not be taken away.
 *
 * We track whether or not we should do this based on what cpuid pass we're in.
 * Whenever we hit cpuid_scan_security() on the boot CPU and we're still on pass
 * 1 of the cpuid logic, then we can completely turn off TSX. Notably this
 * should happen twice. Once in the normal cpuid_pass1() code and then a second
 * time after we do the initial microcode update. As a result we need to be
 * careful in cpuid_apply_tsx() to only use the MSR if we've loaded a suitable
 * microcode on the current CPU (which happens prior to cpuid_pass_ucode()).
 *
 * If TAA has been fixed, then it will be enumerated in IA32_ARCH_CAPABILITIES
 * as TAA_NO. In such a case, we will still disable TSX: it's proven to be an
 * unfortunate feature in a number of ways, and taking the opportunity to
 * finally be able to turn it off is likely to be of benefit in the future.
 *
 * SUMMARY
 *
 * The following table attempts to summarize the mitigations for various issues
 * and what's done in various places:
 *
 * - Spectre v1: Not currently mitigated
 * - swapgs: lfences after swapgs paths
 * - Spectre v2: Retpolines/RSB Stuffing or EIBRS if HW support
 * - Meltdown: Kernel Page Table Isolation
 * - Spectre v3a: Updated CPU microcode
 * - Spectre v4: Not currently mitigated
 * - SpectreRSB: SMEP and RSB Stuffing
 * - L1TF: spec_uarch_flush, SMT exclusion, requires microcode
 * - MDS: x86_md_clear, requires microcode, disabling SMT
 * - TAA: x86_md_clear and disabling SMT OR microcode and disabling TSX
 *
 * The following table indicates the x86 feature set bits that indicate that a
 * given problem has been solved or a notable feature is present:
 *
 * - RDCL_NO: Meltdown, L1TF, MSBDS subset of MDS
 * - MDS_NO: All forms of MDS
 * - TAA_NO: TAA
 */
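
/*
 * The following is a brief, illustrative sketch (not part of the normal boot
 * flow) of how the RDCL_NO/MDS_NO handling described above would select the
 * behaviour of spec_uarch_flush().  The helper name is hypothetical; the real
 * work is done by cpuid_update_md_clear() and cpuid_update_l1d_flush() below,
 * and the sketch assumes the declarations pulled in by the headers that follow.
 */
static void
example_select_spec_uarch_flush(uchar_t *featureset)
{
    if (is_x86_feature(featureset, X86FSET_RDCL_NO)) {
        /*
         * Not vulnerable to L1TF, so no L1D flush is required; only the
         * MD-clear (VERW) portion of the flush remains interesting.  If
         * MDS_NO is also enumerated, x86_md_clear() is separately turned
         * into a no-op, so this assignment costs nothing.
         */
        spec_uarch_flush = x86_md_clear;
    }
    /*
     * Otherwise spec_uarch_flush continues to perform the full L1D flush,
     * which (with MD_CLEAR-capable microcode) also clears the other
     * microarchitectural state that x86_md_clear() would.
     */
}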

#include <sys/types.h>
#include <sys/archsystm.h>
#include <sys/x86_archext.h>
#include <sys/kmem.h>
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/cpuvar.h>
#include <sys/processor.h>
#include <sys/sysmacros.h>
#include <sys/pg.h>
#include <sys/fp.h>
#include <sys/controlregs.h>
#include <sys/bitmap.h>
#include <sys/auxv_386.h>
#include <sys/memnode.h>
#include <sys/pci_cfgspace.h>

#if defined(__xpv)
int x86_use_pcid = 0;
int x86_use_invpcid = 0;
#else
int x86_use_pcid = -1;
int x86_use_invpcid = -1;
#endif

typedef enum {
    X86_SPECTREV2_RETPOLINE,
    X86_SPECTREV2_RETPOLINE_AMD,
    X86_SPECTREV2_ENHANCED_IBRS,
    X86_SPECTREV2_DISABLED
} x86_spectrev2_mitigation_t;

uint_t x86_disable_spectrev2 = 0;
static x86_spectrev2_mitigation_t x86_spectrev2_mitigation =
    X86_SPECTREV2_RETPOLINE;

/*
 * The mitigation status for TAA:
 * X86_TAA_NOTHING -- no mitigation available for TAA side-channels
 * X86_TAA_DISABLED -- mitigation disabled via x86_disable_taa
 * X86_TAA_MD_CLEAR -- MDS mitigation also suffices for TAA
 * X86_TAA_TSX_FORCE_ABORT -- transactions are forced to abort
 * X86_TAA_TSX_DISABLE -- force abort transactions and hide from CPUID
 * X86_TAA_HW_MITIGATED -- TSX potentially active but H/W not TAA-vulnerable
 */
typedef enum {
    X86_TAA_NOTHING,
    X86_TAA_DISABLED,
    X86_TAA_MD_CLEAR,
    X86_TAA_TSX_FORCE_ABORT,
    X86_TAA_TSX_DISABLE,
    X86_TAA_HW_MITIGATED
} x86_taa_mitigation_t;

uint_t x86_disable_taa = 0;
static x86_taa_mitigation_t x86_taa_mitigation = X86_TAA_NOTHING;
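
/*
 * For reference, a sketch of how an administrator would typically disable the
 * TAA mitigation at boot, assuming the usual /etc/system tunable mechanism
 * (the exact line is illustrative, not taken from this file):
 *
 *     set x86_disable_taa = 1
 *
 * The x86_disable_spectrev2 tunable above follows the same pattern.
 */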

uint_t pentiumpro_bug4046376;

uchar_t x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)];

static char *x86_feature_names[NUM_X86_FEATURES] = {
    "lgpg",
    "tsc",
    "msr",
    "mtrr",
    "pge",
    "de",
    "cmov",
    "mmx",
    "mca",
    "pae",
    "cv8",
    "pat",
    "sep",
    "sse",
    "sse2",
    "rdcl_no",
    "ibrs_all",
    "rsba",
    "ssb_no",
    "stibp_all",
    "flush_cmd",
    "l1d_vmentry_no",
    "fsgsbase",
    "clflushopt",
    "clwb",
    "monitorx",
    "clzero",
    "xop",
    "fma4",
    "tbm",
    "avx512_vnni",
    "amd_pcec",
1434 "mb_clear",
1435 "mds_no",
1436 "core_thermal",
1437 "pkg_thermal",
1438 "tsx_ctrl",
1439 "taa_no"
1440 };

boolean_t
is_x86_feature(void *featureset, uint_t feature)
{
    ASSERT(feature < NUM_X86_FEATURES);
    return (BT_TEST((ulong_t *)featureset, feature));
}

void
add_x86_feature(void *featureset, uint_t feature)
{
    ASSERT(feature < NUM_X86_FEATURES);
    BT_SET((ulong_t *)featureset, feature);
}
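
/*
 * A minimal usage sketch for the accessors above (illustrative only; the
 * helper and the SSE2 example are arbitrary and not part of the normal cpuid
 * flow):
 */
static void
example_featureset_usage(uchar_t *featureset)
{
    /* Record a capability once it has been detected via cpuid... */
    add_x86_feature(featureset, X86FSET_SSE2);

    /* ...and gate later code paths on it. */
    if (is_x86_feature(featureset, X86FSET_SSE2)) {
        /* SSE2-dependent setup would go here. */
    }
}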

void
remove_x86_feature(void *featureset, uint_t feature)
{
    ASSERT(feature < NUM_X86_FEATURES);
     * friendly way. As such, try to read and set the MSR. If we can then
     * read back the value we set (it wasn't just set to zero), then we go
     * for it.
     */
    if (!on_trap(&otd, OT_DATA_ACCESS)) {
        val = rdmsr(MSR_AMD_DECODE_CONFIG);
        val |= AMD_DECODE_CONFIG_LFENCE_DISPATCH;
        wrmsr(MSR_AMD_DECODE_CONFIG, val);
        val = rdmsr(MSR_AMD_DECODE_CONFIG);
    } else {
        val = 0;
    }
    no_trap();

    if ((val & AMD_DECODE_CONFIG_LFENCE_DISPATCH) != 0)
        return (B_TRUE);
    return (B_FALSE);
}
#endif /* !__xpv */
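
/*
 * A generic sketch of the on_trap()-guarded MSR probe pattern used just above,
 * shown with a caller-supplied MSR and bit rather than MSR_AMD_DECODE_CONFIG
 * (the helper name and its use are purely illustrative, and it assumes the
 * on_trap declarations already used elsewhere in this file).  on_trap() with
 * OT_DATA_ACCESS arms a handler so that a #GP from touching an unsupported
 * MSR is caught instead of panicking; no_trap() disarms it again.
 */
static boolean_t
example_probe_msr_bit(uint_t msr, uint64_t bit)
{
    on_trap_data_t otd;
    uint64_t val = 0;

    if (!on_trap(&otd, OT_DATA_ACCESS)) {
        val = rdmsr(msr);
        val |= bit;
        wrmsr(msr, val);
        val = rdmsr(msr);
    }
    no_trap();

    return ((val & bit) != 0 ? B_TRUE : B_FALSE);
}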

/*
 * Determine how we should mitigate TAA or if we need to. Regardless of TAA, if
 * we can disable TSX, we do so.
 *
 * This determination is done only on the boot CPU, potentially after loading
 * updated microcode.
 */
static void
cpuid_update_tsx(cpu_t *cpu, uchar_t *featureset)
{
    struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;

    VERIFY(cpu->cpu_id == 0);

    if (cpi->cpi_vendor != X86_VENDOR_Intel) {
        x86_taa_mitigation = X86_TAA_HW_MITIGATED;
        return;
    }

    if (x86_disable_taa) {
        x86_taa_mitigation = X86_TAA_DISABLED;
        return;
    }

    /*
     * If we do not have the ability to disable TSX, then our only
     * mitigation options are in hardware (TAA_NO), or by using our existing
     * MDS mitigation as described above. The latter relies upon us having
     * configured MDS mitigations correctly! This includes disabling SMT if
     * we want cross-CPU-thread protection.
     */
    if (!is_x86_feature(featureset, X86FSET_TSX_CTRL)) {
        /*
         * It's not clear whether any parts will enumerate TAA_NO
         * *without* TSX_CTRL, but let's mark it as such if we see this.
         */
        if (is_x86_feature(featureset, X86FSET_TAA_NO)) {
            x86_taa_mitigation = X86_TAA_HW_MITIGATED;
            return;
        }

        if (is_x86_feature(featureset, X86FSET_MD_CLEAR) &&
            !is_x86_feature(featureset, X86FSET_MDS_NO)) {
            x86_taa_mitigation = X86_TAA_MD_CLEAR;
        } else {
            x86_taa_mitigation = X86_TAA_NOTHING;
        }
        return;
    }

    /*
     * We have TSX_CTRL, but we can only fully disable TSX if we're early
     * enough in boot.
     *
     * Otherwise, we'll fall back to causing transactions to abort as our
     * mitigation. TSX-using code will always take the fallback path.
     */
    if (cpi->cpi_pass < 4) {
        x86_taa_mitigation = X86_TAA_TSX_DISABLE;
    } else {
        x86_taa_mitigation = X86_TAA_TSX_FORCE_ABORT;
    }
}
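
/*
 * To make the decision tree above concrete: a CPU that enumerates TSX_CTRL and
 * reaches this code during an early cpuid pass ends up with
 * X86_TAA_TSX_DISABLE (TSX is hidden from CPUID entirely), while the same CPU
 * whose TSX_CTRL microcode is only late-loaded ends up with
 * X86_TAA_TSX_FORCE_ABORT, since userland may already have observed the RTM
 * CPUID bit.  A CPU without TSX_CTRL falls back to the MDS-style mitigation
 * when MD_CLEAR is present and MDS_NO is not.
 */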

/*
 * As mentioned, we should only touch the MSR when we've got a suitable
 * microcode loaded on this CPU.
 */
static void
cpuid_apply_tsx(x86_taa_mitigation_t taa, uchar_t *featureset)
{
    uint64_t val;

    switch (taa) {
    case X86_TAA_TSX_DISABLE:
        if (!is_x86_feature(featureset, X86FSET_TSX_CTRL))
            return;
        val = rdmsr(MSR_IA32_TSX_CTRL);
        val |= IA32_TSX_CTRL_CPUID_CLEAR | IA32_TSX_CTRL_RTM_DISABLE;
        wrmsr(MSR_IA32_TSX_CTRL, val);
        break;
    case X86_TAA_TSX_FORCE_ABORT:
        if (!is_x86_feature(featureset, X86FSET_TSX_CTRL))
            return;
        val = rdmsr(MSR_IA32_TSX_CTRL);
        val |= IA32_TSX_CTRL_RTM_DISABLE;
        wrmsr(MSR_IA32_TSX_CTRL, val);
        break;
    case X86_TAA_HW_MITIGATED:
    case X86_TAA_MD_CLEAR:
    case X86_TAA_DISABLED:
    case X86_TAA_NOTHING:
        break;
    }
}

static void
cpuid_scan_security(cpu_t *cpu, uchar_t *featureset)
{
    struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
    x86_spectrev2_mitigation_t v2mit;

    if (cpi->cpi_vendor == X86_VENDOR_AMD &&
        cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
        if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBPB)
            add_x86_feature(featureset, X86FSET_IBPB);
        if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBRS)
            add_x86_feature(featureset, X86FSET_IBRS);
        if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_STIBP)
            add_x86_feature(featureset, X86FSET_STIBP);
        if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_STIBP_ALL)
            add_x86_feature(featureset, X86FSET_STIBP_ALL);
        if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_SSBD)
            add_x86_feature(featureset, X86FSET_SSBD);
        if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_VIRT_SSBD)
            add_x86_feature(featureset, X86FSET_SSBD_VIRT);
        if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_SSB_NO)
                if (reg & IA32_ARCH_CAP_IBRS_ALL) {
                    add_x86_feature(featureset,
                        X86FSET_IBRS_ALL);
                }
                if (reg & IA32_ARCH_CAP_RSBA) {
                    add_x86_feature(featureset,
                        X86FSET_RSBA);
                }
                if (reg & IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY) {
                    add_x86_feature(featureset,
                        X86FSET_L1D_VM_NO);
                }
                if (reg & IA32_ARCH_CAP_SSB_NO) {
                    add_x86_feature(featureset,
                        X86FSET_SSB_NO);
                }
                if (reg & IA32_ARCH_CAP_MDS_NO) {
                    add_x86_feature(featureset,
                        X86FSET_MDS_NO);
                }
                if (reg & IA32_ARCH_CAP_TSX_CTRL) {
                    add_x86_feature(featureset,
                        X86FSET_TSX_CTRL);
                }
                if (reg & IA32_ARCH_CAP_TAA_NO) {
                    add_x86_feature(featureset,
                        X86FSET_TAA_NO);
                }
            }
            no_trap();
        }
#endif /* !__xpv */

        if (ecp->cp_edx & CPUID_INTC_EDX_7_0_SSBD)
            add_x86_feature(featureset, X86FSET_SSBD);

        if (ecp->cp_edx & CPUID_INTC_EDX_7_0_FLUSH_CMD)
            add_x86_feature(featureset, X86FSET_FLUSH_CMD);
    }

    /*
     * Take care of certain mitigations on the non-boot CPU. The boot CPU
     * will have already run this function and determined what we need to
     * do. This gives us a hook for per-HW thread mitigations such as
     * enhanced IBRS, or disabling TSX.
     */
    if (cpu->cpu_id != 0) {
        if (x86_spectrev2_mitigation == X86_SPECTREV2_ENHANCED_IBRS) {
            cpuid_enable_enhanced_ibrs();
        }

        cpuid_apply_tsx(x86_taa_mitigation, featureset);
        return;
    }

    /*
     * Go through and initialize various security mechanisms that we should
     * only do on a single CPU. This includes Spectre V2, L1TF, MDS, and
     * TAA.
     */

    /*
     * By default we've come in with retpolines enabled. Check whether we
     * should disable them or enable enhanced IBRS. RSB stuffing is enabled
     * by default, but disabled if we are using enhanced IBRS.
     */
    if (x86_disable_spectrev2 != 0) {
        v2mit = X86_SPECTREV2_DISABLED;
    } else if (is_x86_feature(featureset, X86FSET_IBRS_ALL)) {
        cpuid_enable_enhanced_ibrs();
        v2mit = X86_SPECTREV2_ENHANCED_IBRS;
#ifndef __xpv
    } else if (cpuid_use_amd_retpoline(cpi)) {
        v2mit = X86_SPECTREV2_RETPOLINE_AMD;
#endif /* !__xpv */
    } else {
        v2mit = X86_SPECTREV2_RETPOLINE;
    }

     *
     * If any of these are present, then we need to flush u-arch state at
     * various points. For MDS, we need to do so whenever we change to a
     * lesser privilege level or we are halting the CPU. For L1TF we need to
     * flush the L1D cache at VM entry. When we have microcode that handles
     * MDS, the L1D flush also clears the other u-arch state that the
     * md_clear does.
     */

    /*
     * Update whether or not we need to be taking explicit action against
     * MDS.
     */
    cpuid_update_md_clear(cpu, featureset);

    /*
     * Determine whether SMT exclusion is required and whether or not we
     * need to perform an l1d flush.
     */
    cpuid_update_l1d_flush(cpu, featureset);

    /*
     * Determine what our mitigation strategy should be for TAA and then
     * also apply TAA mitigations.
     */
    cpuid_update_tsx(cpu, featureset);
    cpuid_apply_tsx(x86_taa_mitigation, featureset);
}

/*
 * Setup XFeature_Enabled_Mask register. Required by xsave feature.
 */
void
setup_xfem(void)
{
    uint64_t flags = XFEATURE_LEGACY_FP;

    ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));

    if (is_x86_feature(x86_featureset, X86FSET_SSE))
        flags |= XFEATURE_SSE;

    if (is_x86_feature(x86_featureset, X86FSET_AVX))
        flags |= XFEATURE_AVX;

    if (is_x86_feature(x86_featureset, X86FSET_AVX512F))
        flags |= XFEATURE_AVX512;