11967 need TAA mitigation
Portions contributed by: Robert Mustacchi <rm@fingolfin.org>
Reviewed by: Dan McDonald <danmcd@joyent.com>


   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
  24  * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
  25  * Copyright 2014 Josef "Jeff" Sipek <jeffpc@josefsipek.net>

  26  */
  27 /*
  28  * Copyright (c) 2010, Intel Corporation.
  29  * All rights reserved.
  30  */
  31 /*
  32  * Portions Copyright 2009 Advanced Micro Devices, Inc.
  33  */
  34 /*
  35  * Copyright 2019 Joyent, Inc.
  36  */
  37 
  38 /*
  39  * CPU Identification logic
  40  *
  41  * The purpose of this file and its companion, cpuid_subr.c, is to help deal
  42  * with the identification of CPUs, their features, and their topologies. More
  43  * specifically, this file helps drive the following:
  44  *
  45  * 1. Enumeration of features of the processor which are used by the kernel to
  46  *    determine what features to enable or disable. These may be instruction set
  47  *    enhancements or features that we use.
  48  *
  49  * 2. Enumeration of instruction set architecture (ISA) additions that userland
  50  *    will be told about through the auxiliary vector.
  51  *
  52  * 3. Understanding the physical topology of the CPU such as the number of
  53  *    caches, how many cores it has, whether or not it supports symmetric
  54  *    multi-processing (SMT), etc.
  55  *
  56  * ------------------------


1169  * flush the microarchitectural state before the CPU goes idle by calling hlt,
1170  * mwait, or another ACPI method. To perform these flushes, we call
1171  * x86_md_clear() at all of these transition points.
1172  *
1173  * If hardware enumerates RDCL_NO, indicating that it is not vulnerable to L1TF,
1174  * then we change the spec_uarch_flush() function to point to x86_md_clear(). If
1175  * MDS_NO has been set, then this is fully mitigated and x86_md_clear() becomes
1176  * a no-op.
1177  *
1178  * Unfortunately, with this issue hyperthreading rears its ugly head. In
1179  * particular, everything we've discussed above is only valid for a single
1180  * thread executing on a core. In the case where you have hyper-threading
1181  * present, this attack can be performed between threads. The theoretical fix
1182  * for this is to ensure that both threads are always in the same security
1183  * domain. This means that they are executing in the same ring and mutually
1184  * trust each other. Practically speaking, this would mean that a system call
1185  * would have to issue an inter-processor interrupt (IPI) to the other thread.
1186  * Rather than implement this, we recommend that one disables hyper-threading
1187  * through the use of psradm -aS.
1188  *
1189  * SUMMARY
1190  *
1191  * The following table attempts to summarize the mitigations for various issues
1192  * and what's done in various places:
1193  *
1194  *  - Spectre v1: Not currently mitigated
1195  *  - swapgs: lfences after swapgs paths
1196  *  - Spectre v2: Retpolines/RSB Stuffing or EIBRS if HW support
1197  *  - Meltdown: Kernel Page Table Isolation
1198  *  - Spectre v3a: Updated CPU microcode
1199  *  - Spectre v4: Not currently mitigated
1200  *  - SpectreRSB: SMEP and RSB Stuffing
1201  *  - L1TF: spec_uarch_flush, SMT exclusion, requires microcode
1202  *  - MDS: x86_md_clear, requires microcode, disabling hyper threading

1203  *
1204  * The following table indicates the x86 feature set bits that indicate that a
1205  * given problem has been solved or a notable feature is present:
1206  *
1207  *  - RDCL_NO: Meltdown, L1TF, MSBDS subset of MDS
1208  *  - MDS_NO: All forms of MDS

1209  */
1210 
1211 #include <sys/types.h>
1212 #include <sys/archsystm.h>
1213 #include <sys/x86_archext.h>
1214 #include <sys/kmem.h>
1215 #include <sys/systm.h>
1216 #include <sys/cmn_err.h>
1217 #include <sys/sunddi.h>
1218 #include <sys/sunndi.h>
1219 #include <sys/cpuvar.h>
1220 #include <sys/processor.h>
1221 #include <sys/sysmacros.h>
1222 #include <sys/pg.h>
1223 #include <sys/fp.h>
1224 #include <sys/controlregs.h>
1225 #include <sys/bitmap.h>
1226 #include <sys/auxv_386.h>
1227 #include <sys/memnode.h>
1228 #include <sys/pci_cfgspace.h>


1245 
1246 #if defined(__xpv)
1247 int x86_use_pcid = 0;
1248 int x86_use_invpcid = 0;
1249 #else
1250 int x86_use_pcid = -1;
1251 int x86_use_invpcid = -1;
1252 #endif
1253 
1254 typedef enum {
1255         X86_SPECTREV2_RETPOLINE,
1256         X86_SPECTREV2_RETPOLINE_AMD,
1257         X86_SPECTREV2_ENHANCED_IBRS,
1258         X86_SPECTREV2_DISABLED
1259 } x86_spectrev2_mitigation_t;
1260 
1261 uint_t x86_disable_spectrev2 = 0;
1262 static x86_spectrev2_mitigation_t x86_spectrev2_mitigation =
1263     X86_SPECTREV2_RETPOLINE;
1264 
1265 uint_t pentiumpro_bug4046376;
1266 
1267 uchar_t x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)];
1268 
1269 static char *x86_feature_names[NUM_X86_FEATURES] = {
1270         "lgpg",
1271         "tsc",
1272         "msr",
1273         "mtrr",
1274         "pge",
1275         "de",
1276         "cmov",
1277         "mmx",
1278         "mca",
1279         "pae",
1280         "cv8",
1281         "pat",
1282         "sep",
1283         "sse",
1284         "sse2",


1346         "rdcl_no",
1347         "ibrs_all",
1348         "rsba",
1349         "ssb_no",
1350         "stibp_all",
1351         "flush_cmd",
1352         "l1d_vmentry_no",
1353         "fsgsbase",
1354         "clflushopt",
1355         "clwb",
1356         "monitorx",
1357         "clzero",
1358         "xop",
1359         "fma4",
1360         "tbm",
1361         "avx512_vnni",
1362         "amd_pcec",
1363         "mb_clear",
1364         "mds_no",
1365         "core_thermal",
1366         "pkg_thermal"
1367 };
1368 
1369 boolean_t
1370 is_x86_feature(void *featureset, uint_t feature)
1371 {
1372         ASSERT(feature < NUM_X86_FEATURES);
1373         return (BT_TEST((ulong_t *)featureset, feature));
1374 }
1375 
1376 void
1377 add_x86_feature(void *featureset, uint_t feature)
1378 {
1379         ASSERT(feature < NUM_X86_FEATURES);
1380         BT_SET((ulong_t *)featureset, feature);
1381 }
1382 
1383 void
1384 remove_x86_feature(void *featureset, uint_t feature)
1385 {
1386         ASSERT(feature < NUM_X86_FEATURES);


2685          * friendly way. As such, try to read and set the MSR. If we can then
2686          * read back the value we set (it wasn't just set to zero), then we go
2687          * for it.
2688          */
2689         if (!on_trap(&otd, OT_DATA_ACCESS)) {
2690                 val = rdmsr(MSR_AMD_DECODE_CONFIG);
2691                 val |= AMD_DECODE_CONFIG_LFENCE_DISPATCH;
2692                 wrmsr(MSR_AMD_DECODE_CONFIG, val);
2693                 val = rdmsr(MSR_AMD_DECODE_CONFIG);
2694         } else {
2695                 val = 0;
2696         }
2697         no_trap();
2698 
2699         if ((val & AMD_DECODE_CONFIG_LFENCE_DISPATCH) != 0)
2700                 return (B_TRUE);
2701         return (B_FALSE);
2702 }
2703 #endif  /* !__xpv */
2704 
2705 static void
2706 cpuid_scan_security(cpu_t *cpu, uchar_t *featureset)
2707 {
2708         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2709         x86_spectrev2_mitigation_t v2mit;
2710 
2711         if (cpi->cpi_vendor == X86_VENDOR_AMD &&
2712             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
2713                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBPB)
2714                         add_x86_feature(featureset, X86FSET_IBPB);
2715                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBRS)
2716                         add_x86_feature(featureset, X86FSET_IBRS);
2717                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_STIBP)
2718                         add_x86_feature(featureset, X86FSET_STIBP);
2719                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_STIBP_ALL)
2720                         add_x86_feature(featureset, X86FSET_STIBP_ALL);
2721                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_SSBD)
2722                         add_x86_feature(featureset, X86FSET_SSBD);
2723                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_VIRT_SSBD)
2724                         add_x86_feature(featureset, X86FSET_SSBD_VIRT);
2725                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_SSB_NO)


2774                                 if (reg & IA32_ARCH_CAP_IBRS_ALL) {
2775                                         add_x86_feature(featureset,
2776                                             X86FSET_IBRS_ALL);
2777                                 }
2778                                 if (reg & IA32_ARCH_CAP_RSBA) {
2779                                         add_x86_feature(featureset,
2780                                             X86FSET_RSBA);
2781                                 }
2782                                 if (reg & IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY) {
2783                                         add_x86_feature(featureset,
2784                                             X86FSET_L1D_VM_NO);
2785                                 }
2786                                 if (reg & IA32_ARCH_CAP_SSB_NO) {
2787                                         add_x86_feature(featureset,
2788                                             X86FSET_SSB_NO);
2789                                 }
2790                                 if (reg & IA32_ARCH_CAP_MDS_NO) {
2791                                         add_x86_feature(featureset,
2792                                             X86FSET_MDS_NO);
2793                                 }
2794                         }
2795                         no_trap();
2796                 }
2797 #endif  /* !__xpv */
2798 
2799                 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_SSBD)
2800                         add_x86_feature(featureset, X86FSET_SSBD);
2801 
2802                 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_FLUSH_CMD)
2803                         add_x86_feature(featureset, X86FSET_FLUSH_CMD);
2804         }
2805 
2806         if (cpu->cpu_id != 0) {
2807                 if (x86_spectrev2_mitigation == X86_SPECTREV2_ENHANCED_IBRS) {
2808                         cpuid_enable_enhanced_ibrs();
2809                 }
2810                 return;
2811         }
2812 
2813         /*
2814          * Go through and initialize various security mechanisms that we should
2815          * only do on a single CPU. This includes Spectre V2, L1TF, and MDS.

2816          */
2817 
2818         /*
2819          * By default we've come in with retpolines enabled. Check whether we
2820          * should disable them or enable enhanced IBRS. RSB stuffing is enabled
2821          * by default, but disabled if we are using enhanced IBRS.
2822          */
2823         if (x86_disable_spectrev2 != 0) {
2824                 v2mit = X86_SPECTREV2_DISABLED;
2825         } else if (is_x86_feature(featureset, X86FSET_IBRS_ALL)) {
2826                 cpuid_enable_enhanced_ibrs();
2827                 v2mit = X86_SPECTREV2_ENHANCED_IBRS;
2828 #ifndef __xpv
2829         } else if (cpuid_use_amd_retpoline(cpi)) {
2830                 v2mit = X86_SPECTREV2_RETPOLINE_AMD;
2831 #endif  /* !__xpv */
2832         } else {
2833                 v2mit = X86_SPECTREV2_RETPOLINE;
2834         }
2835 


2845          *
2846          * If any of these are present, then we need to flush u-arch state at
2847          * various points. For MDS, we need to do so whenever we change to a
2848          * lesser privilege level or we are halting the CPU. For L1TF we need to
2849          * flush the L1D cache at VM entry. When we have microcode that handles
2850          * MDS, the L1D flush also clears the other u-arch state that the
2851          * md_clear does.
2852          */
2853 
2854         /*
2855          * Update whether or not we need to be taking explicit action against
2856          * MDS.
2857          */
2858         cpuid_update_md_clear(cpu, featureset);
2859 
2860         /*
2861          * Determine whether SMT exclusion is required and whether or not we
2862          * need to perform an l1d flush.
2863          */
2864         cpuid_update_l1d_flush(cpu, featureset);
2865 }
2866 
2867 /*
2868  * Setup XFeature_Enabled_Mask register. Required by xsave feature.
2869  */
2870 void
2871 setup_xfem(void)
2872 {
2873         uint64_t flags = XFEATURE_LEGACY_FP;
2874 
2875         ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
2876 
2877         if (is_x86_feature(x86_featureset, X86FSET_SSE))
2878                 flags |= XFEATURE_SSE;
2879 
2880         if (is_x86_feature(x86_featureset, X86FSET_AVX))
2881                 flags |= XFEATURE_AVX;
2882 
2883         if (is_x86_feature(x86_featureset, X86FSET_AVX512F))
2884                 flags |= XFEATURE_AVX512;




   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
  24  * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
  25  * Copyright 2014 Josef "Jeff" Sipek <jeffpc@josefsipek.net>
  26  * Copyright 2020 Joyent, Inc.
  27  */
  28 /*
  29  * Copyright (c) 2010, Intel Corporation.
  30  * All rights reserved.
  31  */
  32 /*
  33  * Portions Copyright 2009 Advanced Micro Devices, Inc.
  34  */



  35 
  36 /*
  37  * CPU Identification logic
  38  *
  39  * The purpose of this file and its companion, cpuid_subr.c, is to help deal
  40  * with the identification of CPUs, their features, and their topologies. More
  41  * specifically, this file helps drive the following:
  42  *
  43  * 1. Enumeration of features of the processor which are used by the kernel to
  44  *    determine what features to enable or disable. These may be instruction set
  45  *    enhancements or features that we use.
  46  *
  47  * 2. Enumeration of instruction set architecture (ISA) additions that userland
  48  *    will be told about through the auxiliary vector.
  49  *
  50  * 3. Understanding the physical topology of the CPU such as the number of
  51  *    caches, how many cores it has, whether or not it supports symmetric
  52  *    multi-processing (SMT), etc.
  53  *
  54  * ------------------------


1167  * flush the microarchitectural state before the CPU goes idle by calling hlt,
1168  * mwait, or another ACPI method. To perform these flushes, we call
1169  * x86_md_clear() at all of these transition points.
1170  *
1171  * If hardware enumerates RDCL_NO, indicating that it is not vulnerable to L1TF,
1172  * then we change the spec_uarch_flush() function to point to x86_md_clear(). If
1173  * MDS_NO has been set, then this is fully mitigated and x86_md_clear() becomes
1174  * a no-op.
1175  *
1176  * Unfortunately, with this issue hyperthreading rears its ugly head. In
1177  * particular, everything we've discussed above is only valid for a single
1178  * thread executing on a core. In the case where you have hyper-threading
1179  * present, this attack can be performed between threads. The theoretical fix
1180  * for this is to ensure that both threads are always in the same security
1181  * domain. This means that they are executing in the same ring and mutually
1182  * trust each other. Practically speaking, this would mean that a system call
1183  * would have to issue an inter-processor interrupt (IPI) to the other thread.
1184  * Rather than implement this, we recommend that one disables hyper-threading
1185  * through the use of psradm -aS.
1186  *
1187  * TSX ASYNCHRONOUS ABORT
1188  *
1189  * TSX Asynchronous Abort (TAA) is another side-channel vulnerability that
1190  * behaves like MDS, but leverages Intel's transactional instructions as another
1191  * vector. Effectively, when a transaction hits one of these cases (unmapped
1192  * page, various cache snoop activity, etc.) then the same data can be exposed
1193  * as in the case of MDS. This means that you can attack your twin.
1194  *
1195  * Intel has described that there are two different ways that we can mitigate
1196  * this problem on affected processors:
1197  *
1198  *   1) We can use the same techniques used to deal with MDS. Flushing the
1199  *      microarchitectural buffers and disabling hyperthreading will mitigate
1200  *      this in the same way.
1201  *
1202  *   2) Using microcode to disable TSX.
1203  *
1204  * Now, most processors that are subject to MDS (as in they don't have MDS_NO in
1205  * the IA32_ARCH_CAPABILITIES MSR) will not receive microcode to disable TSX.
1206  * That's OK as we're already doing all such mitigations. On the other hand,
1207  * processors with MDS_NO are all supposed to receive microcode updates that
1208  * enumerate support for disabling TSX. In general, we'd rather use this method
1209  * when available as it doesn't require disabling hyperthreading to be
1210  * effective. Currently we basically are relying on microcode for processors
1211  * that enumerate MDS_NO.
1212  *
1213  * The microcode features are enumerated as part of the IA32_ARCH_CAPABILITIES.
1214  * When bit 7 (IA32_ARCH_CAP_TSX_CTRL) is present, then we are given two
1215  * different powers. The first allows us to cause all transactions to
1216  * immediately abort. The second gives us a means of disabling TSX completely,
1217  * which includes removing it from cpuid. If we have support for this in
1218  * microcode during the first cpuid pass, then we'll disable TSX completely such
1219  * that user land never has a chance to observe the bit. However, if we are late
1220  * loading the microcode, then we must use the functionality to cause
1221  * transactions to automatically abort. This is necessary for user land's sake.
1222  * Once a program sees a cpuid bit, it must not be taken away.
1223  *
1224  * We track whether or not we should do this based on what cpuid pass we're in.
1225  * Whenever we hit cpuid_scan_security() on the boot CPU and we're still on pass
1226  * 1 of the cpuid logic, then we can completely turn off TSX. Notably this
1227  * should happen twice. Once in the normal cpuid_pass1() code and then a second
1228  * time after we do the initial microcode update.  As a result we need to be
1229  * careful in cpuid_apply_tsx() to only use the MSR if we've loaded a suitable
1230  * microcode on the current CPU (which happens prior to cpuid_pass_ucode()).
1231  *
1232  * If TAA has been fixed, then it will be enumerated in IA32_ARCH_CAPABILITIES
1233  * as TAA_NO. In such a case, we will still disable TSX: it's proven to be an
1234  * unfortunate feature in a number of ways, and taking the opportunity to
1235  * finally be able to turn it off is likely to be of benefit in the future.
1236  *
1237  * SUMMARY
1238  *
1239  * The following table attempts to summarize the mitigations for various issues
1240  * and what's done in various places:
1241  *
1242  *  - Spectre v1: Not currently mitigated
1243  *  - swapgs: lfences after swapgs paths
1244  *  - Spectre v2: Retpolines/RSB Stuffing or EIBRS if HW support
1245  *  - Meltdown: Kernel Page Table Isolation
1246  *  - Spectre v3a: Updated CPU microcode
1247  *  - Spectre v4: Not currently mitigated
1248  *  - SpectreRSB: SMEP and RSB Stuffing
1249  *  - L1TF: spec_uarch_flush, SMT exclusion, requires microcode
1250  *  - MDS: x86_md_clear, requires microcode, disabling SMT
1251  *  - TAA: x86_md_clear and disabling SMT OR microcode and disabling TSX
1252  *
1253  * The following table indicates the x86 feature set bits that indicate that a
1254  * given problem has been solved or a notable feature is present:
1255  *
1256  *  - RDCL_NO: Meltdown, L1TF, MSBDS subset of MDS
1257  *  - MDS_NO: All forms of MDS
1258  *  - TAA_NO: TAA
1259  */
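
/*
 * Editorial sketch (not part of this change): the dispatch described above can
 * be pictured roughly as below. Only spec_uarch_flush(), x86_md_clear() and
 * the X86FSET_* bits are names taken from this file; the helper and
 * spec_uarch_flush_msr() are hypothetical stand-ins, not the real
 * implementation.
 */
extern void x86_md_clear(void);		/* VERW-based u-arch buffer clear */
extern void spec_uarch_flush_msr(void);	/* hypothetical: L1D flush via MSR */

void (*spec_uarch_flush)(void) = x86_md_clear;

static void
example_update_uarch_flush(uchar_t *featureset)	/* hypothetical helper */
{
        if (is_x86_feature(featureset, X86FSET_RDCL_NO)) {
                /* Not L1TF-vulnerable: only the MD buffers may need clearing. */
                spec_uarch_flush = x86_md_clear;
        } else {
                /* Still L1TF-vulnerable: flush the L1D, which covers MD state. */
                spec_uarch_flush = spec_uarch_flush_msr;
        }
        /*
         * If MDS_NO is also enumerated, x86_md_clear() itself is expected to
         * reduce to a no-op, so the assignment above costs effectively nothing.
         */
}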
1260 
1261 #include <sys/types.h>
1262 #include <sys/archsystm.h>
1263 #include <sys/x86_archext.h>
1264 #include <sys/kmem.h>
1265 #include <sys/systm.h>
1266 #include <sys/cmn_err.h>
1267 #include <sys/sunddi.h>
1268 #include <sys/sunndi.h>
1269 #include <sys/cpuvar.h>
1270 #include <sys/processor.h>
1271 #include <sys/sysmacros.h>
1272 #include <sys/pg.h>
1273 #include <sys/fp.h>
1274 #include <sys/controlregs.h>
1275 #include <sys/bitmap.h>
1276 #include <sys/auxv_386.h>
1277 #include <sys/memnode.h>
1278 #include <sys/pci_cfgspace.h>


1295 
1296 #if defined(__xpv)
1297 int x86_use_pcid = 0;
1298 int x86_use_invpcid = 0;
1299 #else
1300 int x86_use_pcid = -1;
1301 int x86_use_invpcid = -1;
1302 #endif
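
/*
 * Editorial note (assumption, not stated in this change): under Xen (__xpv)
 * PCID/INVPCID use is simply disabled, while the -1 values mean "not yet
 * decided" and are expected to be resolved from CPUID (and any overrides)
 * later in boot.
 */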
1303 
1304 typedef enum {
1305         X86_SPECTREV2_RETPOLINE,
1306         X86_SPECTREV2_RETPOLINE_AMD,
1307         X86_SPECTREV2_ENHANCED_IBRS,
1308         X86_SPECTREV2_DISABLED
1309 } x86_spectrev2_mitigation_t;
1310 
1311 uint_t x86_disable_spectrev2 = 0;
1312 static x86_spectrev2_mitigation_t x86_spectrev2_mitigation =
1313     X86_SPECTREV2_RETPOLINE;
1314 
1315 /*
1316  * The mitigation status for TAA:
1317  * X86_TAA_NOTHING -- no mitigation available for TAA side-channels
1318  * X86_TAA_DISABLED -- mitigation disabled via x86_disable_taa
1319  * X86_TAA_MD_CLEAR -- MDS mitigation also suffices for TAA
1320  * X86_TAA_TSX_FORCE_ABORT -- transactions are forced to abort
1321  * X86_TAA_TSX_DISABLE -- force abort transactions and hide from CPUID
1322  * X86_TAA_HW_MITIGATED -- TSX potentially active but H/W not TAA-vulnerable
1323  */
1324 typedef enum {
1325         X86_TAA_NOTHING,
1326         X86_TAA_DISABLED,
1327         X86_TAA_MD_CLEAR,
1328         X86_TAA_TSX_FORCE_ABORT,
1329         X86_TAA_TSX_DISABLE,
1330         X86_TAA_HW_MITIGATED
1331 } x86_taa_mitigation_t;
1332 
1333 uint_t x86_disable_taa = 0;
1334 static x86_taa_mitigation_t x86_taa_mitigation = X86_TAA_NOTHING;
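
/*
 * Editorial usage note (a sketch, not part of this change): x86_disable_taa,
 * like x86_disable_spectrev2 above, is an ordinary kernel global, so on
 * illumos it can typically be forced from /etc/system before the mitigation
 * choice is made, e.g.:
 *
 *	set x86_disable_taa = 1
 *
 * Whether a given distribution documents this as a supported tunable is an
 * assumption.
 */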
1335 
1336 uint_t pentiumpro_bug4046376;
1337 
1338 uchar_t x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)];
1339 
1340 static char *x86_feature_names[NUM_X86_FEATURES] = {
1341         "lgpg",
1342         "tsc",
1343         "msr",
1344         "mtrr",
1345         "pge",
1346         "de",
1347         "cmov",
1348         "mmx",
1349         "mca",
1350         "pae",
1351         "cv8",
1352         "pat",
1353         "sep",
1354         "sse",
1355         "sse2",


1417         "rdcl_no",
1418         "ibrs_all",
1419         "rsba",
1420         "ssb_no",
1421         "stibp_all",
1422         "flush_cmd",
1423         "l1d_vmentry_no",
1424         "fsgsbase",
1425         "clflushopt",
1426         "clwb",
1427         "monitorx",
1428         "clzero",
1429         "xop",
1430         "fma4",
1431         "tbm",
1432         "avx512_vnni",
1433         "amd_pcec",
1434         "mb_clear",
1435         "mds_no",
1436         "core_thermal",
1437         "pkg_thermal",
1438         "tsx_ctrl",
1439         "taa_no"
1440 };
1441 
1442 boolean_t
1443 is_x86_feature(void *featureset, uint_t feature)
1444 {
1445         ASSERT(feature < NUM_X86_FEATURES);
1446         return (BT_TEST((ulong_t *)featureset, feature));
1447 }
1448 
1449 void
1450 add_x86_feature(void *featureset, uint_t feature)
1451 {
1452         ASSERT(feature < NUM_X86_FEATURES);
1453         BT_SET((ulong_t *)featureset, feature);
1454 }
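
/*
 * Illustrative usage only (hypothetical caller, not part of this change): a
 * featureset is a bitmap sized by BT_SIZEOFMAP(NUM_X86_FEATURES), and these
 * helpers wrap the BT_SET/BT_TEST bitmap macros.
 */
static void
example_featureset_usage(void)
{
        uchar_t fs[BT_SIZEOFMAP(NUM_X86_FEATURES)] = { 0 };

        add_x86_feature(fs, X86FSET_SSE);
        ASSERT(is_x86_feature(fs, X86FSET_SSE));
}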
1455 
1456 void
1457 remove_x86_feature(void *featureset, uint_t feature)
1458 {
1459         ASSERT(feature < NUM_X86_FEATURES);


2758          * friendly way. As such, try to read and set the MSR. If we can then
2759          * read back the value we set (it wasn't just set to zero), then we go
2760          * for it.
2761          */
2762         if (!on_trap(&otd, OT_DATA_ACCESS)) {
2763                 val = rdmsr(MSR_AMD_DECODE_CONFIG);
2764                 val |= AMD_DECODE_CONFIG_LFENCE_DISPATCH;
2765                 wrmsr(MSR_AMD_DECODE_CONFIG, val);
2766                 val = rdmsr(MSR_AMD_DECODE_CONFIG);
2767         } else {
2768                 val = 0;
2769         }
2770         no_trap();
2771 
2772         if ((val & AMD_DECODE_CONFIG_LFENCE_DISPATCH) != 0)
2773                 return (B_TRUE);
2774         return (B_FALSE);
2775 }
2776 #endif  /* !__xpv */
2777 
2778 /*
2779  * Determine how we should mitigate TAA or if we need to. Regardless of TAA, if
2780  * we can disable TSX, we do so.
2781  *
2782  * This determination is done only on the boot CPU, potentially after loading
2783  * updated microcode.
2784  */
2785 static void
2786 cpuid_update_tsx(cpu_t *cpu, uchar_t *featureset)
2787 {
2788         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2789 
2790         VERIFY(cpu->cpu_id == 0);
2791 
2792         if (cpi->cpi_vendor != X86_VENDOR_Intel) {
2793                 x86_taa_mitigation = X86_TAA_HW_MITIGATED;
2794                 return;
2795         }
2796 
2797         if (x86_disable_taa) {
2798                 x86_taa_mitigation = X86_TAA_DISABLED;
2799                 return;
2800         }
2801 
2802         /*
2803          * If we do not have the ability to disable TSX, then our only
2804          * mitigation options are in hardware (TAA_NO), or by using our existing
2805          * MDS mitigation as described above.  The latter relies upon us having
2806          * configured MDS mitigations correctly! This includes disabling SMT if
2807          * we want cross-CPU-thread protection.
2808          */
2809         if (!is_x86_feature(featureset, X86FSET_TSX_CTRL)) {
2810                 /*
2811                  * It's not clear whether any parts will enumerate TAA_NO
2812                  * *without* TSX_CTRL, but let's mark it as such if we see this.
2813                  */
2814                 if (is_x86_feature(featureset, X86FSET_TAA_NO)) {
2815                         x86_taa_mitigation = X86_TAA_HW_MITIGATED;
2816                         return;
2817                 }
2818 
2819                 if (is_x86_feature(featureset, X86FSET_MD_CLEAR) &&
2820                     !is_x86_feature(featureset, X86FSET_MDS_NO)) {
2821                         x86_taa_mitigation = X86_TAA_MD_CLEAR;
2822                 } else {
2823                         x86_taa_mitigation = X86_TAA_NOTHING;
2824                 }
2825                 return;
2826         }
2827 
2828         /*
2829          * We have TSX_CTRL, but we can only fully disable TSX if we're early
2830          * enough in boot.
2831          *
2832          * Otherwise, we'll fall back to causing transactions to abort as our
2833          * mitigation. TSX-using code will always take the fallback path.
2834          */
2835         if (cpi->cpi_pass < 4) {
2836                 x86_taa_mitigation = X86_TAA_TSX_DISABLE;
2837         } else {
2838                 x86_taa_mitigation = X86_TAA_TSX_FORCE_ABORT;
2839         }
2840 }
2841 
2842 /*
2843  * As mentioned, we should only touch the MSR when we've got a suitable
2844  * microcode loaded on this CPU.
2845  */
2846 static void
2847 cpuid_apply_tsx(x86_taa_mitigation_t taa, uchar_t *featureset)
2848 {
2849         uint64_t val;
2850 
2851         switch (taa) {
2852         case X86_TAA_TSX_DISABLE:
2853                 if (!is_x86_feature(featureset, X86FSET_TSX_CTRL))
2854                         return;
2855                 val = rdmsr(MSR_IA32_TSX_CTRL);
2856                 val |= IA32_TSX_CTRL_CPUID_CLEAR | IA32_TSX_CTRL_RTM_DISABLE;
2857                 wrmsr(MSR_IA32_TSX_CTRL, val);
2858                 break;
2859         case X86_TAA_TSX_FORCE_ABORT:
2860                 if (!is_x86_feature(featureset, X86FSET_TSX_CTRL))
2861                         return;
2862                 val = rdmsr(MSR_IA32_TSX_CTRL);
2863                 val |= IA32_TSX_CTRL_RTM_DISABLE;
2864                 wrmsr(MSR_IA32_TSX_CTRL, val);
2865                 break;
2866         case X86_TAA_HW_MITIGATED:
2867         case X86_TAA_MD_CLEAR:
2868         case X86_TAA_DISABLED:
2869         case X86_TAA_NOTHING:
2870                 break;
2871         }
2872 }
2873 
2874 static void
2875 cpuid_scan_security(cpu_t *cpu, uchar_t *featureset)
2876 {
2877         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2878         x86_spectrev2_mitigation_t v2mit;
2879 
2880         if (cpi->cpi_vendor == X86_VENDOR_AMD &&
2881             cpi->cpi_xmaxeax >= CPUID_LEAF_EXT_8) {
2882                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBPB)
2883                         add_x86_feature(featureset, X86FSET_IBPB);
2884                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_IBRS)
2885                         add_x86_feature(featureset, X86FSET_IBRS);
2886                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_STIBP)
2887                         add_x86_feature(featureset, X86FSET_STIBP);
2888                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_STIBP_ALL)
2889                         add_x86_feature(featureset, X86FSET_STIBP_ALL);
2890                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_SSBD)
2891                         add_x86_feature(featureset, X86FSET_SSBD);
2892                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_VIRT_SSBD)
2893                         add_x86_feature(featureset, X86FSET_SSBD_VIRT);
2894                 if (cpi->cpi_extd[8].cp_ebx & CPUID_AMD_EBX_SSB_NO)


2943                                 if (reg & IA32_ARCH_CAP_IBRS_ALL) {
2944                                         add_x86_feature(featureset,
2945                                             X86FSET_IBRS_ALL);
2946                                 }
2947                                 if (reg & IA32_ARCH_CAP_RSBA) {
2948                                         add_x86_feature(featureset,
2949                                             X86FSET_RSBA);
2950                                 }
2951                                 if (reg & IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY) {
2952                                         add_x86_feature(featureset,
2953                                             X86FSET_L1D_VM_NO);
2954                                 }
2955                                 if (reg & IA32_ARCH_CAP_SSB_NO) {
2956                                         add_x86_feature(featureset,
2957                                             X86FSET_SSB_NO);
2958                                 }
2959                                 if (reg & IA32_ARCH_CAP_MDS_NO) {
2960                                         add_x86_feature(featureset,
2961                                             X86FSET_MDS_NO);
2962                                 }
2963                                 if (reg & IA32_ARCH_CAP_TSX_CTRL) {
2964                                         add_x86_feature(featureset,
2965                                             X86FSET_TSX_CTRL);
2966                                 }
2967                                 if (reg & IA32_ARCH_CAP_TAA_NO) {
2968                                         add_x86_feature(featureset,
2969                                             X86FSET_TAA_NO);
2970                                 }
2971                         }
2972                         no_trap();
2973                 }
2974 #endif  /* !__xpv */
2975 
2976                 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_SSBD)
2977                         add_x86_feature(featureset, X86FSET_SSBD);
2978 
2979                 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_FLUSH_CMD)
2980                         add_x86_feature(featureset, X86FSET_FLUSH_CMD);
2981         }
2982 
2983         /*
2984          * Take care of certain mitigations on the non-boot CPU. The boot CPU
2985          * will have already run this function and determined what we need to
2986          * do. This gives us a hook for per-HW thread mitigations such as
2987          * enhanced IBRS, or disabling TSX.
2988          */
2989         if (cpu->cpu_id != 0) {
2990                 if (x86_spectrev2_mitigation == X86_SPECTREV2_ENHANCED_IBRS) {
2991                         cpuid_enable_enhanced_ibrs();
2992                 }
2993 
2994                 cpuid_apply_tsx(x86_taa_mitigation, featureset);
2995                 return;
2996         }
2997 
2998         /*
2999          * Go through and initialize various security mechanisms that we should
3000          * only do on a single CPU. This includes Spectre V2, L1TF, MDS, and
3001          * TAA.
3002          */
3003 
3004         /*
3005          * By default we've come in with retpolines enabled. Check whether we
3006          * should disable them or enable enhanced IBRS. RSB stuffing is enabled
3007          * by default, but disabled if we are using enhanced IBRS.
3008          */
3009         if (x86_disable_spectrev2 != 0) {
3010                 v2mit = X86_SPECTREV2_DISABLED;
3011         } else if (is_x86_feature(featureset, X86FSET_IBRS_ALL)) {
3012                 cpuid_enable_enhanced_ibrs();
3013                 v2mit = X86_SPECTREV2_ENHANCED_IBRS;
3014 #ifndef __xpv
3015         } else if (cpuid_use_amd_retpoline(cpi)) {
3016                 v2mit = X86_SPECTREV2_RETPOLINE_AMD;
3017 #endif  /* !__xpv */
3018         } else {
3019                 v2mit = X86_SPECTREV2_RETPOLINE;
3020         }
3021 


3031          *
3032          * If any of these are present, then we need to flush u-arch state at
3033          * various points. For MDS, we need to do so whenever we change to a
3034          * lesser privilege level or we are halting the CPU. For L1TF we need to
3035          * flush the L1D cache at VM entry. When we have microcode that handles
3036          * MDS, the L1D flush also clears the other u-arch state that the
3037          * md_clear does.
3038          */
3039 
3040         /*
3041          * Update whether or not we need to be taking explicit action against
3042          * MDS.
3043          */
3044         cpuid_update_md_clear(cpu, featureset);
3045 
3046         /*
3047          * Determine whether SMT exclusion is required and whether or not we
3048          * need to perform an l1d flush.
3049          */
3050         cpuid_update_l1d_flush(cpu, featureset);
3051 
3052         /*
3053          * Determine what our mitigation strategy should be for TAA and then
3054          * also apply TAA mitigations.
3055          */
3056         cpuid_update_tsx(cpu, featureset);
3057         cpuid_apply_tsx(x86_taa_mitigation, featureset);
3058 }
3059 
3060 /*
3061  * Setup XFeature_Enabled_Mask register. Required by xsave feature.
3062  */
3063 void
3064 setup_xfem(void)
3065 {
3066         uint64_t flags = XFEATURE_LEGACY_FP;
3067 
3068         ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
3069 
3070         if (is_x86_feature(x86_featureset, X86FSET_SSE))
3071                 flags |= XFEATURE_SSE;
3072 
3073         if (is_x86_feature(x86_featureset, X86FSET_AVX))
3074                 flags |= XFEATURE_AVX;
3075 
3076         if (is_x86_feature(x86_featureset, X86FSET_AVX512F))
3077                 flags |= XFEATURE_AVX512;