/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2018, Joyent, Inc.
 */

/*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
/*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T		*/
/*	  All Rights Reserved						*/

/*	Copyright (c) 1987, 1988 Microsoft Corporation			*/
/*	  All Rights Reserved						*/

/*
 * Copyright (c) 2009, Intel Corporation.
 * All rights reserved.
 */

#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/x86_archext.h>

#include "assym.h"

/*
 * Returns zero if x87 "chip" is present(!)
 */
	ENTRY_NP(fpu_initial_probe)
	CLTS
	fninit
	fnstsw	%ax
	movzbl	%al, %eax
	ret
	SET_SIZE(fpu_initial_probe)

	ENTRY_NP(fxsave_insn)
	fxsaveq	(%rdi)
	ret
	SET_SIZE(fxsave_insn)

/*
 * One of these routines is called from any lwp with floating
 * point context as part of the prolog of a context switch.
 */

/*
 * These three functions define the Intel "xsave" handling for CPUs with
 * different features. Newer AMD CPUs can also use these functions. See the
 * 'exception pointers' comment below.
 */
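
/*
 * At a high level, each of the save handlers below is assumed to follow the
 * same pattern, sketched here in C-like pseudo-code (illustrative only; the
 * field names mirror the assym offsets used in the assembly, not a definitive
 * interface):
 *
 *	void fpxsave_ctxt(struct fpu_ctx *fp)
 *	{
 *		if (fp->fpu_flags != FPU_EN)
 *			return;
 *		fp->fpu_flags = FPU_EN | FPU_VALID;
 *		fxsave into fp->fpu_regs.kfpu_u.kfpu_fx;
 *		set CR0.TS so the next FPU access traps;
 *	}
 *
 * The xsave and xsaveopt variants differ only in that they first load the
 * 64-bit xsave mask into %edx:%eax, as those instructions require.
 */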
	ENTRY_NP(fpxsave_ctxt)	/* %rdi is a struct fpu_ctx */
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
	jne	1f
	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
	movq	FPU_CTX_FPU_REGS(%rdi), %rdi	/* fpu_regs.kfpu_u.kfpu_fx ptr */
	fxsaveq	(%rdi)
	STTS(%rsi)	/* trap on next fpu touch */
1:	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(fpxsave_ctxt)

	ENTRY_NP(xsave_ctxt)
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
	jne	1f
	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
	movl	FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax	/* xsave flags in EDX:EAX */
	movl	FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
	movq	FPU_CTX_FPU_REGS(%rdi), %rsi	/* fpu_regs.kfpu_u.kfpu_xs ptr */
	xsave	(%rsi)
	STTS(%rsi)	/* trap on next fpu touch */
1:	ret
	SET_SIZE(xsave_ctxt)

	ENTRY_NP(xsaveopt_ctxt)
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
	jne	1f
	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
	movl	FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax	/* xsave flags in EDX:EAX */
	movl	FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
	movq	FPU_CTX_FPU_REGS(%rdi), %rsi	/* fpu_regs.kfpu_u.kfpu_xs ptr */
	xsaveopt (%rsi)
	STTS(%rsi)	/* trap on next fpu touch */
1:	ret
	SET_SIZE(xsaveopt_ctxt)

/*
 * On certain AMD processors, the "exception pointers" (i.e. the last
 * instruction pointer, last data pointer, and last opcode) are saved by the
 * fxsave, xsave or xsaveopt instruction ONLY if the exception summary bit is
 * set.
 *
 * On newer CPUs, AMD has changed their behavior to mirror the Intel behavior.
 * We can detect this via an AMD specific cpuid feature bit
 * (CPUID_AMD_EBX_ERR_PTR_ZERO) and use the simpler Intel-oriented functions.
 * Otherwise we use these more complex functions on AMD CPUs. All three follow
 * the same logic after the xsave* instruction.
 */
	ENTRY_NP(fpxsave_excp_clr_ctxt)	/* %rdi is a struct fpu_ctx */
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
	jne	1f
	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
	movq	FPU_CTX_FPU_REGS(%rdi), %rdi	/* fpu_regs.kfpu_u.kfpu_fx ptr */
	fxsaveq	(%rdi)
	/*
	 * To ensure that we don't leak these values into the next context
	 * on the cpu, we could just issue an fninit here, but that's
	 * rather slow and so we issue an instruction sequence that
	 * clears them more quickly, if a little obscurely.
	 */
	btw	$7, FXSAVE_STATE_FSW(%rdi)	/* Test saved ES bit */
	jnc	0f				/* jump if ES = 0 */
	fnclex		/* clear pending x87 exceptions */
0:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
	fildl	.fpzero_const(%rip)
			/* dummy load changes all exception pointers */
	STTS(%rsi)	/* trap on next fpu touch */
1:	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(fpxsave_excp_clr_ctxt)

	ENTRY_NP(xsave_excp_clr_ctxt)
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
	jne	1f
	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
	movl	FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax
	movl	FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
	movq	FPU_CTX_FPU_REGS(%rdi), %rsi	/* fpu_regs.kfpu_u.kfpu_xs ptr */
	xsave	(%rsi)
	btw	$7, FXSAVE_STATE_FSW(%rsi)	/* Test saved ES bit */
	jnc	0f				/* jump if ES = 0 */
	fnclex		/* clear pending x87 exceptions */
0:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
	fildl	.fpzero_const(%rip)	/* dummy load changes all excp. pointers */
	STTS(%rsi)	/* trap on next fpu touch */
1:	ret
	SET_SIZE(xsave_excp_clr_ctxt)

	ENTRY_NP(xsaveopt_excp_clr_ctxt)
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
	jne	1f
	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
	movl	FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax
	movl	FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
	movq	FPU_CTX_FPU_REGS(%rdi), %rsi	/* fpu_regs.kfpu_u.kfpu_xs ptr */
	xsaveopt (%rsi)
	btw	$7, FXSAVE_STATE_FSW(%rsi)	/* Test saved ES bit */
	jnc	0f				/* jump if ES = 0 */
	fnclex		/* clear pending x87 exceptions */
0:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
	fildl	.fpzero_const(%rip)	/* dummy load changes all excp. pointers */
	STTS(%rsi)	/* trap on next fpu touch */
1:	ret
	SET_SIZE(xsaveopt_excp_clr_ctxt)

	.align	8
.fpzero_const:
	.4byte	0x0
	.4byte	0x0

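
/*
 * Raw save entry points. Unlike the *_ctxt handlers above, these do not
 * consult any fpu_ctx flags: %rdi is assumed to point directly at the save
 * area, and for xsave/xsaveopt the caller's 64-bit component bit-vector
 * (assumed to arrive in %rsi) is split into %edx:%eax before the save
 * instruction. Each routine then re-initializes the x87 state and sets
 * CR0.TS on the way out, leaving the FPU disabled.
 */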
	ENTRY_NP(fpxsave)
	CLTS
	fxsaveq	(%rdi)
	fninit			/* clear exceptions, init x87 tags */
	STTS(%rdi)		/* set TS bit in %cr0 (disable FPU) */
	ret
	SET_SIZE(fpxsave)

	ENTRY_NP(xsave)
	CLTS
	movl	%esi, %eax	/* bv mask */
	movq	%rsi, %rdx
	shrq	$32, %rdx
	xsave	(%rdi)

	fninit			/* clear exceptions, init x87 tags */
	STTS(%rdi)		/* set TS bit in %cr0 (disable FPU) */
	ret
	SET_SIZE(xsave)

	ENTRY_NP(xsaveopt)
	CLTS
	movl	%esi, %eax	/* bv mask */
	movq	%rsi, %rdx
	shrq	$32, %rdx
	xsaveopt (%rdi)

	fninit			/* clear exceptions, init x87 tags */
	STTS(%rdi)		/* set TS bit in %cr0 (disable FPU) */
	ret
	SET_SIZE(xsaveopt)

/*
 * These functions are used when restoring the FPU as part of the epilogue of a
 * context switch.
 */
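
/*
 * They are the mirror image of the save-side handlers: they act only when
 * both FPU_EN and FPU_VALID are set, clear FPU_VALID (the in-memory image is
 * about to go stale once the hardware state is live again), clear CR0.TS and
 * then reload the saved image with fxrstor or xrstor.
 */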

	ENTRY(fpxrestore_ctxt)
	cmpl	$_CONST(FPU_EN|FPU_VALID), FPU_CTX_FPU_FLAGS(%rdi)
	jne	1f
	movl	$_CONST(FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
	movq	FPU_CTX_FPU_REGS(%rdi), %rdi	/* fpu_regs.kfpu_u.kfpu_fx ptr */
	CLTS
	fxrstorq	(%rdi)
1:
	ret
	SET_SIZE(fpxrestore_ctxt)

	ENTRY(xrestore_ctxt)
	cmpl	$_CONST(FPU_EN|FPU_VALID), FPU_CTX_FPU_FLAGS(%rdi)
	jne	1f
	movl	$_CONST(FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
	movl	FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax	/* xsave flags in EDX:EAX */
	movl	FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
	movq	FPU_CTX_FPU_REGS(%rdi), %rdi	/* fpu_regs.kfpu_u.kfpu_xs ptr */
	CLTS
	xrstor	(%rdi)
1:
	ret
	SET_SIZE(xrestore_ctxt)


	ENTRY_NP(fpxrestore)
	CLTS
	fxrstorq	(%rdi)
	ret
	SET_SIZE(fpxrestore)

	ENTRY_NP(xrestore)
	CLTS
	movl	%esi, %eax	/* bv mask */
	movq	%rsi, %rdx
	shrq	$32, %rdx
	xrstor	(%rdi)
	ret
	SET_SIZE(xrestore)

/*
 * Disable the floating point unit.
 */

	ENTRY_NP(fpdisable)
	STTS(%rdi)		/* set TS bit in %cr0 (disable FPU) */
	ret
	SET_SIZE(fpdisable)

/*
 * Initialize the fpu hardware.
 */

	ENTRY_NP(fpinit)
	CLTS
	cmpl	$FP_XSAVE, fp_save_mech
	je	1f

	/* fxsave */
	leaq	sse_initial(%rip), %rax
	fxrstorq	(%rax)		/* load clean initial state */
	ret

1:	/* xsave */
	leaq	avx_initial(%rip), %rcx
	xorl	%edx, %edx		/* upper half of restore mask is zero */
	movl	$XFEATURE_AVX, %eax
	bt	$X86FSET_AVX, x86_featureset	/* CF set iff AVX is supported */
	cmovael	%edx, %eax		/* drop XFEATURE_AVX when CF is clear */
	orl	$(XFEATURE_LEGACY_FP | XFEATURE_SSE), %eax
	xrstor	(%rcx)			/* %edx:%eax selects x87+SSE(+AVX) */
	ret
	SET_SIZE(fpinit)

/*
 * Clears FPU exception state.
 * Returns the FP status word.
 */

	ENTRY_NP(fperr_reset)
	CLTS
	xorl	%eax, %eax
	fnstsw	%ax
	fnclex
	ret
	SET_SIZE(fperr_reset)

	ENTRY_NP(fpxerr_reset)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$0x10, %rsp		/* make some temporary space */
	CLTS
	stmxcsr	(%rsp)
	movl	(%rsp), %eax
	andl	$_BITNOT(SSE_MXCSR_EFLAGS), (%rsp)
	ldmxcsr	(%rsp)			/* clear processor exceptions */
	leave
	ret
	SET_SIZE(fpxerr_reset)

	ENTRY_NP(fpgetcwsw)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$0x10, %rsp		/* make some temporary space */
	CLTS
	fnstsw	(%rsp)			/* store the status word */
	fnstcw	2(%rsp)			/* store the control word */
	movl	(%rsp), %eax		/* put both in %eax */
	leave
	ret
	SET_SIZE(fpgetcwsw)

/*
 * Returns the MXCSR register.
 */

	ENTRY_NP(fpgetmxcsr)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$0x10, %rsp		/* make some temporary space */
	CLTS
	stmxcsr	(%rsp)
	movl	(%rsp), %eax
	leave
	ret
	SET_SIZE(fpgetmxcsr)