1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2018, Joyent, Inc.
  25  */
  26 
  27 /*      Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
  28 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T   */
  29 /*        All Rights Reserved   */
  30 
  31 /*      Copyright (c) 1987, 1988 Microsoft Corporation  */
  32 /*        All Rights Reserved   */
  33 
  34 /*
  35  * Copyright (c) 2009, Intel Corporation.
  36  * All rights reserved.
  37  */
  38 
  39 #include <sys/asm_linkage.h>
  40 #include <sys/asm_misc.h>
  41 #include <sys/regset.h>
  42 #include <sys/privregs.h>
  43 #include <sys/x86_archext.h>
  44 
  45 #include "assym.h"
  46 
        /*
         * Probe for an x87 FPU.
         *
         * The x87 state is reset with fninit and the status word is then
         * read back with fnstsw; the low byte of that status word is
         * returned, zero-extended, in %eax.
         *
         * Returns zero if x87 "chip" is present(!)
         */
        ENTRY_NP(fpu_initial_probe)
        CLTS                    /* macro from an included header; presumably */
                                /* clears CR0.TS so fninit does not trap */
        fninit
        fnstsw  %ax             /* %ax = x87 status word after the reset */
        movzbl  %al, %eax       /* keep only the low 8 bits as the result */
        ret
        SET_SIZE(fpu_initial_probe)
  57 
        /*
         * Execute a bare fxsaveq into the buffer addressed by %rdi (the
         * first argument).  Unlike fpxsave() this neither touches the TS
         * bit nor reinitializes the FPU afterwards.  The fxsave instruction
         * architecturally requires a 16-byte aligned destination.
         */
        ENTRY_NP(fxsave_insn)
        fxsaveq (%rdi)
        ret
        SET_SIZE(fxsave_insn)
  62 
  63 /*
  64  * One of these routines is called from any lwp with floating
  65  * point context as part of the prolog of a context switch.
  66  */
  67 
  68 /*
  69  * These three functions define the Intel "xsave" handling for CPUs with
  70  * different features. Newer AMD CPUs can also use these functions. See the
  71  * 'exception pointers' comment below.
  72  */
  73         ENTRY_NP(fpxsave_ctxt)  /* %rdi is a struct fpu_ctx */
  74         cmpl    $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
  75         jne     1f
  76         movl    $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
  77         movq    FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fx ptr */
  78         fxsaveq (%rdi)
  79         STTS(%rsi)      /* trap on next fpu touch */
  80 1:      rep;    ret     /* use 2 byte return instruction when branch target */
  81                         /* AMD Software Optimization Guide - Section 6.2 */
  82         SET_SIZE(fpxsave_ctxt)
  83 
  84         ENTRY_NP(xsave_ctxt)
  85         cmpl    $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
  86         jne     1f
  87         movl    $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
  88         movl    FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax /* xsave flags in EDX:EAX */
  89         movl    FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
  90         movq    FPU_CTX_FPU_REGS(%rdi), %rsi /* fpu_regs.kfpu_u.kfpu_xs ptr */
  91         xsave   (%rsi)
  92         STTS(%rsi)      /* trap on next fpu touch */
  93 1:      ret
  94         SET_SIZE(xsave_ctxt)
  95 
  96         ENTRY_NP(xsaveopt_ctxt)
  97         cmpl    $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
  98         jne     1f
  99         movl    $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
 100         movl    FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax /* xsave flags in EDX:EAX */
 101         movl    FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
 102         movq    FPU_CTX_FPU_REGS(%rdi), %rsi /* fpu_regs.kfpu_u.kfpu_xs ptr */
 103         xsaveopt (%rsi)
 104         STTS(%rsi)      /* trap on next fpu touch */
 105 1:      ret
 106         SET_SIZE(xsaveopt_ctxt)
 107 
 108 /*
 109  * On certain AMD processors, the "exception pointers" (i.e. the last
 110  * instruction pointer, last data pointer, and last opcode) are saved by the
 111  * fxsave, xsave or xsaveopt instruction ONLY if the exception summary bit is
 112  * set.
 113  *
 114  * On newer CPUs, AMD has changed their behavior to mirror the Intel behavior.
 115  * We can detect this via an AMD specific cpuid feature bit
 116  * (CPUID_AMD_EBX_ERR_PTR_ZERO) and use the simpler Intel-oriented functions.
 117  * Otherwise we use these more complex functions on AMD CPUs. All three follow
 118  * the same logic after the xsave* instruction.
 119  */
 120         ENTRY_NP(fpxsave_excp_clr_ctxt) /* %rdi is a struct fpu_ctx */
 121         cmpl    $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
 122         jne     1f
 123         movl    $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
 124         movq    FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fx ptr */
 125         fxsaveq (%rdi)
 126         /*
 127          * To ensure that we don't leak these values into the next context
 128          * on the cpu, we could just issue an fninit here, but that's
 129          * rather slow and so we issue an instruction sequence that
 130          * clears them more quickly, if a little obscurely.
 131          */
 132         btw     $7, FXSAVE_STATE_FSW(%rdi)      /* Test saved ES bit */
 133         jnc     0f                              /* jump if ES = 0 */
 134         fnclex          /* clear pending x87 exceptions */
 135 0:      ffree   %st(7)  /* clear tag bit to remove possible stack overflow */
 136         fildl   .fpzero_const(%rip)
 137                         /* dummy load changes all exception pointers */
 138         STTS(%rsi)      /* trap on next fpu touch */
 139 1:      rep;    ret     /* use 2 byte return instruction when branch target */
 140                         /* AMD Software Optimization Guide - Section 6.2 */
 141         SET_SIZE(fpxsave_excp_clr_ctxt)
 142 
 143         ENTRY_NP(xsave_excp_clr_ctxt)
 144         cmpl    $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
 145         jne     1f
 146         movl    $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
 147         movl    FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax
 148         movl    FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
 149         movq    FPU_CTX_FPU_REGS(%rdi), %rsi /* fpu_regs.kfpu_u.kfpu_xs ptr */
 150         xsave   (%rsi)
 151         btw     $7, FXSAVE_STATE_FSW(%rsi)      /* Test saved ES bit */
 152         jnc     0f                              /* jump if ES = 0 */
 153         fnclex          /* clear pending x87 exceptions */
 154 0:      ffree   %st(7)  /* clear tag bit to remove possible stack overflow */
 155         fildl   .fpzero_const(%rip) /* dummy load changes all excp. pointers */
 156         STTS(%rsi)      /* trap on next fpu touch */
 157 1:      ret
 158         SET_SIZE(xsave_excp_clr_ctxt)
 159 
 160         ENTRY_NP(xsaveopt_excp_clr_ctxt)
 161         cmpl    $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
 162         jne     1f
 163         movl    $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
 164         movl    FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax
 165         movl    FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
 166         movq    FPU_CTX_FPU_REGS(%rdi), %rsi /* fpu_regs.kfpu_u.kfpu_xs ptr */
 167         xsaveopt (%rsi)
 168         btw     $7, FXSAVE_STATE_FSW(%rsi)      /* Test saved ES bit */
 169         jnc     0f                              /* jump if ES = 0 */
 170         fnclex          /* clear pending x87 exceptions */
 171 0:      ffree   %st(7)  /* clear tag bit to remove possible stack overflow */
 172         fildl   .fpzero_const(%rip) /* dummy load changes all excp. pointers */
 173         STTS(%rsi)      /* trap on next fpu touch */
 174 1:      ret
 175         SET_SIZE(xsaveopt_excp_clr_ctxt)
 176 
        /*
         * Eight bytes of zero, 8-byte aligned.  The *_excp_clr_ctxt
         * routines load the first four bytes with fildl as a dummy x87
         * load; the value itself is irrelevant.
         */
        .align  8
.fpzero_const:
        .4byte  0x0
        .4byte  0x0
 181 
 182 
        /*
         * Save the FPU state with fxsaveq into the buffer addressed by
         * %rdi, then reinitialize the x87 unit and set TS again.
         * %rdi is reused as the scratch register for STTS once the save
         * has completed, so it does not hold the buffer address on return.
         */
        ENTRY_NP(fpxsave)
        CLTS                            /* macro from an included header; */
                                        /* presumably clears CR0.TS */
        fxsaveq (%rdi)
        fninit                          /* clear exceptions, init x87 tags */
        STTS(%rdi)                      /* set TS bit in %cr0 (disable FPU) */
        ret
        SET_SIZE(fpxsave)
 190 
 191         ENTRY_NP(xsave)
 192         CLTS
 193         movl    %esi, %eax              /* bv mask */
 194         movq    %rsi, %rdx
 195         shrq    $32, %rdx
 196         xsave   (%rdi)
 197 
 198         fninit                          /* clear exceptions, init x87 tags */
 199         STTS(%rdi)                      /* set TS bit in %cr0 (disable FPU) */
 200         ret
 201         SET_SIZE(xsave)
 202 
 203         ENTRY_NP(xsaveopt)
 204         CLTS
 205         movl    %esi, %eax              /* bv mask */
 206         movq    %rsi, %rdx
 207         shrq    $32, %rdx
 208         xsaveopt (%rdi)
 209 
 210         fninit                          /* clear exceptions, init x87 tags */
 211         STTS(%rdi)                      /* set TS bit in %cr0 (disable FPU) */
 212         ret
 213         SET_SIZE(xsaveopt)
 214 
 215 /*
 216  * These functions are used when restoring the FPU as part of the epilogue of a
 217  * context switch.
 218  */
 219 
 220         ENTRY(fpxrestore_ctxt)
 221         cmpl    $_CONST(FPU_EN|FPU_VALID), FPU_CTX_FPU_FLAGS(%rdi)
 222         jne     1f
 223         movl    $_CONST(FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
 224         movq    FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fx ptr */
 225         CLTS
 226         fxrstorq        (%rdi)
 227 1:
 228         ret
 229         SET_SIZE(fpxrestore_ctxt)
 230 
 231         ENTRY(xrestore_ctxt)
 232         cmpl    $_CONST(FPU_EN|FPU_VALID), FPU_CTX_FPU_FLAGS(%rdi)
 233         jne     1f
 234         movl    $_CONST(FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
 235         movl    FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax /* xsave flags in EDX:EAX */
 236         movl    FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
 237         movq    FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_xs ptr */
 238         CLTS
 239         xrstor  (%rdi)
 240 1:
 241         ret
 242         SET_SIZE(xrestore_ctxt)
 243 
 244 
        /*
         * Load FPU state with fxrstorq from the buffer addressed by %rdi
         * (the first argument).  TS is not set again afterwards.
         */
        ENTRY_NP(fpxrestore)
        CLTS                            /* macro from an included header; */
                                        /* presumably clears CR0.TS */
        fxrstorq        (%rdi)
        ret
        SET_SIZE(fpxrestore)
 250 
 251         ENTRY_NP(xrestore)
 252         CLTS
 253         movl    %esi, %eax              /* bv mask */
 254         movq    %rsi, %rdx
 255         shrq    $32, %rdx
 256         xrstor  (%rdi)
 257         ret
 258         SET_SIZE(xrestore)
 259 
/*
 * Disable the floating point unit.
 *
 * Takes no arguments; %rdi is used only as the scratch register handed to
 * the STTS macro and is overwritten.
 */

        ENTRY_NP(fpdisable)
        STTS(%rdi)                      /* set TS bit in %cr0 (disable FPU) */
        ret
        SET_SIZE(fpdisable)
 268 
 269 /*
 270  * Initialize the fpu hardware.
 271  */
 272 
 273         ENTRY_NP(fpinit)
 274         CLTS
 275         cmpl    $FP_XSAVE, fp_save_mech
 276         je      1f
 277 
 278         /* fxsave */
 279         leaq    sse_initial(%rip), %rax
 280         fxrstorq        (%rax)                  /* load clean initial state */
 281         ret
 282 
 283 1:      /* xsave */
 284         leaq    avx_initial(%rip), %rcx
 285         xorl    %edx, %edx
 286         movl    $XFEATURE_AVX, %eax
 287         bt      $X86FSET_AVX, x86_featureset
 288         cmovael %edx, %eax
 289         orl     $(XFEATURE_LEGACY_FP | XFEATURE_SSE), %eax
 290         xrstor (%rcx)
 291         ret
 292         SET_SIZE(fpinit)
 293 
/*
 * Clears FPU exception state.
 * Returns the FP status word, zero-extended in %eax, as it was before the
 * exceptions were cleared.
 */

        ENTRY_NP(fperr_reset)
        CLTS                    /* macro from an included header; presumably */
                                /* clears CR0.TS so the x87 ops do not trap */
        xorl    %eax, %eax      /* upper bits of the return value are zero */
        fnstsw  %ax             /* capture the status word first ... */
        fnclex                  /* ... then clear the pending exceptions */
        ret
        SET_SIZE(fperr_reset)
 306 
 307         ENTRY_NP(fpxerr_reset)
 308         pushq   %rbp
 309         movq    %rsp, %rbp
 310         subq    $0x10, %rsp             /* make some temporary space */
 311         CLTS
 312         stmxcsr (%rsp)
 313         movl    (%rsp), %eax
 314         andl    $_BITNOT(SSE_MXCSR_EFLAGS), (%rsp)
 315         ldmxcsr (%rsp)                  /* clear processor exceptions */
 316         leave
 317         ret
 318         SET_SIZE(fpxerr_reset)
 319 
 320         ENTRY_NP(fpgetcwsw)
 321         pushq   %rbp
 322         movq    %rsp, %rbp
 323         subq    $0x10, %rsp             /* make some temporary space    */
 324         CLTS
 325         fnstsw  (%rsp)                  /* store the status word        */
 326         fnstcw  2(%rsp)                 /* store the control word       */
 327         movl    (%rsp), %eax            /* put both in %eax             */
 328         leave
 329         ret
 330         SET_SIZE(fpgetcwsw)
 331 
 332 /*
 333  * Returns the MXCSR register.
 334  */
 335 
 336         ENTRY_NP(fpgetmxcsr)
 337         pushq   %rbp
 338         movq    %rsp, %rbp
 339         subq    $0x10, %rsp             /* make some temporary space */
 340         CLTS
 341         stmxcsr (%rsp)
 342         movl    (%rsp), %eax
 343         leave
 344         ret
 345         SET_SIZE(fpgetmxcsr)
 346