1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2018, Joyent, Inc.
  25  */
  26 
  27 /*      Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
  28 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T   */
  29 /*        All Rights Reserved   */
  30 
  31 /*      Copyright (c) 1987, 1988 Microsoft Corporation  */
  32 /*        All Rights Reserved   */
  33 
  34 /*
  35  * Copyright (c) 2009, Intel Corporation.
  36  * All rights reserved.
  37  */
  38 
  39 #include <sys/asm_linkage.h>
  40 #include <sys/asm_misc.h>
  41 #include <sys/regset.h>
  42 #include <sys/privregs.h>
  43 #include <sys/x86_archext.h>
  44 
  45 #if defined(__lint)
  46 #include <sys/types.h>
  47 #include <sys/fp.h>
  48 #else
  49 #include "assym.h"
  50 #endif
  51 
  52 #if defined(__lint)
  53  
/* lint stub; the real assembler implementation follows below */
uint_t
fpu_initial_probe(void)
{ return (0); }
  57 
  58 #else   /* __lint */
  59 
  60         /*
  61          * Returns zero if x87 "chip" is present(!)
  62          */
  63         ENTRY_NP(fpu_initial_probe)
  64         CLTS
  65         fninit
  66         fnstsw  %ax
  67         movzbl  %al, %eax
  68         ret
  69         SET_SIZE(fpu_initial_probe)
  70 
  71 #endif  /* __lint */
  72 
  73 #if defined(__lint)
  74 
/* lint stub; the real assembler implementation follows below */
/*ARGSUSED*/
void
fxsave_insn(struct fxsave_state *fx)
{}
  79 
  80 #else   /* __lint */
  81 
        /*
         * void fxsave_insn(struct fxsave_state *fx)
         *
         * Save the x87/MMX/SSE state into *fx (%rdi) using the 64-bit
         * form of FXSAVE.  The destination must be 16-byte aligned (an
         * architectural FXSAVE requirement); the caller is responsible
         * for CR0.TS being clear.
         */
        ENTRY_NP(fxsave_insn)
        fxsaveq (%rdi)
        ret
        SET_SIZE(fxsave_insn)
  86 
  87 #endif  /* __lint */
  88 
  89 /*
  90  * One of these routines is called from any lwp with floating
  91  * point context as part of the prolog of a context switch.
  92  */
  93 
  94 #if defined(__lint)
  95 
/* lint stubs; the real assembler implementations follow below */

/*ARGSUSED*/
void
xsave_ctxt(void *arg)
{}

/*ARGSUSED*/
void
xsaveopt_ctxt(void *arg)
{}

/*ARGSUSED*/
void
fpxsave_ctxt(void *arg)
{}
 110 
 111 #else   /* __lint */
 112 
 113 /*
 114  * These three functions define the Intel "xsave" handling for CPUs with
 115  * different features. Newer AMD CPUs can also use these functions. See the
 116  * 'exception pointers' comment below.
 117  */
        /*
         * void fpxsave_ctxt(void *arg)
         *
         * Context-switch prolog save via FXSAVE.  %rdi is a struct
         * fpu_ctx.  Only saves when the flags word is exactly FPU_EN
         * (enabled but not yet FPU_VALID); in that case the x87/SSE
         * state is written to fpu_regs.kfpu_u.kfpu_fx, the context is
         * marked FPU_VALID|FPU_EN, and CR0.TS is set (STTS, with %rsi
         * as the macro's scratch register) so the next FPU access traps.
         */
        ENTRY_NP(fpxsave_ctxt)  /* %rdi is a struct fpu_ctx */
        cmpl    $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
        jne     1f
        movl    $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
        movq    FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fx ptr */
        fxsaveq (%rdi)
        STTS(%rsi)      /* trap on next fpu touch */
1:      rep;    ret     /* use 2 byte return instruction when branch target */
                        /* AMD Software Optimization Guide - Section 6.2 */
        SET_SIZE(fpxsave_ctxt)
 128 
        /*
         * void xsave_ctxt(void *arg)
         *
         * As fpxsave_ctxt, but uses XSAVE.  The 64-bit requested-feature
         * bitmap is loaded from fpu_ctx_fpu_xsave_mask into EDX:EAX as
         * the XSAVE instruction requires, and the state is written to
         * fpu_regs.kfpu_u.kfpu_xs.  Only acts when flags == FPU_EN.
         */
        ENTRY_NP(xsave_ctxt)
        cmpl    $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
        jne     1f
        movl    $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
        movl    FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax /* xsave flags in EDX:EAX */
        movl    FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
        movq    FPU_CTX_FPU_REGS(%rdi), %rsi /* fpu_regs.kfpu_u.kfpu_xs ptr */
        xsave   (%rsi)
        STTS(%rsi)      /* trap on next fpu touch */
1:      ret
        SET_SIZE(xsave_ctxt)
 140 
        /*
         * void xsaveopt_ctxt(void *arg)
         *
         * Identical to xsave_ctxt except it uses XSAVEOPT, which may
         * skip writing state components that are unmodified since the
         * last XRSTOR on this CPU.  Mask in EDX:EAX as above.
         */
        ENTRY_NP(xsaveopt_ctxt)
        cmpl    $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
        jne     1f
        movl    $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
        movl    FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax /* xsave flags in EDX:EAX */
        movl    FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
        movq    FPU_CTX_FPU_REGS(%rdi), %rsi /* fpu_regs.kfpu_u.kfpu_xs ptr */
        xsaveopt (%rsi)
        STTS(%rsi)      /* trap on next fpu touch */
1:      ret
        SET_SIZE(xsaveopt_ctxt)
 152 
 153 /*
 154  * On certain AMD processors, the "exception pointers" (i.e. the last
 155  * instruction pointer, last data pointer, and last opcode) are saved by the
 156  * fxsave, xsave or xsaveopt instruction ONLY if the exception summary bit is
 157  * set.
 158  *
 159  * On newer CPUs, AMD has changed their behavior to mirror the Intel behavior.
 160  * We can detect this via an AMD specific cpuid feature bit
 161  * (CPUID_AMD_EBX_ERR_PTR_ZERO) and use the simpler Intel-oriented functions.
 162  * Otherwise we use these more complex functions on AMD CPUs. All three follow
 163  * the same logic after the xsave* instruction.
 164  */
        /*
         * void fpxsave_excp_clr_ctxt(void *arg)
         *
         * AMD variant of fpxsave_ctxt (see the 'exception pointers'
         * comment above): after the FXSAVE it scrubs the x87 exception
         * pointers so stale values cannot leak into the next context on
         * this CPU.  %rdi is a struct fpu_ctx; acts only when
         * flags == FPU_EN.
         */
        ENTRY_NP(fpxsave_excp_clr_ctxt) /* %rdi is a struct fpu_ctx */
        cmpl    $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
        jne     1f
        movl    $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
        movq    FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fx ptr */
        fxsaveq (%rdi)
        /*
         * To ensure that we don't leak these values into the next context
         * on the cpu, we could just issue an fninit here, but that's
         * rather slow and so we issue an instruction sequence that
         * clears them more quickly, if a little obscurely.
         */
        btw     $7, FXSAVE_STATE_FSW(%rdi)      /* Test saved ES bit */
        jnc     0f                              /* jump if ES = 0 */
        fnclex          /* clear pending x87 exceptions */
0:      ffree   %st(7)  /* clear tag bit to remove possible stack overflow */
        fildl   .fpzero_const(%rip)
                        /* dummy load changes all exception pointers */
        STTS(%rsi)      /* trap on next fpu touch */
1:      rep;    ret     /* use 2 byte return instruction when branch target */
                        /* AMD Software Optimization Guide - Section 6.2 */
        SET_SIZE(fpxsave_excp_clr_ctxt)
 187 
        /*
         * void xsave_excp_clr_ctxt(void *arg)
         *
         * AMD variant of xsave_ctxt: XSAVE with mask in EDX:EAX, then
         * the same exception-pointer scrub sequence as
         * fpxsave_excp_clr_ctxt (the legacy FSW lives at the same
         * offset within the xsave area's FXSAVE-format header).
         */
        ENTRY_NP(xsave_excp_clr_ctxt)
        cmpl    $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
        jne     1f
        movl    $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
        movl    FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax
        movl    FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
        movq    FPU_CTX_FPU_REGS(%rdi), %rsi /* fpu_regs.kfpu_u.kfpu_xs ptr */
        xsave   (%rsi)
        btw     $7, FXSAVE_STATE_FSW(%rsi)      /* Test saved ES bit */
        jnc     0f                              /* jump if ES = 0 */
        fnclex          /* clear pending x87 exceptions */
0:      ffree   %st(7)  /* clear tag bit to remove possible stack overflow */
        fildl   .fpzero_const(%rip) /* dummy load changes all excp. pointers */
        STTS(%rsi)      /* trap on next fpu touch */
1:      ret
        SET_SIZE(xsave_excp_clr_ctxt)
 204 
        /*
         * void xsaveopt_excp_clr_ctxt(void *arg)
         *
         * AMD variant of xsaveopt_ctxt: XSAVEOPT with mask in EDX:EAX,
         * followed by the exception-pointer scrub sequence described
         * above.
         */
        ENTRY_NP(xsaveopt_excp_clr_ctxt)
        cmpl    $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
        jne     1f
        movl    $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
        movl    FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax
        movl    FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
        movq    FPU_CTX_FPU_REGS(%rdi), %rsi /* fpu_regs.kfpu_u.kfpu_xs ptr */
        xsaveopt (%rsi)
        btw     $7, FXSAVE_STATE_FSW(%rsi)      /* Test saved ES bit */
        jnc     0f                              /* jump if ES = 0 */
        fnclex          /* clear pending x87 exceptions */
0:      ffree   %st(7)  /* clear tag bit to remove possible stack overflow */
        fildl   .fpzero_const(%rip) /* dummy load changes all excp. pointers */
        STTS(%rsi)      /* trap on next fpu touch */
1:      ret
        SET_SIZE(xsaveopt_excp_clr_ctxt)
 221 
        /*
         * 8 bytes of zero, used as the operand of the dummy fildl above;
         * loading it overwrites the x87 exception pointers with benign
         * values.
         */
        .align  8
.fpzero_const:
        .4byte  0x0
        .4byte  0x0
 226 
 227 #endif  /* __lint */
 228 
 229 
 230 #if defined(__lint)
 231 
/* lint stubs; the real assembler implementations follow below */

/*ARGSUSED*/
void
fpsave(struct fnsave_state *f)
{}

/*ARGSUSED*/
void
fpxsave(struct fxsave_state *f)
{}

/*ARGSUSED*/
void
xsave(struct xsave_state *f, uint64_t m)
{}

/*ARGSUSED*/
void
xsaveopt(struct xsave_state *f, uint64_t m)
{}
 251 
 252 #else   /* __lint */
 253 
        /*
         * void fpxsave(struct fxsave_state *f)
         *
         * Unconditional FXSAVE into *f (%rdi): clear CR0.TS, save,
         * fninit to reset x87 state so nothing leaks to the next user,
         * then set CR0.TS again (STTS with %rdi as the macro's scratch
         * register) so the next FPU access traps.
         */
        ENTRY_NP(fpxsave)
        CLTS
        fxsaveq (%rdi)
        fninit                          /* clear exceptions, init x87 tags */
        STTS(%rdi)                      /* set TS bit in %cr0 (disable FPU) */
        ret
        SET_SIZE(fpxsave)
 261 
        /*
         * void xsave(struct xsave_state *f, uint64_t m)
         *
         * Unconditional XSAVE into *f (%rdi) with the 64-bit feature
         * mask m (%rsi) split into EDX:EAX as the instruction requires.
         * Resets x87 state with fninit and re-disables the FPU via
         * CR0.TS afterwards.
         */
        ENTRY_NP(xsave)
        CLTS
        movl    %esi, %eax              /* bv mask */
        movq    %rsi, %rdx
        shrq    $32, %rdx               /* EDX = high 32 bits of mask */
        xsave   (%rdi)

        fninit                          /* clear exceptions, init x87 tags */
        STTS(%rdi)                      /* set TS bit in %cr0 (disable FPU) */
        ret
        SET_SIZE(xsave)
 273 
        /*
         * void xsaveopt(struct xsave_state *f, uint64_t m)
         *
         * Identical to xsave() above except it uses XSAVEOPT, which may
         * elide writing unmodified state components.
         */
        ENTRY_NP(xsaveopt)
        CLTS
        movl    %esi, %eax              /* bv mask */
        movq    %rsi, %rdx
        shrq    $32, %rdx               /* EDX = high 32 bits of mask */
        xsaveopt (%rdi)

        fninit                          /* clear exceptions, init x87 tags */
        STTS(%rdi)                      /* set TS bit in %cr0 (disable FPU) */
        ret
        SET_SIZE(xsaveopt)
 285 
 286 #endif  /* __lint */
 287 
 288 /*
 289  * These functions are used when restoring the FPU as part of the epilogue of a
 290  * context switch.
 291  */
 292 
 293 #if defined(__lint)
 294 
/* lint stubs; the real assembler implementations follow below */

/*ARGSUSED*/
void
fpxrestore_ctxt(void *arg)
{}

/*ARGSUSED*/
void
xrestore_ctxt(void *arg)
{}
 304 
 305 #else   /* __lint */
 306 
        /*
         * void fpxrestore_ctxt(void *arg)
         *
         * Context-switch epilog restore via FXRSTOR.  %rdi is a struct
         * fpu_ctx.  Only restores when the flags word is exactly
         * FPU_EN|FPU_VALID; it then drops FPU_VALID (the live state is
         * now in the hardware, not the save area), clears CR0.TS, and
         * reloads from fpu_regs.kfpu_u.kfpu_fx.
         */
        ENTRY(fpxrestore_ctxt)
        cmpl    $_CONST(FPU_EN|FPU_VALID), FPU_CTX_FPU_FLAGS(%rdi)
        jne     1f
        movl    $_CONST(FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
        movq    FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fx ptr */
        CLTS
        fxrstorq        (%rdi)
1:
        ret
        SET_SIZE(fpxrestore_ctxt)
 317 
        /*
         * void xrestore_ctxt(void *arg)
         *
         * As fpxrestore_ctxt, but uses XRSTOR with the 64-bit feature
         * mask from fpu_ctx_fpu_xsave_mask in EDX:EAX, restoring from
         * fpu_regs.kfpu_u.kfpu_xs.
         */
        ENTRY(xrestore_ctxt)
        cmpl    $_CONST(FPU_EN|FPU_VALID), FPU_CTX_FPU_FLAGS(%rdi)
        jne     1f
        movl    $_CONST(FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
        movl    FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax /* xsave flags in EDX:EAX */
        movl    FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
        movq    FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_xs ptr */
        CLTS
        xrstor  (%rdi)
1:
        ret
        SET_SIZE(xrestore_ctxt)
 330 
 331 #endif  /* __lint */
 332 
 333 
 334 #if defined(__lint)
 335 
/* lint stubs; the real assembler implementations follow below */

/*ARGSUSED*/
void
fpxrestore(struct fxsave_state *f)
{}

/*ARGSUSED*/
void
xrestore(struct xsave_state *f, uint64_t m)
{}
 345 
 346 #else   /* __lint */
 347 
        /*
         * void fpxrestore(struct fxsave_state *f)
         *
         * Unconditional FXRSTOR from *f (%rdi); CLTS first so the
         * restore doesn't trap on CR0.TS.
         */
        ENTRY_NP(fpxrestore)
        CLTS
        fxrstorq        (%rdi)
        ret
        SET_SIZE(fpxrestore)
 353 
        /*
         * void xrestore(struct xsave_state *f, uint64_t m)
         *
         * Unconditional XRSTOR from *f (%rdi) using the 64-bit feature
         * mask m (%rsi) split into EDX:EAX.
         */
        ENTRY_NP(xrestore)
        CLTS
        movl    %esi, %eax              /* bv mask */
        movq    %rsi, %rdx
        shrq    $32, %rdx               /* EDX = high 32 bits of mask */
        xrstor  (%rdi)
        ret
        SET_SIZE(xrestore)
 362 
 363 #endif  /* __lint */
 364 
 365 /*
 366  * Disable the floating point unit.
 367  */
 368 
 369 #if defined(__lint)
 370 
/* lint stub; the real assembler implementation follows below */
void
fpdisable(void)
{}
 374 
 375 #else   /* __lint */
 376 
        /*
         * void fpdisable(void)
         *
         * Disable the FPU by setting CR0.TS (STTS uses %rdi as its
         * scratch register); the next FPU instruction will trap.
         */
        ENTRY_NP(fpdisable)
        STTS(%rdi)                      /* set TS bit in %cr0 (disable FPU) */
        ret
        SET_SIZE(fpdisable)
 381 
 382 #endif  /* __lint */
 383 
 384 /*
 385  * Initialize the fpu hardware.
 386  */
 387 
 388 #if defined(__lint)
 389 
/* lint stub; the real assembler implementation follows below */
void
fpinit(void)
{}
 393 
 394 #else   /* __lint */
 395 
        /*
         * void fpinit(void)
         *
         * Load a clean initial FPU state into the hardware.  Clears
         * CR0.TS, then dispatches on the system-wide fp_save_mech:
         *  - non-XSAVE: fxrstorq from the sse_initial template;
         *  - XSAVE: xrstor from the avx_initial template, requesting
         *    LEGACY_FP|SSE, plus AVX only if the X86FSET_AVX bit is set
         *    in x86_featureset (bt copies the bit into CF; cmovael
         *    replaces %eax with 0 when CF is clear, i.e. no AVX).
         */
        ENTRY_NP(fpinit)
        CLTS
        cmpl    $FP_XSAVE, fp_save_mech
        je      1f

        /* fxsave */
        leaq    sse_initial(%rip), %rax
        fxrstorq        (%rax)                  /* load clean initial state */
        ret

1:      /* xsave */
        leaq    avx_initial(%rip), %rcx
        xorl    %edx, %edx                      /* high mask bits = 0 */
        movl    $XFEATURE_AVX, %eax
        bt      $X86FSET_AVX, x86_featureset    /* CF = AVX supported? */
        cmovael %edx, %eax                      /* drop AVX bit if CF == 0 */
        orl     $(XFEATURE_LEGACY_FP | XFEATURE_SSE), %eax
        xrstor (%rcx)
        ret
        SET_SIZE(fpinit)
 416 
 417 #endif  /* __lint */
 418 
 419 /*
 420  * Clears FPU exception state.
 421  * Returns the FP status word.
 422  */
 423 
 424 #if defined(__lint)
 425 
/* lint stubs; the real assembler implementations follow below */
uint32_t
fperr_reset(void)
{ return (0); }

uint32_t
fpxerr_reset(void)
{ return (0); }
 433 
 434 #else   /* __lint */
 435 
        /*
         * uint32_t fperr_reset(void)
         *
         * Returns the x87 status word (zero-extended to 32 bits; %eax
         * is cleared first since fnstsw only writes %ax) and clears any
         * pending x87 exceptions with fnclex.
         */
        ENTRY_NP(fperr_reset)
        CLTS
        xorl    %eax, %eax
        fnstsw  %ax
        fnclex
        ret
        SET_SIZE(fperr_reset)
 443 
        /*
         * uint32_t fpxerr_reset(void)
         *
         * Returns the current MXCSR value, then clears the SSE exception
         * flag bits (SSE_MXCSR_EFLAGS) and writes the result back with
         * ldmxcsr.  stmxcsr/ldmxcsr need a memory operand, so a small
         * stack frame provides scratch space.
         */
        ENTRY_NP(fpxerr_reset)
        pushq   %rbp
        movq    %rsp, %rbp
        subq    $0x10, %rsp             /* make some temporary space */
        CLTS
        stmxcsr (%rsp)
        movl    (%rsp), %eax            /* return value: MXCSR before reset */
        andl    $_BITNOT(SSE_MXCSR_EFLAGS), (%rsp)
        ldmxcsr (%rsp)                  /* clear processor exceptions */
        leave
        ret
        SET_SIZE(fpxerr_reset)
 456 
 457 #endif  /* __lint */
 458 
 459 #if defined(__lint)
 460 
/* lint stub; the real assembler implementation follows below */
uint32_t
fpgetcwsw(void)
{
        return (0);
}
 466 
 467 #else   /* __lint */
 468 
        /*
         * uint32_t fpgetcwsw(void)
         *
         * Returns the x87 control and status words packed into one
         * 32-bit value: status word in the low 16 bits, control word in
         * the high 16 bits (both are stored to the stack and read back
         * with a single 32-bit load).
         */
        ENTRY_NP(fpgetcwsw)
        pushq   %rbp
        movq    %rsp, %rbp
        subq    $0x10, %rsp             /* make some temporary space    */
        CLTS
        fnstsw  (%rsp)                  /* store the status word        */
        fnstcw  2(%rsp)                 /* store the control word       */
        movl    (%rsp), %eax            /* put both in %eax             */
        leave
        ret
        SET_SIZE(fpgetcwsw)
 480 
 481 #endif  /* __lint */
 482 
 483 /*
 484  * Returns the MXCSR register.
 485  */
 486 
 487 #if defined(__lint)
 488 
/* lint stub; the real assembler implementation follows below */
uint32_t
fpgetmxcsr(void)
{
        return (0);
}
 494 
 495 #else   /* __lint */
 496 
        /*
         * uint32_t fpgetmxcsr(void)
         *
         * Returns the current MXCSR register value (stmxcsr requires a
         * memory operand, hence the small stack frame).
         */
        ENTRY_NP(fpgetmxcsr)
        pushq   %rbp
        movq    %rsp, %rbp
        subq    $0x10, %rsp             /* make some temporary space */
        CLTS
        stmxcsr (%rsp)
        movl    (%rsp), %eax
        leave
        ret
        SET_SIZE(fpgetmxcsr)
 507 
 508 #endif  /* __lint */