2 New usr/src/uts/sparc/v9/ml/float.s

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #ident  "%Z%%M% %I%     %E% SMI"
  27 
  28 #include <sys/asm_linkage.h>
  29 #include <sys/trap.h>
  30 #include <sys/machpcb.h>
  31 #include <sys/machtrap.h>
  32 #include <sys/machsig.h>
  33 #include <sys/machthread.h>
  34 
  35 #include "assym.h"
  36 
  37 /*
  38  * Floating point trap handling.
  39  *
  40  *      The FPU is always in a V9 current configuration.
  41  *
  42  *      When a user process is first started via exec,
  43  *      floating point operations will be disabled by default.
  44  *      Upon execution of the first floating point instruction,
  45  *      a fp_disabled trap will be generated; then a word in
  46  *      the uarea is written signifying use of the floating point
  47  *      registers so that subsequent context switches will save
  48  *      and restore them. The trapped instruction
  49  *      will be restarted and processing will continue as normal.
  50  *
  51  *      When an operation occurs that the hardware cannot properly
  52  *      handle, an unfinished fp_op exception will be generated.
  53  *      Software routines in the kernel will be executed to
  54  *      simulate proper handling of such conditions.
  55  *
  56  *      Exception handling will emulate all instructions
  57  *      in the floating point address queue. Note that there
  58  *      is no %fq in sun4u, because it has precise FP traps.
  59  *
  60  *      Floating point queues are now machine dependent, and std %fq
  61  *      is an illegal V9 instruction. The fp_exception code has been
  62  *      moved to sun4u/ml/machfloat.s.
  63  *
  64  *      NOTE: This code DOES NOT SUPPORT KERNEL (DEVICE DRIVER)
  65  *              USE OF THE FPU
  66  *
  67  *      Instructions for running without the hardware fpu:
  68  *      1. Setting fpu_exists to 0 now only works on a DEBUG kernel.
  69  *      2. adb -w unix and set fpu_exists, use_hw_bcopy, use_hw_copyio, and
  70  *              use_hw_bzero to 0 and rename libc_psr.so.1 in
  71  *              /usr/platform/sun4u/lib so that it will not get used by
  72  *              the libc bcopy routines. Then reboot the system and you
  73  *              should see the bootup message "FPU not in use".
  74  *      3. To run kaos, you must comment out the code which sets the
  75  *              version number of the fsr to 7, in fldst: stfsr/stxfsr
  76  *              (unless you are running against a comparison system that
  77  *              has the same fsr version number).
  78  *      4. The stqf{a}/ldqf{a} instructions cause kaos errors, for reasons
  79  *              that appear to be a kaos bug, so don't use them!
  80  */
  81 
  82         .section ".data"
  83         .align  8                       ! fsrholder is accessed with 8-byte stx/ldx
  84 fsrholder:
  85         .word   0                       ! scratch doubleword: fpu_probe stores %fsr here
  86         .word   0                       ! (second word of the same doubleword)
  87 
  88         DGDEF(fpu_exists)               ! always exists for V9
  89 #ifdef FP_DISABLED
  90         .word   0
  91 #else
  92         .word   1                       ! sundiag (gack) uses this variable
  93 #endif
  94 
  95         DGDEF(fpu_version)
  96         .word   -1                      ! replaced by fpu_probe with the FSR_VER field
  97 
  98 /*
  99  * FPU probe - read the %fsr and get fpu_version.
 100  * Called from autoconf. If a %fq is created for
 101  * future cpu versions, a fq_exists variable
 102  * could be created by this function.
 103  */
 104 
 105         ENTRY_NP(fpu_probe)             /* stores the FSR version field in fpu_version */
 106         wr      %g0, FPRS_FEF, %fprs    ! enable fpu in fprs
 107         rdpr    %pstate, %g2            ! read pstate into %g2 (%g2 is reused below)
 108         or      %g2, PSTATE_PEF, %g1    ! new pstate with fpu enabled
 109         wrpr    %g1, %g0, %pstate       ! write pstate; not written back in this routine
 110 
 111         sethi   %hi(fsrholder), %g2     ! %g2 = upper address bits of fsrholder
 112         stx     %fsr, [%g2 + %lo(fsrholder)]    ! %fsr can only be read via a store
 113         ldx     [%g2 + %lo(fsrholder)], %g2     ! snarf the FSR
 114         set     FSR_VER, %g1
 115         and     %g2, %g1, %g2                   ! get version
 116         srl     %g2, FSR_VER_SHIFT, %g2         ! and shift it down
 117         sethi   %hi(fpu_version), %g3           ! save the FPU version
 118         st      %g2, [%g3 + %lo(fpu_version)]   ! fpu_version is a 32-bit word
 119 
 120         ba      fp_kstat_init           ! branch, not call: %o7 is left as our caller set it
 121         wr      %g0, %g0, %fprs         ! delay slot: disable fpu and clear fprs
 122         SET_SIZE(fpu_probe)
 123 
 124 /*
 125  * fp_clearregs(fp)
 126  *      struct v9_fpu *fp;
 127  *
 128  * Initialization for the hardware fpu.
 129  * Load the fsr from fp and initialize registers to NaN (-1)
 130  * The caller (fp_disabled) is supposed to update the fprs
 131  * so when the return to userland is made, the fpu is enabled.
 132  */
 133 
 134         ENTRY_NP(fp_clearregs)
 135         ldx     [%o0 + FPU_FSR], %fsr           ! load fsr from the fp structure
 136 
 137         mov     -1, %g2                         ! all-ones bit pattern, used as the NaN
 138         stx     %g2, [%o0]                      ! overwrite the first 8 bytes at fp with it
 139         ldd     [%o0], %d0                      ! then load that doubleword into each of
 140         ldd     [%o0], %d2                      ! the 32 double registers %d0 - %d62
 141         ldd     [%o0], %d4
 142         ldd     [%o0], %d6
 143         ldd     [%o0], %d8
 144         ldd     [%o0], %d10
 145         ldd     [%o0], %d12
 146         ldd     [%o0], %d14
 147         ldd     [%o0], %d16
 148         ldd     [%o0], %d18
 149         ldd     [%o0], %d20
 150         ldd     [%o0], %d22
 151         ldd     [%o0], %d24
 152         ldd     [%o0], %d26
 153         ldd     [%o0], %d28
 154         ldd     [%o0], %d30
 155         ldd     [%o0], %d32
 156         ldd     [%o0], %d34
 157         ldd     [%o0], %d36
 158         ldd     [%o0], %d38
 159         ldd     [%o0], %d40
 160         ldd     [%o0], %d42
 161         ldd     [%o0], %d44
 162         ldd     [%o0], %d46
 163         ldd     [%o0], %d48
 164         ldd     [%o0], %d50
 165         ldd     [%o0], %d52
 166         ldd     [%o0], %d54
 167         ldd     [%o0], %d56
 168         ldd     [%o0], %d58
 169         ldd     [%o0], %d60
 170         retl
 171         ldd     [%o0], %d62                     ! delay slot of retl: last register
 172         SET_SIZE(fp_clearregs)
 173 
 174 /*
 175  * void _fp_read_pfreg(pf, n)
 176  *      uint32_t        *pf;    Old freg value.
 177  *      unsigned        n;      Want to read register n
 178  *
 179  * {
 180  *      *pf = %f[n];
 181  * }
 182  *
 183  * void
 184  * _fp_write_pfreg(pf, n)
 185  *      uint32_t        *pf;    New freg value.
 186  *      unsigned        n;      Want to write register n.
 187  *
 188  * {
 189  *      %f[n] = *pf;
 190  * }
 191  */
 192 
 193         ENTRY_NP(_fp_read_pfreg)        /* in: %o0 = pf, %o1 = n; stores %f[n] to *pf */
 194         sll     %o1, 3, %o1             ! n * 8: each table entry is two 4-byte instructions
 195         set     .stable, %g1            ! g1 gets base of the store table
 196         jmp     %g1 + %o1               ! enter entry n (n is not range-checked here)
 197         nop                             ! delay slot left empty; entry n begins with a jmp
 198 
 199         ENTRY_NP(_fp_write_pfreg)       /* in: %o0 = pf, %o1 = n; loads %f[n] from *pf */
 200         sll     %o1, 3, %o1             ! n * 8: each table entry is two 4-byte instructions
 201         set     .ltable, %g1            ! g1 gets base of the load table
 202         jmp     %g1 + %o1               ! enter entry n (n is not range-checked here)
 203         nop                             ! delay slot left empty; entry n begins with a jmp
 204 
 205 #define STOREFP(n) jmp %o7+8 ; st %f/**/n, [%o0]
 206 ! STOREFP(n): return to the caller of the entry stub; the st runs in the delay slot.
 207 .stable:                                ! 32 entries in register order, %f0 through %f31
 208         STOREFP(0)
 209         STOREFP(1)
 210         STOREFP(2)
 211         STOREFP(3)
 212         STOREFP(4)
 213         STOREFP(5)
 214         STOREFP(6)
 215         STOREFP(7)
 216         STOREFP(8)
 217         STOREFP(9)
 218         STOREFP(10)
 219         STOREFP(11)
 220         STOREFP(12)
 221         STOREFP(13)
 222         STOREFP(14)
 223         STOREFP(15)
 224         STOREFP(16)
 225         STOREFP(17)
 226         STOREFP(18)
 227         STOREFP(19)
 228         STOREFP(20)
 229         STOREFP(21)
 230         STOREFP(22)
 231         STOREFP(23)
 232         STOREFP(24)
 233         STOREFP(25)
 234         STOREFP(26)
 235         STOREFP(27)
 236         STOREFP(28)
 237         STOREFP(29)
 238         STOREFP(30)
 239         STOREFP(31)
 240 
 241 #define LOADFP(n) jmp %o7+8 ; ld [%o0],%f/**/n
 242 ! LOADFP(n): return to the caller of the entry stub; the ld runs in the delay slot.
 243 .ltable:                                ! 32 entries in register order, %f0 through %f31
 244         LOADFP(0)
 245         LOADFP(1)
 246         LOADFP(2)
 247         LOADFP(3)
 248         LOADFP(4)
 249         LOADFP(5)
 250         LOADFP(6)
 251         LOADFP(7)
 252         LOADFP(8)
 253         LOADFP(9)
 254         LOADFP(10)
 255         LOADFP(11)
 256         LOADFP(12)
 257         LOADFP(13)
 258         LOADFP(14)
 259         LOADFP(15)
 260         LOADFP(16)
 261         LOADFP(17)
 262         LOADFP(18)
 263         LOADFP(19)
 264         LOADFP(20)
 265         LOADFP(21)
 266         LOADFP(22)
 267         LOADFP(23)
 268         LOADFP(24)
 269         LOADFP(25)
 270         LOADFP(26)
 271         LOADFP(27)
 272         LOADFP(28)
 273         LOADFP(29)
 274         LOADFP(30)
 275         LOADFP(31)
 276         SET_SIZE(_fp_read_pfreg)        /* both sizes are taken here, after the tables */
 277         SET_SIZE(_fp_write_pfreg)
 278 
 279 /*
 280  * void _fp_read_pdreg(
 281  *      uint64_t        *pd,    Old dreg value.
 282  *      u_int   n)              Want to read register n
 283  *
 284  * {
 285  *      *pd = %d[n];
 286  * }
 287  *
 288  * void
 289  * _fp_write_pdreg(
 290  *      uint64_t        *pd,    New dreg value.
 291  *      u_int   n)              Want to write register n.
 292  *
 293  * {
 294  *      %d[n] = *pd;
 295  * }
 296  */
 297 
 298         ENTRY_NP(_fp_read_pdreg)        /* in: %o0 = pd, %o1 = n; stores a double reg to *pd */
 299         sll     %o1, 3, %o1             ! n * 8: each table entry is two 4-byte instructions
 300         set     .dstable, %g1           ! g1 gets base of the store table
 301         jmp     %g1 + %o1               ! enter entry n (n is not range-checked here)
 302         nop                             ! delay slot left empty; entry n begins with a jmp
 303 
 304         ENTRY_NP(_fp_write_pdreg)       /* in: %o0 = pd, %o1 = n; loads a double reg from *pd */
 305         sll     %o1, 3, %o1             ! n * 8: each table entry is two 4-byte instructions
 306         set     .dltable, %g1           ! g1 gets base of the load table
 307         jmp     %g1 + %o1               ! enter entry n (n is not range-checked here)
 308         nop                             ! delay slot left empty; entry n begins with a jmp
 309 
 310 #define STOREDP(n) jmp %o7+8 ; std %d/**/n, [%o0]
 311 ! STOREDP(n): return to the caller of the entry stub; the std runs in the delay slot.
 312 .dstable:                               ! 32 entries; table index i holds %d(2*i)
 313         STOREDP(0)
 314         STOREDP(2)
 315         STOREDP(4)
 316         STOREDP(6)
 317         STOREDP(8)
 318         STOREDP(10)
 319         STOREDP(12)
 320         STOREDP(14)
 321         STOREDP(16)
 322         STOREDP(18)
 323         STOREDP(20)
 324         STOREDP(22)
 325         STOREDP(24)
 326         STOREDP(26)
 327         STOREDP(28)
 328         STOREDP(30)
 329         STOREDP(32)
 330         STOREDP(34)
 331         STOREDP(36)
 332         STOREDP(38)
 333         STOREDP(40)
 334         STOREDP(42)
 335         STOREDP(44)
 336         STOREDP(46)
 337         STOREDP(48)
 338         STOREDP(50)
 339         STOREDP(52)
 340         STOREDP(54)
 341         STOREDP(56)
 342         STOREDP(58)
 343         STOREDP(60)
 344         STOREDP(62)
 345 
 346 #define LOADDP(n) jmp %o7+8 ; ldd [%o0],%d/**/n
 347 ! LOADDP(n): return to the caller of the entry stub; the ldd runs in the delay slot.
 348 .dltable:                               ! 32 entries; table index i holds %d(2*i)
 349         LOADDP(0)
 350         LOADDP(2)
 351         LOADDP(4)
 352         LOADDP(6)
 353         LOADDP(8)
 354         LOADDP(10)
 355         LOADDP(12)
 356         LOADDP(14)
 357         LOADDP(16)
 358         LOADDP(18)
 359         LOADDP(20)
 360         LOADDP(22)
 361         LOADDP(24)
 362         LOADDP(26)
 363         LOADDP(28)
 364         LOADDP(30)
 365         LOADDP(32)
 366         LOADDP(34)
 367         LOADDP(36)
 368         LOADDP(38)
 369         LOADDP(40)
 370         LOADDP(42)
 371         LOADDP(44)
 372         LOADDP(46)
 373         LOADDP(48)
 374         LOADDP(50)
 375         LOADDP(52)
 376         LOADDP(54)
 377         LOADDP(56)
 378         LOADDP(58)
 379         LOADDP(60)
 380         LOADDP(62)
 381         SET_SIZE(_fp_read_pdreg)        /* both sizes are taken here, after the tables */
 382         SET_SIZE(_fp_write_pdreg)
 383 
 384         ENTRY_NP(_fp_write_pfsr)        /* in: %o0 = address of a 64-bit fsr value */
 385         retl
 386         ldx     [%o0], %fsr             ! delay slot: load %fsr from [%o0]
 387         SET_SIZE(_fp_write_pfsr)
 388 
 389         ENTRY_NP(_fp_read_pfsr)         /* in: %o0 = address that receives the 64-bit fsr */
 390         retl
 391         stx     %fsr, [%o0]             ! delay slot: store %fsr to [%o0]
 392         SET_SIZE(_fp_read_pfsr)
 393 
 394         ENTRY_NP(_fp_write_fprs)        /* in: %o0 = new fprs value */
 395         retl
 396         wr      %o0, %g0, %fprs                 ! delay slot: write %o0 to fprs
 397         SET_SIZE(_fp_write_fprs)
 398 
 399         ENTRY_NP(_fp_read_fprs)         /* out: %o0 = current fprs value */
 400         retl
 401         rd      %fprs, %o0                      ! delay slot: return fprs in %o0
 402         SET_SIZE(_fp_read_fprs)
 403 
 404         ENTRY_NP(_fp_subcc_ccr)         /* out: %o0 = %ccr produced by %o0 - %o1 */
 405         subcc   %o0, %o1, %g0           ! set the condition codes; the difference is discarded
 406         retl
 407         rd      %ccr, %o0                       ! delay slot: return the ccr in %o0
 408         SET_SIZE(_fp_subcc_ccr)
 409 
 410 /*
 411  * Floating Point Exceptions handled according to type:
 412  *      2) unfinished_fpop
 413  *              re-execute the faulty instruction(s) using
 414  *              software emulation (must do every instruction in FQ)
 415  *      3) unimplemented_fpop
 416  *              an unimplemented instruction, if it is legal,
 417  *              will cause emulation of the instruction (and all
 418  *              other instructions in the FQ)
 419  *      4) sequence_error
 420  *              panic, this should not happen, and if it does
 421  *              it is the result of a kernel bug
 422  *
 423  * This code assumes the trap preamble has set up the window environment
 424  * for execution of kernel code.
 425  * Note: this code could be changed to be part of the cpu-specific
 426  * (ie, Spitfire-specific) module code before final release.
 427  */
 428 
 429         ENTRY_NP(_fp_exception)         /* in: %o0 = rp, %o1 = fsr including ftt */
 430         mov     %o7, %l0                ! saved return address (calls below clobber %o7)
 431         mov     %o0, %l1                ! saved *rp
 432         set     FSR_FTT, %o4            ! put FSR_FTT in %o4
 433         xor     %o4, 0xffffffffffffffff, %o3 ! invert the FSR_FTT mask to get
 434         and     %o1, %o3, %o2           ! an fsr with a zeroed ftt
 435         ldn     [THREAD_REG + T_LWP], %o3 ! get lwp
 436         ldn     [%o3 + LWP_FPU], %l3    ! get lwp_fpu
 437         stx     %o2, [%l3 + FPU_FSR]    ! save floating point status
 438         and     %o1, %o4, %g2           ! get the ftt trap type
 439 #ifdef  DEBUG
 440         brnz,a,pt %g2, fttok            ! ftt field non-zero: carry on
 441           nop
 442         set     .badfpfttmsg, %o0       ! panic message
 443         call    panic                   ! %o1 has the fsr w/ftt value
 444         nop
 445 fttok:
 446 #endif  /* DEBUG */
 447         srl     %g2, FSR_FTT_SHIFT, %o4 ! check ftt
 448         cmp     %o4, FTT_SEQ            ! sanity check for bogus exceptions
 449         !
 450         ! traps are already enabled to allow other
 451         ! interrupts while emulating floating point instructions
 452         !
 453         blt,a,pt %xcc, fpeok            ! ftt below FTT_SEQ is handled at fpeok
 454         nop
 455         !
 456         ! Sequence error or unknown ftt exception.
 457         !
 458 seq_error:
 459         set     .badfpexcpmsg, %o0      ! panic if bad ftt
 460         call    panic
 461         sra     %o4, 0, %o1             ! mov ftt to o1 for panic message
 462 
 463 fpeok:
 464         call    fp_kstat_update         ! fp_kstat_update(ftt)
 465         mov     %o4, %o0                ! ftt
 466         !
 467         ! Get the floating point instruction, and run the floating
 468         ! point simulator. There is no floating point queue, so we fake one.
 469         !
 470         call    fp_precise              ! fp_precise(&regs)
 471         mov     %l1, %o0                ! saved *rp
 472 
 473 fp_ret:
 474         rd      %fprs, %g1              ! read fprs, save value in %g1
 475         st      %g1, [%l3 + FPU_FPRS]   ! save fprs
 476         jmp     %l0 + 8                 ! jump to saved return address
 477         stx     %fsr, [%l3 + FPU_FSR]   ! delay slot: save fsr
 478         SET_SIZE(_fp_exception)
 479 
 480 .badfpexcpmsg:                          ! panic format for a bad ftt; argument is the ftt
 481         .asciz  "unexpected floating point exception %x"
 482 
 483 #ifdef  DEBUG
 484 .badfpfttmsg:                           ! panic format for a zero ftt; argument is the fsr
 485         .asciz  "No floating point ftt, fsr %llx"
 486 #endif  /* DEBUG */
 487 
 488 /*
 489  * Floating Point Exceptions.
 490  * handled according to type:
 491  *      1) IEEE_exception
 492  *              no emulation is done here; fpu_trap is called with an
 493  *              FPE_* code chosen from the fsr cexc bits
 494  *
 495  * This code assumes the trap preamble has set up the window environment
 496  * for execution of kernel code.
 497  */
 498 
 499         ENTRY_NP(_fp_ieee_exception)    /* in: %o0 = rp, %o1 = fsr including ftt */
 500         mov     %o7, %l0                ! saved return address (calls below clobber %o7)
 501         mov     %o0, %l1                ! saved *rp
 502         mov     %o1, %l2                ! saved fsr
 503         set     FSR_FTT, %o4            ! put FSR_FTT in %o4
 504         xor     %o4, 0xffffffffffffffff, %o3 ! invert the FSR_FTT mask to get
 505         and     %o1, %o3, %o2           ! an fsr with a zeroed ftt
 506         ldn     [THREAD_REG + T_LWP], %o3 ! get lwp
 507         ldn     [%o3 + LWP_FPU], %l3    ! get lwp_fpu
 508         stx     %o2, [%l3 + FPU_FSR]    ! save floating point status
 509         stub    %g0, [%l3 + FPU_QCNT]   ! clear fpu_qcnt
 510         and     %o1, %o4, %g2           ! keep only the ftt field
 511 #ifdef  DEBUG
 512         brnz,a,pt %g2, fttgd            ! ftt field non-zero: carry on
 513           nop
 514         set     .badfpfttmsg, %o0       ! panic message
 515         call    panic                   ! %o1 has the fsr w/ftt value
 516         nop
 517 fttgd:
 518 #endif  /* DEBUG */
 519         srl     %g2, FSR_FTT_SHIFT, %o4 ! check ftt
 520         cmp     %o4, FTT_SEQ            ! sanity check for bogus exceptions
 521         !
 522         ! traps are already enabled to allow other
 523         ! interrupts while emulating floating point instructions
 524         !
 525         blt,a,pt %xcc, fpegd            ! ftt below FTT_SEQ is handled at fpegd
 526         nop
 527         !
 528         ! Sequence error or unknown ftt exception.
 529         !
 530 seq_err:
 531         set     .badfpexcpmsg, %o0      ! panic if bad ftt
 532         call    panic
 533         sra     %o4, 0, %o1             ! mov ftt to o1 for panic message
 534 
 535 fpegd:
 536         call    fp_kstat_update         ! fp_kstat_update(ftt)
 537         mov     %o4, %o0                ! ftt
 538         !
 539         ! Call fpu_trap directly, do not bother to run the fp simulator.
 540         ! The *rp is reloaded from %l1 at fpok; the code goes in %o3.
 541         !
 542         set     (T_FP_EXCEPTION_IEEE), %o2      ! trap type
 543 
 544         set     FSR_CEXC, %o3
 545         and     %l2, %o3, %g2           ! cexc bits of the saved fsr
 546 
 547         andcc   %g2, FSR_CEXC_NX, %g0   ! check for inexact
 548         bnz,a,pt %xcc, fpok
 549         or      %g0, FPE_FLTRES, %o3    ! fp inexact code
 550 
 551         andcc   %g2, FSR_CEXC_DZ, %g0   ! check for divide-by-zero
 552         bnz,a,pt %xcc, fpok
 553         or      %g0, FPE_FLTDIV, %o3    ! fp divide by zero code
 554 
 555         andcc   %g2, FSR_CEXC_UF, %g0   ! check for underflow
 556         bnz,a,pt %xcc, fpok
 557         or      %g0, FPE_FLTUND, %o3    ! fp underflow code
 558 
 559         andcc   %g2, FSR_CEXC_OF, %g0   ! check for overflow
 560         bnz,a,pt %xcc, fpok
 561         or      %g0, FPE_FLTOVF, %o3    ! fp overflow code
 562 
 563         andcc   %g2, FSR_CEXC_NV, %g0   ! check for invalid
 564         bnz,a,pn %xcc, fpok
 565         or      %g0, FPE_FLTINV, %o3    ! fp invalid code
 566 
 567 cexec_err:
 568         set     .badfpcexcmsg, %o0      ! panic message
 569         call    panic                   ! panic if no cexc bit set
 570         mov     %l2, %o1                ! saved fsr for %llx (%g1 never held it)
 571 fpok:
 572         mov     %l1, %o0                ! saved *rp
 573         call    fpu_trap                ! fpu_trap(&regs, addr, type, code)
 574         ldn     [%o0 + PC_OFF], %o1     ! address of trapping instruction
 575 
 576         rd      %fprs, %g1              ! read fprs, save value in %g1
 577         st      %g1, [%l3 + FPU_FPRS]   ! save fprs
 578         jmp     %l0 + 8                 ! jump to saved return address
 579         stx     %fsr, [%l3 + FPU_FSR]   ! delay slot: save fsr
 580         SET_SIZE(_fp_ieee_exception)
 581 
 582 .badfpcexcmsg:                          ! panic format for no cexc bit set; argument is the fsr
 583         .asciz  "No floating point exception, fsr %llx"
 584