1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 #include "assym.h"
  26 
  27 /*
  28  * General assembly language routines.
  29  * It is the intent of this file to contain routines that are
  30  * specific to cpu architecture.
  31  */
  32 
  33 /*
  34  * WARNING: If you add a fast trap handler that can be invoked by a
  35  * non-privileged user, you may have to use the FAST_TRAP_DONE macro
  36  * instead of the "done" instruction to return to user mode. See the
  37  * comments for the "fast_trap_done" entry point for more information.
  38  */
  39 #define FAST_TRAP_DONE  \
  40         ba,a    fast_trap_done
  41 
  42 #include <sys/machclock.h>
  43 #include <sys/clock.h>
  44 
  45 
  46 #include <sys/asm_linkage.h>
  47 #include <sys/privregs.h>
  48 #include <vm/hat_sfmmu.h>
  49 #include <sys/machparam.h>        /* To get SYSBASE and PAGESIZE */
  50 #include <sys/machthread.h>
  51 #include <sys/clock.h>
  52 #include <sys/intreg.h>
  53 #include <sys/psr_compat.h>
  54 #include <sys/isa_defs.h>
  55 #include <sys/dditypes.h>
  56 #include <sys/intr.h>
  57 #include <sys/hypervisor_api.h>
  58 
  59 #include "assym.h"
  60 
  61 #define ICACHE_FLUSHSZ  0x20
  62 
  63         ENTRY_NP(tickcmpr_set)
             /*
              * Program TICK_CMPR with the value passed in %o0 and make sure
              * the programmed value lies in the future.  After each write
              * the native time is re-read into %o0; if the value written is
              * not greater than it (signed 64-bit compare), the target
              * becomes "time just read + step" and the write is retried.
              * The step starts at 8 but is doubled in the (non-annulled)
              * delay slot of the exit test, so the first retry uses 16.
              *
              * Registers: %o2 = value being programmed, %o3 = step,
              * %o4/%o5 = handed to the macros (presumably scratch).
              */
  64         ! get 64-bit clock_cycles interval
  65         mov     %o0, %o2
  66         mov     8, %o3                  ! A reasonable initial step size
  67 1:
  68         WR_TICKCMPR(%o2,%o4,%o5,__LINE__)       ! Write to TICK_CMPR
  69 
  70         GET_NATIVE_TIME(%o0,%o4,%o5,__LINE__)   ! Read %tick to confirm the
  71                                                 ! value we wrote was in the
  72                                                 ! future.
  73 
  74         cmp     %o2, %o0                ! If the value we wrote was in the
  75         bg,pt   %xcc, 2f                !   future, then blow out of here.
  76           sllx  %o3, 1, %o3             ! If not, then double our step size,
  77         ba,pt   %xcc, 1b                !   and take another lap.
  78           add   %o0, %o3, %o2           !
  79 2:
  80         retl
  81           nop
  82         SET_SIZE(tickcmpr_set)
  83 
  84         ENTRY_NP(tickcmpr_disable)
             /*
              * Write a value with only bit TICKINT_DIS_SHFT set to TICK_CMPR.
              * tick_rtt in this file treats a non-zero
              * (TICK_CMPR >> TICKINT_DIS_SHFT) as "TICK_COMPARE is disabled".
              * Overwrites %g1 and %o0; %o4/%o5 are handed to WR_TICKCMPR.
              */
  85         mov     1, %g1
  86         sllx    %g1, TICKINT_DIS_SHFT, %o0
  87         WR_TICKCMPR(%o0,%o4,%o5,__LINE__)       ! Write to TICK_CMPR
  88         retl
  89           nop
  90         SET_SIZE(tickcmpr_disable)
  91 
  92         .seg    ".text"
  93 tick_write_delta_panic:
  94         .asciz  "tick_write_delta: not supported, delta: 0x%lx"
  95 
  96         ENTRY_NP(tick_write_delta)
             /*
              * Not supported here: panics unconditionally, passing the
              * caller's first argument (the delta) as the value for the
              * 0x%lx conversion in tick_write_delta_panic.
              *
              * The sethi result is produced in the caller's window as %o1
              * and is therefore read back as %i1 after the save.  The
              * retl/nop after the call is marked NOTREACHED.
              */
  97         sethi   %hi(tick_write_delta_panic), %o1
  98         save    %sp, -SA(MINFRAME), %sp ! get a new window to preserve caller
  99         mov     %i0, %o1
 100         call    panic
 101           or    %i1, %lo(tick_write_delta_panic), %o0
 102         /*NOTREACHED*/
 103         retl
 104           nop
             SET_SIZE(tick_write_delta)
 105 
 106         ENTRY_NP(tickcmpr_disabled)
             /*
              * Return (in %o0) the value RD_TICKCMPR leaves in %g1, shifted
              * right by TICKINT_DIS_SHFT; the shift runs in the retl delay
              * slot.  %o0 and %o1 are also handed to RD_TICKCMPR, presumably
              * as scratch -- confirm against the macro definition.
              */
 107         RD_TICKCMPR(%g1,%o0,%o1,__LINE__)
 108         retl
 109           srlx  %g1, TICKINT_DIS_SHFT, %o0
 110         SET_SIZE(tickcmpr_disabled)
 111 
 112 /*
 113  * Get current tick
      *
      * gettick() and its alternate entry randtick() share one body:
      * GET_NATIVE_TIME leaves its result in %o0, with %o2/%o3 handed to the
      * macro (presumably as scratch).
 114  */
 115 
 116         ENTRY(gettick)
 117         ALTENTRY(randtick)
 118         GET_NATIVE_TIME(%o0,%o2,%o3,__LINE__)
 119         retl
 120           nop
 121         SET_SIZE(randtick)
 122         SET_SIZE(gettick)
 123 
 124 /*
 125  * Get current tick. For trapstat use only.
      *
      * NOTE(review): RD_TICK_PHYSICAL occupies the delay slot of retl, so it
      * has to expand to exactly one instruction -- confirm against the macro
      * definition.
 126  */
 127         ENTRY(rdtick)
 128         retl
 129         RD_TICK_PHYSICAL(%o0)
 130         SET_SIZE(rdtick)
 131 
 132 
 133 /*
 134  * Return the counter portion of the tick register.
      *
      * The value is produced in %o0 by RD_TICK; %o1 and %o2 are handed to the
      * macro as well, presumably as scratch.
 135  */
 136 
 137         ENTRY_NP(gettick_counter)
 138         RD_TICK(%o0,%o1,%o2,__LINE__)
 139         retl
 140         nop
 141         SET_SIZE(gettick_counter)
 142 
 143         ENTRY_NP(gettick_npt)
             /*
              * Return bit 63 of the value RD_TICK_PHYSICAL places in %o0
              * (result is 0 or 1).  The shift executes in the retl delay
              * slot.
              */
 144         RD_TICK_PHYSICAL(%o0)
 145         retl
 146         srlx    %o0, 63, %o0
 147         SET_SIZE(gettick_npt)
 148 
 149         ENTRY_NP(getstick_npt)
             /*
              * Return bit 63 of the value RD_STICK_PHYSICAL places in %o0
              * (result is 0 or 1).  The shift executes in the retl delay
              * slot.
              */
 150         RD_STICK_PHYSICAL(%o0)
 151         retl
 152         srlx    %o0, 63, %o0
 153         SET_SIZE(getstick_npt)
 154 
 155 /*
 156  * Provide a C callable interface to the trap that reads the hi-res timer.
 157  * Returns the 64-bit nanosecond timestamp in %o0.
      *
      * GET_HRTIME delivers the result in %g1; it is copied to %o0 in the retl
      * delay slot.  %o0-%o5 and %g2 are handed to the macro, presumably as
      * scratch.
 158  */
 159 
 160         ENTRY_NP(gethrtime)
 161         GET_HRTIME(%g1,%o0,%o1,%o2,%o3,%o4,%o5,%g2,__LINE__)
 162                                                         ! %g1 = hrtime
 163         retl
 164           mov   %g1, %o0
 165         SET_SIZE(gethrtime)
 166 
 167         ENTRY_NP(gethrtime_unscaled)
             /*
              * Return the native time in %o0 without converting it to
              * nanoseconds.  %g1 receives the value; %o2/%o3 are handed to
              * GET_NATIVE_TIME.
              */
 168         GET_NATIVE_TIME(%g1,%o2,%o3,__LINE__)   ! %g1 = native time
 169         retl
 170           mov   %g1, %o0
 171         SET_SIZE(gethrtime_unscaled)
 172 
 173         ENTRY_NP(gethrtime_waitfree)
 174         ALTENTRY(dtrace_gethrtime)
             /*
              * Shared body for gethrtime_waitfree() and dtrace_gethrtime():
              * read the native time into %g1, convert it in place with
              * NATIVE_TIME_TO_NSEC and return it in %o0.  Unlike the other
              * time readers in this file, no hres_lock access is visible
              * here.  %o2/%o3 are handed to both macros.
              */
 175         GET_NATIVE_TIME(%g1,%o2,%o3,__LINE__)   ! %g1 = native time
 176         NATIVE_TIME_TO_NSEC(%g1, %o2, %o3)
 177         retl
 178           mov   %g1, %o0
 179         SET_SIZE(dtrace_gethrtime)
 180         SET_SIZE(gethrtime_waitfree)
 181 
 182         ENTRY(gethrtime_max)
             /*
              * Return (in %o0) NATIVE_TIME_MAX converted with
              * NATIVE_TIME_TO_NSEC.  If the converted value is negative it
              * is replaced by all-ones shifted right logically by one bit
              * (0x7fffffffffffffff); the annulled delay slot performs that
              * replacement only when the branch is taken.
              */
 183         NATIVE_TIME_MAX(%g1)
 184         NATIVE_TIME_TO_NSEC(%g1, %o0, %o1)
 185 
 186         ! hrtime_t's are signed, max hrtime_t must be positive
 187         mov     -1, %o2
 188         brlz,a  %g1, 1f
 189           srlx  %o2, 1, %g1
 190 1:
 191         retl
 192           mov   %g1, %o0
 193         SET_SIZE(gethrtime_max)
 194 
 195         ENTRY(scalehrtime)
             /*
              * %o0 = address of a 64-bit value.  The value is loaded,
              * converted in place with NATIVE_TIME_TO_NSEC and stored back
              * to the same address (the store is in the retl delay slot).
              * %o1-%o3 are overwritten or handed to the macro.
              */
 196         ldx     [%o0], %o1
 197         NATIVE_TIME_TO_NSEC(%o1, %o2, %o3)
 198         retl
 199           stx   %o1, [%o0]
 200         SET_SIZE(scalehrtime)
 201 
 202 /*
 203  * Fast trap to return a timestamp, uses trap window, leaves traps
 204  * disabled.  Returns a 64-bit nanosecond timestamp in %o0 and %o1.
 205  *
 206  * This is the handler for the ST_GETHRTIME trap.
      *
      * Register usage:
      *      %g1 = hrtime produced by GET_HRTIME
      *      %o0 = upper 32 bits of %g1, %o1 = lower 32 bits of %g1
      *      %g2-%g5 and %o0-%o2 are handed to GET_HRTIME beforehand
      * Returns through FAST_TRAP_DONE rather than retl.
 207  */
 208 
 209         ENTRY_NP(get_timestamp)
 210         GET_HRTIME(%g1,%g2,%g3,%g4,%g5,%o0,%o1,%o2,__LINE__)
 211         ! %g1 = hrtime
 212         srlx    %g1, 32, %o0                            ! %o0 = hi32(%g1)
 213         srl     %g1, 0, %o1                             ! %o1 = lo32(%g1)
 214         FAST_TRAP_DONE
 215         SET_SIZE(get_timestamp)
 216 
 217 /*
 218  * Macro to convert GET_HRESTIME() bits into a timestamp.
 219  *
 220  * We use two separate macros so that the platform-dependent GET_HRESTIME()
 221  * can be as small as possible; CONV_HRESTIME() implements the generic part.
      *
      * All arguments are registers:
      *      hrestsec, hrestnsec = seconds / nanoseconds, updated in place
      *      adj   = hrestime_adj (clobbered)
      *      nslt  = nanosecond delta added to hrestnsec (clobbered; presumably
      *              "nanoseconds since last tick" -- confirm in clock.h)
      *      nano  = the value compared against and subtracted as NANOSEC
      *
      * The adjustment applied is limited in magnitude to nslt >> ADJ_SHIFT.
      * The normalization at label 3 loops until hrestnsec < nano.  The macro
      * defines numeric labels 2, 3 and 4 and modifies the condition codes.
 222  */
 223 #define CONV_HRESTIME(hrestsec, hrestnsec, adj, nslt, nano) \
 224         brz,pt  adj, 3f;                /* no adjustments, it's easy */ \
 225         add     hrestnsec, nslt, hrestnsec; /* hrest.tv_nsec += nslt */ \
 226         brlz,pn adj, 2f;                /* if hrestime_adj negative */  \
 227           srlx  nslt, ADJ_SHIFT, nslt;  /* delay: nslt >>= 4 */           \
 228         subcc   adj, nslt, %g0;         /* hrestime_adj - nslt/16 */    \
 229         movg    %xcc, nslt, adj;        /* adj by min(adj, nslt/16) */  \
 230         ba      3f;                     /* go convert to sec/nsec */    \
 231           add   hrestnsec, adj, hrestnsec; /* delay: apply adjustment */ \
 232 2:      addcc   adj, nslt, %g0;         /* hrestime_adj + nslt/16 */    \
 233         bge,a,pt %xcc, 3f;              /* is adj less negative? */     \
 234           add   hrestnsec, adj, hrestnsec; /* yes: hrest.nsec += adj */ \
 235         sub     hrestnsec, nslt, hrestnsec; /* no: hrest.nsec -= nslt/16 */ \
 236 3:      cmp     hrestnsec, nano;        /* more than a billion? */      \
 237         bl,pt   %xcc, 4f;               /* if not, we're done */        \
 238           nop;                          /* delay: do nothing :( */      \
 239         add     hrestsec, 1, hrestsec;  /* hrest.tv_sec++; */           \
 240         sub     hrestnsec, nano, hrestnsec; /* hrest.tv_nsec -= NANOSEC; */     \
 241         ba,a    3b;                     /* check >= billion again */ \
 242 4:
 243 
 244         ENTRY_NP(gethrestime)
             /*
              * %o0 = address of the structure to fill in.  The seconds value
              * is stored at [%o0] and the nanoseconds value at
              * [%o0 + CLONGSIZE] (the latter in the retl delay slot).
              * %o1-%o5 and %g1-%g4 are handed to the GET_HRESTIME and
              * CONV_HRESTIME macros.
              */
 245         GET_HRESTIME(%o1,%o2,%o3,%o4,%o5,%g1,%g2,%g3,%g4,__LINE__)
 246         CONV_HRESTIME(%o1, %o2, %o3, %o4, %o5)
 247         stn     %o1, [%o0]
 248         retl
 249           stn   %o2, [%o0 + CLONGSIZE]
 250         SET_SIZE(gethrestime)
 251 
 252 /*
 253  * Similar to gethrestime(), but gethrestime_sec() returns current hrestime
 254  * seconds.
      *
      * The seconds value is computed directly in %o0 (the first argument of
      * both macros), so nothing is stored to memory here; the nanoseconds
      * value left in %o2 is discarded.
 255  */
 256         ENTRY_NP(gethrestime_sec)
 257         GET_HRESTIME(%o0,%o2,%o3,%o4,%o5,%g1,%g2,%g3,%g4,__LINE__)
 258         CONV_HRESTIME(%o0, %o2, %o3, %o4, %o5)
 259         retl                                    ! %o0 current hrestime seconds
 260           nop
 261         SET_SIZE(gethrestime_sec)
 262 
 263 /*
 264  * Returns the hrestime on the last tick.  This is simpler than gethrestime()
 265  * and gethrestime_sec():  no conversion is required.  gethrestime_lasttick()
 266  * follows the same locking algorithm as GET_HRESTIME and GET_HRTIME,
 267  * outlined in detail in clock.h.  (Unlike GET_HRESTIME/GET_HRTIME, we don't
 268  * rely on load dependencies to effect the membar #LoadLoad, instead declaring
 269  * it explicitly.)
      *
      * %o0 = address of the structure to fill in (seconds at [%o0],
      * nanoseconds at [%o0 + CLONGSIZE]).
      *
      * The store of the seconds value sits in the non-annulled delay slot of
      * the retry branch, so it is executed on every pass through the loop,
      * including passes that are retried; the final pass overwrites it with a
      * consistent value.
 270  */
 271         ENTRY_NP(gethrestime_lasttick)
 272         sethi   %hi(hres_lock), %o1
 273 0:
 274         lduw    [%o1 + %lo(hres_lock)], %o2     ! Load lock value
 275         membar  #LoadLoad                       ! Load of lock must complete
 276         andn    %o2, 1, %o2                     ! Mask off lowest bit
 277         ldn     [%o1 + %lo(hrestime)], %g1      ! Seconds.
 278         add     %o1, %lo(hrestime), %o4
 279         ldn     [%o4 + CLONGSIZE], %g2          ! Nanoseconds.
 280         membar  #LoadLoad                       ! All loads must complete
 281         lduw    [%o1 + %lo(hres_lock)], %o3     ! Reload lock value
 282         cmp     %o3, %o2                        ! If lock is locked or has
 283         bne     0b                              !   changed, retry.
 284           stn   %g1, [%o0]                      ! Delay: store seconds
 285         retl
 286           stn   %g2, [%o0 + CLONGSIZE]          ! Delay: store nanoseconds
 287         SET_SIZE(gethrestime_lasttick)
 288 
 289 /*
 290  * Fast trap for gettimeofday().  Returns a timestruc_t in %o0 and %o1.
 291  *
 292  * This is the handler for the ST_GETHRESTIME trap.
      *
      * %o0 = seconds and %o1 = nanoseconds after CONV_HRESTIME.  %g1-%g5,
      * %o2 and %o3 are handed to the macros.  Returns through
      * FAST_TRAP_DONE rather than retl.
 293  */
 294 
 295         ENTRY_NP(get_hrestime)
 296         GET_HRESTIME(%o0,%o1,%g1,%g2,%g3,%g4,%g5,%o2,%o3,__LINE__)
 297         CONV_HRESTIME(%o0, %o1, %g1, %g2, %g3)
 298         FAST_TRAP_DONE
 299         SET_SIZE(get_hrestime)
 300 
 301 /*
 302  * Fast trap to return lwp virtual time, uses trap window, leaves traps
 303  * disabled.  Returns a 64-bit number in %o0:%o1, which is the number
 304  * of nanoseconds consumed.
 305  *
 306  * This is the handler for the ST_GETHRVTIME trap.
 307  *
 308  * Register usage:
 309  *      %o0, %o1 = return lwp virtual time (upper 32 bits, lower 32 bits)
 310  *      %g2 = CPU, then thread
 311  *      %g3 = lwp
 312  *      %g1 = scratch
 313  *      %g5 = running time value
      *
      * %o0 is also handed to NATIVE_TIME_TO_NSEC before it receives the
      * result.
 314  */
 315         ENTRY_NP(get_virtime)
 316         GET_NATIVE_TIME(%g5,%g1,%g2,__LINE__)   ! %g5 = native time in ticks
 317         CPU_ADDR(%g2, %g3)                      ! CPU struct ptr to %g2
 318         ldn     [%g2 + CPU_THREAD], %g2         ! thread pointer to %g2
 319         ldn     [%g2 + T_LWP], %g3              ! lwp pointer to %g3
 320 
 321         /*
 322          * Subtract start time of current microstate from time
 323          * of day to get increment for lwp virtual time.
 324          */
 325         ldx     [%g3 + LWP_STATE_START], %g1    ! ms_state_start
 326         sub     %g5, %g1, %g5
 327 
 328         /*
 329          * Add current value of ms_acct[LMS_USER]
 330          */
 331         ldx     [%g3 + LWP_ACCT_USER], %g1      ! ms_acct[LMS_USER]
 332         add     %g5, %g1, %g5
 333         NATIVE_TIME_TO_NSEC(%g5, %g1, %o0)
 334 
 335         srl     %g5, 0, %o1                     ! %o1 = lo32(%g5)
 336         srlx    %g5, 32, %o0                    ! %o0 = hi32(%g5)
 337 
 338         FAST_TRAP_DONE
 339         SET_SIZE(get_virtime)
 340 
 341 
 342 
 343         .seg    ".text"
 344 hrtime_base_panic:
 345         .asciz  "hrtime_base stepping back"
 346 
 347 
 348         ENTRY_NP(hres_tick)
             /*
              * Advance hrtime_base and hrestime while holding hres_lock.
              *
              * 1. Spin until ldstub on the byte at hres_lock +
              *    HRES_LOCK_OFFSET returns zero; while waiting, the byte is
              *    polled with plain loads (ldub).
              * 2. Record the native time in hres_last_tick, scale it to
              *    nanoseconds with nsec_scale, and panic (second label 9,
              *    near the end) if the stored hrtime_base is greater than
              *    the new value.
              * 3. Compute adj, limited in magnitude by both hrestime_adj
              *    and (nsec delta >> ADJ_SHIFT); subtract it from timedelta
              *    and store the result to both timedelta and hrestime_adj.
              * 4. Add adj and the nsec delta to hrestime, carrying at most
              *    one second and storing 1 to one_sec when a carry occurs.
              * 5. Release the lock by incrementing the hres_lock word.
              *
              * Numeric label 9 is defined twice: the spin loop branches
              * back to the first (9b), the hrtime_base check branches
              * forward to the second (9f).
              */
 349         save    %sp, -SA(MINFRAME), %sp ! get a new window
 350 
 351         sethi   %hi(hrestime), %l4
 352         ldstub  [%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5  ! try locking
 353 7:      tst     %l5
 354         bz,pt   %xcc, 8f                        ! if we got it, drive on
 355           ld    [%l4 + %lo(nsec_scale)], %l5    ! delay: %l5 = scaling factor
 356         ldub    [%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
 357 9:      tst     %l5
 358         bz,a,pn %xcc, 7b
 359           ldstub        [%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
 360         ba,pt   %xcc, 9b
 361           ldub  [%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
 362 8:
 363         membar  #StoreLoad|#StoreStore
 364 
 365         !
 366         ! update hres_last_tick.  %l5 has the scaling factor (nsec_scale).
 367         !
 368         ldx     [%l4 + %lo(hrtime_base)], %g1   ! load current hrtime_base
 369         GET_NATIVE_TIME(%l0,%l3,%l6,__LINE__)   ! current native time
 370         stx     %l0, [%l4 + %lo(hres_last_tick)]! prev = current
 371         ! convert native time to nsecs
 372         NATIVE_TIME_TO_NSEC_SCALE(%l0, %l5, %l2, NSEC_SHIFT)
 373 
 374         sub     %l0, %g1, %i1                   ! get accurate nsec delta
 375 
 376         ldx     [%l4 + %lo(hrtime_base)], %l1
 377         cmp     %l1, %l0
 378         bg,pn   %xcc, 9f
 379           nop
 380 
 381         stx     %l0, [%l4 + %lo(hrtime_base)]   ! update hrtime_base
 382 
 383         !
 384         ! apply adjustment, if any
 385         !
 386         ldx     [%l4 + %lo(hrestime_adj)], %l0  ! %l0 = hrestime_adj
 387         brz     %l0, 2f
 388                                                 ! hrestime_adj == 0 ?
 389                                                 ! yes, skip adjustments
 390           clr   %l5                             ! delay: set adj to zero
 391         tst     %l0                             ! is hrestime_adj >= 0 ?
 392         bge,pt  %xcc, 1f                        ! yes, go handle positive case
 393           srl   %i1, ADJ_SHIFT, %l5             ! delay: %l5 = adj
 394 
 395         addcc   %l0, %l5, %g0                   ! hrestime_adj < -adj ?
 396         bl,pt   %xcc, 2f                        ! yes, use current adj
 397           neg   %l5                             ! delay: %l5 = -adj
 398         ba,pt   %xcc, 2f
 399           mov   %l0, %l5                        ! no, so set adj = hrestime_adj
 400 1:
 401         subcc   %l0, %l5, %g0                   ! hrestime_adj < adj ?
 402         bl,a,pt %xcc, 2f                        ! yes, set adj = hrestime_adj
 403           mov   %l0, %l5                        ! delay: adj = hrestime_adj
 404 2:
 405         ldx     [%l4 + %lo(timedelta)], %l0     ! %l0 = timedelta
 406         sub     %l0, %l5, %l0                   ! timedelta -= adj
 407 
 408         stx     %l0, [%l4 + %lo(timedelta)]     ! store new timedelta
 409         stx     %l0, [%l4 + %lo(hrestime_adj)]  ! hrestime_adj = timedelta
 410 
 411         or      %l4, %lo(hrestime), %l2
 412         ldn     [%l2], %i2                      ! %i2:%i3 = hrestime sec:nsec
 413         ldn     [%l2 + CLONGSIZE], %i3
 414         add     %i3, %l5, %i3                   ! hrestime.nsec += adj
 415         add     %i3, %i1, %i3                   ! hrestime.nsec += nslt
 416 
 417         set     NANOSEC, %l5                    ! %l5 = NANOSEC
 418         cmp     %i3, %l5
 419         bl,pt   %xcc, 5f                        ! if hrestime.tv_nsec < NANOSEC
 420           sethi %hi(one_sec), %i1               ! delay
 421         add     %i2, 0x1, %i2                   ! hrestime.tv_sec++
 422         sub     %i3, %l5, %i3                   ! hrestime.tv_nsec - NANOSEC
 423         mov     0x1, %l5
 424         st      %l5, [%i1 + %lo(one_sec)]
 425 5:
 426         stn     %i2, [%l2]
 427         stn     %i3, [%l2 + CLONGSIZE]          ! store the new hrestime
 428 
 429         membar  #StoreStore
 430 
 431         ld      [%l4 + %lo(hres_lock)], %i1
 432         inc     %i1                             ! release lock
 433         st      %i1, [%l4 + %lo(hres_lock)]     ! clear hres_lock
 434 
 435         ret
 436         restore
 437 
 438 9:
             /*
              * Reached only from the hrtime_base check above.  No return
              * sequence follows the call, so panic is relied upon not to
              * return.
              */
 439         !
 440         ! release hres_lock
 441         !
 442         ld      [%l4 + %lo(hres_lock)], %i1
 443         inc     %i1
 444         st      %i1, [%l4 + %lo(hres_lock)]
 445 
 446         sethi   %hi(hrtime_base_panic), %o0
 447         call    panic
 448           or    %o0, %lo(hrtime_base_panic), %o0
 449 
 450         SET_SIZE(hres_tick)
 451 
 452         .seg    ".text"
 453 kstat_q_panic_msg:
 454         .asciz  "kstat_q_exit: qlen == 0"
 455 
 456         ENTRY(kstat_q_panic)
             /*
              * Panic with kstat_q_panic_msg.  Used as the branch target of
              * the DEBUG variants of the kstat queue-exit routines in this
              * file when the old queue length is zero.  No return sequence
              * follows the call (NOTREACHED).
              */
 457         save    %sp, -SA(MINFRAME), %sp
 458         sethi   %hi(kstat_q_panic_msg), %o0
 459         call    panic
 460           or    %o0, %lo(kstat_q_panic_msg), %o0
 461         /*NOTREACHED*/
 462         SET_SIZE(kstat_q_panic)
 463 
 464 #define BRZPN   brz,pn
 465 #define BRZPT   brz,pt
 466 
/*
 * KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE)
 *
 * Expects %o0 = base address of the structure and %g1 = current time;
 * overwrites %o1-%o5.
 *
 *      QOP      instruction (add or sub) producing new qlen = old qlen op 1
 *      QBR      branch-on-register-zero mnemonic (BRZPN/BRZPT), applied to
 *               the OLD qlen
 *      QZERO    target of that branch
 *      QRETURN  text emitted just before the final store: a label, "retl",
 *               or both; with "retl" the final store lands in its delay slot
 *      QTYPE    prefix pasted (via the empty-comment idiom) onto CNT,
 *               LASTUPDATE, TIME and LENTIME to form the field offsets
 *
 * The store of the new qlen is in the (non-annulled) delay slot of QBR, so
 * it happens whether or not the branch is taken.  When the branch is not
 * taken, TIME grows by (now - LASTUPDATE) and LENTIME by
 * old qlen * (now - LASTUPDATE).
 */
 467 #define KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE) \
 468         ld      [%o0 + QTYPE/**/CNT], %o1;      /* %o1 = old qlen */    \
 469         QOP     %o1, 1, %o2;                    /* %o2 = new qlen */    \
 470         QBR     %o1, QZERO;                     /* done if qlen == 0 */ \
 471         st      %o2, [%o0 + QTYPE/**/CNT];      /* delay: save qlen */  \
 472         ldx     [%o0 + QTYPE/**/LASTUPDATE], %o3;                       \
 473         ldx     [%o0 + QTYPE/**/TIME], %o4;     /* %o4 = old time */    \
 474         ldx     [%o0 + QTYPE/**/LENTIME], %o5;  /* %o5 = old lentime */ \
 475         sub     %g1, %o3, %o2;                  /* %o2 = time delta */  \
 476         mulx    %o1, %o2, %o3;                  /* %o3 = cur lentime */ \
 477         add     %o4, %o2, %o4;                  /* %o4 = new time */    \
 478         add     %o5, %o3, %o5;                  /* %o5 = new lentime */ \
 479         stx     %o4, [%o0 + QTYPE/**/TIME];     /* save time */         \
 480         stx     %o5, [%o0 + QTYPE/**/LENTIME];  /* save lentime */      \
 481 QRETURN;                                                                \
 482         stx     %g1, [%o0 + QTYPE/**/LASTUPDATE]; /* lastupdate = now */
 483 
 484 #if !defined(DEBUG)
 485 /*
 486  * same as KSTAT_Q_UPDATE but without:
 487  * QBR     %o1, QZERO;
 488  * to be used only with non-debug build. mimics ASSERT() behaviour.
      *
      * Expects %o0 = base address of the structure and %g1 = current time;
      * overwrites %o1-%o5.  Because there is no branch, the time and lentime
      * fields are always updated, even when the old qlen is zero.
 489  */
 490 #define KSTAT_Q_UPDATE_ND(QOP, QRETURN, QTYPE) \
 491         ld      [%o0 + QTYPE/**/CNT], %o1;      /* %o1 = old qlen */    \
 492         QOP     %o1, 1, %o2;                    /* %o2 = new qlen */    \
 493         st      %o2, [%o0 + QTYPE/**/CNT];      /* save qlen */         \
 494         ldx     [%o0 + QTYPE/**/LASTUPDATE], %o3;                       \
 495         ldx     [%o0 + QTYPE/**/TIME], %o4;     /* %o4 = old time */    \
 496         ldx     [%o0 + QTYPE/**/LENTIME], %o5;  /* %o5 = old lentime */ \
 497         sub     %g1, %o3, %o2;                  /* %o2 = time delta */  \
 498         mulx    %o1, %o2, %o3;                  /* %o3 = cur lentime */ \
 499         add     %o4, %o2, %o4;                  /* %o4 = new time */    \
 500         add     %o5, %o3, %o5;                  /* %o5 = new lentime */ \
 501         stx     %o4, [%o0 + QTYPE/**/TIME];     /* save time */         \
 502         stx     %o5, [%o0 + QTYPE/**/LENTIME];  /* save lentime */      \
 503 QRETURN;                                                                \
 504         stx     %g1, [%o0 + QTYPE/**/LASTUPDATE]; /* lastupdate = now */
 505 #endif
 506 
 507         .align 16
             /*
              * kstat_waitq_enter: %o0 = base address of the structure holding
              * the KSTAT_IO_W* fields.  Increments the count; when the old
              * count was zero the time/lentime accumulation is skipped and
              * only the last-update stamp is written (in the retl delay
              * slot).
              */
 508         ENTRY(kstat_waitq_enter)
 509         GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__)
 510         KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
 511         SET_SIZE(kstat_waitq_enter)
 512 
 513         .align 16
             /*
              * kstat_waitq_exit: %o0 = base address of the structure holding
              * the KSTAT_IO_W* fields.  Decrements the count and accumulates
              * time/lentime.  DEBUG builds branch to kstat_q_panic when the
              * old count is zero; non-DEBUG builds perform no such check.
              */
 514         ENTRY(kstat_waitq_exit)
 515         GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__)
 516 #if defined(DEBUG)
 517         KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_W)
 518 #else
 519         KSTAT_Q_UPDATE_ND(sub, retl, KSTAT_IO_W)
 520 #endif
 521         SET_SIZE(kstat_waitq_exit)
 522 
 523         .align 16
             /*
              * kstat_runq_enter: %o0 = base address of the structure holding
              * the KSTAT_IO_R* fields.  Increments the count; when the old
              * count was zero the time/lentime accumulation is skipped and
              * only the last-update stamp is written (in the retl delay
              * slot).
              */
 524         ENTRY(kstat_runq_enter)
 525         GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__)
 526         KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
 527         SET_SIZE(kstat_runq_enter)
 528 
 529         .align 16
             /*
              * kstat_runq_exit: %o0 = base address of the structure holding
              * the KSTAT_IO_R* fields.  Decrements the count and accumulates
              * time/lentime.  DEBUG builds branch to kstat_q_panic when the
              * old count is zero; non-DEBUG builds perform no such check.
              */
 530         ENTRY(kstat_runq_exit)
 531         GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__)
 532 #if defined(DEBUG)
 533         KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_R)
 534 #else
 535         KSTAT_Q_UPDATE_ND(sub, retl, KSTAT_IO_R)
 536 #endif
 537         SET_SIZE(kstat_runq_exit)
 538 
 539         .align 16
             /*
              * kstat_waitq_to_runq: %o0 = base address of the structure.
              * Performs the exit update on the KSTAT_IO_W* fields followed by
              * the enter update on the KSTAT_IO_R* fields, both with the
              * single time sample taken into %g1 at entry.  The first macro
              * is given a bare label as QRETURN, so it falls through into
              * the second, which returns.  Numeric label 1 is defined by
              * both expansions.
              */
 540         ENTRY(kstat_waitq_to_runq)
 541         GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__)
 542 #if defined(DEBUG)
 543         KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_W)
 544 #else
 545         KSTAT_Q_UPDATE_ND(sub, 1:, KSTAT_IO_W)
 546 #endif
 547         KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
 548         SET_SIZE(kstat_waitq_to_runq)
 549 
 550         .align 16
             /*
              * kstat_runq_back_to_waitq: %o0 = base address of the structure.
              * Performs the exit update on the KSTAT_IO_R* fields followed by
              * the enter update on the KSTAT_IO_W* fields, both with the
              * single time sample taken into %g1 at entry.  The first macro
              * is given a bare label as QRETURN, so it falls through into
              * the second, which returns.  Numeric label 1 is defined by
              * both expansions.
              */
 551         ENTRY(kstat_runq_back_to_waitq)
 552         GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__)
 553 #if defined(DEBUG)
 554         KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_R)
 555 #else
 556         KSTAT_Q_UPDATE_ND(sub, 1:, KSTAT_IO_R)
 557 #endif
 558         KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
 559         SET_SIZE(kstat_runq_back_to_waitq)
 560 
 561         /*
 562          *  -- WARNING --
 563          *
 564          * The following variables MUST be together on a 128-byte boundary.
 565          * In addition to the primary performance motivation (having them all
 566          * on the same cache line(s)), code here and in the GET*TIME() macros
 567          * assumes that they all have the same high 22 address bits (so
 568          * there's only one sethi).
 569          */
 570         .seg    ".data"
 571         .global timedelta, hres_last_tick, hrestime, hrestime_adj
 572         .global hres_lock, nsec_scale, hrtime_base, traptrace_use_stick
 573         .global nsec_shift, adj_shift, native_tick_offset, native_stick_offset
 574 
 575         /* XXX - above comment claims 128-bytes is necessary */
             /*
              * The code in this file reaches these variables from a single
              * sethi of %hi(hrestime) or %hi(hres_lock) plus the %lo() of
              * the individual symbol, so their order and grouping matter.
              */
 576         .align  64
 577 timedelta:
 578         .word   0, 0            /* int64_t */
 579 hres_last_tick:
 580         .word   0, 0            /* hrtime_t */
 581 hrestime:
 582         .nword  0, 0            /* 2 longs */
 583 hrestime_adj:
 584         .word   0, 0            /* int64_t */
 585 hres_lock:
 586         .word   0               /* 32-bit; one byte of it is the ldstub lock */
 587 nsec_scale:
 588         .word   0               /* 32-bit; loaded with ld in hres_tick */
 589 hrtime_base:
 590         .word   0, 0            /* 64-bit; accessed with ldx/stx in hres_tick */
 591 traptrace_use_stick:
 592         .word   0
 593 nsec_shift:
 594         .word   NSEC_SHIFT
 595 adj_shift:
 596         .word   ADJ_SHIFT
 597         .align  8
 598 native_tick_offset:
 599         .word   0, 0            /* two words, 8-byte aligned */
 600         .align  8
 601 native_stick_offset:
 602         .word   0, 0            /* two words, 8-byte aligned */
 603 
 604 
 605 /*
 606  * drv_usecwait(clock_t n)      [DDI/DKI - section 9F]
 607  * usec_delay(int n)            [compatibility - should go one day]
 608  * Delay by spinning.
 609  *
 610  * delay for n microseconds.  numbers <= 0 delay 1 usec
 611  *
 612  * With UltraSPARC-III the combination of supporting mixed-speed CPUs
 613  * and variable clock rate for power management requires that we
 614  * use %stick to implement this routine.
      *
      * Implementation notes:
      *  - the annulled delay slot after brlez replaces n with 1 only when
      *    n <= 0;
      *  - deadline (%o1) = n * sticks_per_usec + 1 + native time at entry;
      *  - each pass compares the deadline with the previous time sample
      *    (unsigned) and re-reads the time before the branch is evaluated.
      *    NOTE(review): this relies on GET_NATIVE_TIME leaving the condition
      *    codes set by cmp untouched -- confirm against the macro definition.
 615  */
 616 
 617         ENTRY(drv_usecwait)
 618         ALTENTRY(usec_delay)
 619         brlez,a,pn %o0, 0f
 620           mov   1, %o0
 621 0:
 622         sethi   %hi(sticks_per_usec), %o1
 623         lduw    [%o1 + %lo(sticks_per_usec)], %o1
 624         mulx    %o1, %o0, %o1           ! Scale usec to ticks
 625         inc     %o1                     ! We don't start on a tick edge
 626         GET_NATIVE_TIME(%o2,%o3,%o4,__LINE__)
 627         add     %o1, %o2, %o1
 628 
 629 1:      cmp     %o1, %o2
 630         GET_NATIVE_TIME(%o2,%o3,%o4,__LINE__)
 631         bgeu,pt %xcc, 1b
 632           nop
 633         retl
 634           nop
 635         SET_SIZE(usec_delay)
 636         SET_SIZE(drv_usecwait)
 637 
 638 /*
 639  * Level-14 interrupt prologue.
      *
      * Stores the interrupted PIL in CPU_PROFILE_PIL, then records the trap
      * PC: when TSTATE_PRIV is set in %tstate the PC goes to CPU_PROFILE_PC
      * and CPU_PROFILE_UPC is zeroed; otherwise the PC goes to
      * CPU_PROFILE_UPC and CPU_PROFILE_PC is zeroed.  Both paths finish by
      * branching to pil_interrupt_common with the zeroing store in the delay
      * slot.  Uses %g1 (CPU pointer), %g2, %g5 and %g6.
 640  */
 641         ENTRY_NP(pil14_interrupt)
 642         CPU_ADDR(%g1, %g2)
 643         rdpr    %pil, %g6                       ! %g6 = interrupted PIL
 644         stn     %g6, [%g1 + CPU_PROFILE_PIL]    ! record interrupted PIL
 645         rdpr    %tstate, %g6
 646         rdpr    %tpc, %g5
 647         btst    TSTATE_PRIV, %g6                ! trap from supervisor mode?
 648         bnz,a,pt %xcc, 1f
 649           stn   %g5, [%g1 + CPU_PROFILE_PC]     ! if so, record kernel PC
 650         stn     %g5, [%g1 + CPU_PROFILE_UPC]    ! if not, record user PC
 651         ba      pil_interrupt_common            ! must be large-disp branch
 652           stn   %g0, [%g1 + CPU_PROFILE_PC]     ! zero kernel PC
 653 1:      ba      pil_interrupt_common            ! must be large-disp branch
 654           stn   %g0, [%g1 + CPU_PROFILE_UPC]    ! zero user PC
 655         SET_SIZE(pil14_interrupt)
 656 
 657         ENTRY_NP(tick_rtt)
             /*
              * Register roles in this routine:
              *      %o5 = TICK_COMPARE value (read, later reprogrammed)
              *      %g5 = %pstate as it was before PSTATE_IE was cleared
              *      %o4 = SOFTINT contents, later the retry step size
              *      %o0 = interrupt mask, later the current native time
              * Every path ends at label 2 with a branch to
              * current_thread_complete.  In the retry loop the step is
              * doubled in the delay slot of the backward branch, so the
              * first attempt uses a step of 8.
              */
 658         !
 659         ! Load TICK_COMPARE into %o5; if bit 63 is set, then TICK_COMPARE is
 660         ! disabled.  If TICK_COMPARE is enabled, we know that we need to
 661         ! reenqueue the interrupt request structure.  We'll then check TICKINT
 662         ! in SOFTINT; if it's set, then we know that we were in a TICK_COMPARE
 663         ! interrupt.  In this case, TICK_COMPARE may have been rewritten
 664         ! recently; we'll compare %o5 to the current time to verify that it's
 665         ! in the future.
 666         !
 667         ! Note that %o5 is live until after 1f.
 668         ! XXX - there is a subroutine call while %o5 is live!
 669         !
 670         RD_TICKCMPR(%o5,%g1,%g2,__LINE__)
 671         srlx    %o5, TICKINT_DIS_SHFT, %g1
 672         brnz,pt %g1, 2f
 673           nop
 674 
 675         rdpr    %pstate, %g5
 676         andn    %g5, PSTATE_IE, %g1
 677         wrpr    %g0, %g1, %pstate               ! Disable vec interrupts
 678 
 679         sethi   %hi(cbe_level14_inum), %o1
 680         ldx     [%o1 + %lo(cbe_level14_inum)], %o1
 681         call    intr_enqueue_req ! preserves %o5 and %g5
 682           mov   PIL_14, %o0
 683 
 684         ! Check SOFTINT for TICKINT/STICKINT
 685         rd      SOFTINT, %o4
 686         set     (TICK_INT_MASK | STICK_INT_MASK), %o0
 687         andcc   %o4, %o0, %g0
 688         bz,a,pn %icc, 2f
 689           wrpr  %g0, %g5, %pstate               ! Enable vec interrupts
 690 
 691         ! clear TICKINT/STICKINT
 692         wr      %o0, CLEAR_SOFTINT
 693 
 694         !
 695         ! Now that we've cleared TICKINT, we can reread %tick and confirm
 696         ! that the value we programmed is still in the future.  If it isn't,
 697         ! we need to reprogram TICK_COMPARE to fire as soon as possible.
 698         !
 699         GET_NATIVE_TIME(%o0,%g1,%g2,__LINE__)   ! %o0 = tick
 700         cmp     %o5, %o0                        ! In the future?
 701         bg,a,pt %xcc, 2f                        ! Yes, drive on.
 702           wrpr  %g0, %g5, %pstate               !   delay: enable vec intr
 703 
 704         !
 705         ! If we're here, then we have programmed TICK_COMPARE with a %tick
 706         ! which is in the past; we'll now load an initial step size, and loop
 707         ! until we've managed to program TICK_COMPARE to fire in the future.
 708         !
 709         mov     8, %o4                          ! 8 = arbitrary initial step
 710 1:      add     %o0, %o4, %o5                   ! Add the step
 711         WR_TICKCMPR(%o5,%g1,%g2,__LINE__)       ! Write to TICK_CMPR
 712         GET_NATIVE_TIME(%o0,%g1,%g2,__LINE__)   ! %o0 = tick
 713         cmp     %o5, %o0                        ! In the future?
 714         bg,a,pt %xcc, 2f                        ! Yes, drive on.
 715           wrpr  %g0, %g5, %pstate               !    delay: enable vec intr
 716         ba      1b                              ! No, try again.
 717           sllx  %o4, 1, %o4                     !    delay: double step size
 718 
 719 2:      ba      current_thread_complete
 720           nop
 721         SET_SIZE(tick_rtt)
 722 
 723 /*
 724  * Level-15 interrupt prologue.
      *
      * Records the trap PC: when TSTATE_PRIV is set in %tstate the PC goes to
      * CPU_CPCPROFILE_PC and CPU_CPCPROFILE_UPC is zeroed; otherwise the PC
      * goes to CPU_CPCPROFILE_UPC and CPU_CPCPROFILE_PC is zeroed.  Both paths
      * finish by branching to pil15_epilogue with the zeroing store in the
      * delay slot.  Uses %g1 (CPU pointer), %g2, %g5 and %g6.
 725  */
 726        ENTRY_NP(pil15_interrupt)
 727        CPU_ADDR(%g1, %g2)
 728        rdpr    %tstate, %g6
 729        rdpr    %tpc, %g5
 730        btst    TSTATE_PRIV, %g6                ! trap from supervisor mode?
 731        bnz,a,pt %xcc, 1f
 732        stn     %g5, [%g1 + CPU_CPCPROFILE_PC]  ! if so, record kernel PC
 733        stn     %g5, [%g1 + CPU_CPCPROFILE_UPC] ! if not, record user PC
 734        ba      pil15_epilogue                  ! must be large-disp branch
 735        stn     %g0, [%g1 + CPU_CPCPROFILE_PC]  ! zero kernel PC
 736 1:     ba      pil15_epilogue                  ! must be large-disp branch
 737        stn     %g0, [%g1 + CPU_CPCPROFILE_UPC] ! zero user PC
 738        SET_SIZE(pil15_interrupt)
 739 
 740 /* XXXQ These should be inline templates, not functions */
 741         ENTRY(prefetch_page_w)
             /* No-op in this file: returns immediately, touching no register. */
 742         retl
 743           nop
 744         SET_SIZE(prefetch_page_w)
 745 
 746         ENTRY(prefetch_page_r)
             /* No-op in this file: returns immediately, touching no register. */
 747         retl
 748           nop
 749         SET_SIZE(prefetch_page_r)
 750 
 751 /* XXXQ These should be inline templates, not functions */
 752         ENTRY(prefetch_smap_w)
             /* No-op in this file: returns immediately, touching no register. */
 753         retl
 754           nop
 755         SET_SIZE(prefetch_smap_w)
 756 
 757 /*
 758  * Generic sun4v MMU and Cache operations.
 759  */
 760 
 761         ENTRY_NP(vtag_flushpage)
 762         /*
 763          * flush page from the tlb
 764          *
 765          * %o0 = vaddr
 766          * %o1 = sfmmup
              *
              * The context number obtained from sfmmup replaces %o1, %o2 is
              * set to MAP_ITLB | MAP_DTLB, and the MMU_UNMAP_ADDR trap is
              * taken.  A zero %o0 afterwards means success; any other value
              * branches to panic_bad_hcall with %o1 = MMU_UNMAP_ADDR.
              * Overwrites %g1 and %g2.
 767          */
 768         SFMMU_CPU_CNUM(%o1, %g1, %g2)   /* %g1 = sfmmu cnum on this CPU */
 769 
 770         mov     %g1, %o1
 771         mov     MAP_ITLB | MAP_DTLB, %o2
 772         ta      MMU_UNMAP_ADDR
 773         brz,pt  %o0, 1f
 774           nop
 775         ba      panic_bad_hcall
 776           mov   MMU_UNMAP_ADDR, %o1
 777 1:
 778         retl
 779           nop
 780         SET_SIZE(vtag_flushpage)
 781 
 782         ENTRY_NP(vtag_flushall)
             /*
              * Issue the MMU_DEMAP_ALL request through the FAST_TRAP trap with
              * %o0 = 0, %o1 = 0, %o2 = MAP_ITLB | MAP_DTLB and the function
              * number in %o5.  A zero %o0 afterwards means success; any other
              * value branches to panic_bad_hcall with %o1 = MMU_DEMAP_ALL.
              */
 783         mov     %g0, %o0        ! XXX no cpu list yet
 784         mov     %g0, %o1        ! XXX no cpu list yet
 785         mov     MAP_ITLB | MAP_DTLB, %o2
 786         mov     MMU_DEMAP_ALL, %o5
 787         ta      FAST_TRAP
 788         brz,pt  %o0, 1f
 789           nop
 790         ba      panic_bad_hcall
 791           mov   MMU_DEMAP_ALL, %o1
 792 1:
 793         retl
 794           nop
 795         SET_SIZE(vtag_flushall)
 796 
 797         ENTRY_NP(vtag_unmap_perm_tl1)
 798         /*
 799          * x-trap to unmap perm map entry
 800          * %g1 = vaddr
 801          * %g2 = ctxnum (KCONTEXT only)
 802          */
 803         mov     %o0, %g3
 804         mov     %o1, %g4
 805         mov     %o2, %g5
 806         mov     %o5, %g6
 807         mov     %g1, %o0
 808         mov     %g2, %o1
 809         mov     MAP_ITLB | MAP_DTLB, %o2
 810         mov     UNMAP_PERM_ADDR, %o5
 811         ta      FAST_TRAP
 812         brz,pt  %o0, 1f
 813         nop
 814 
 815         mov     PTL1_BAD_HCALL, %g1
 816 
 817         cmp     %o0, H_ENOMAP
 818         move    %xcc, PTL1_BAD_HCALL_UNMAP_PERM_ENOMAP, %g1
 819         
 820         cmp     %o0, H_EINVAL 
 821         move    %xcc, PTL1_BAD_HCALL_UNMAP_PERM_EINVAL, %g1
 822 
 823         ba,a    ptl1_panic
 824 1:
 825         mov     %g6, %o5
 826         mov     %g5, %o2
 827         mov     %g4, %o1
 828         mov     %g3, %o0
 829         retry
 830         SET_SIZE(vtag_unmap_perm_tl1)
 831 
 832         ENTRY_NP(vtag_flushpage_tl1)
 833         /*
 834          * x-trap to flush page from tlb and tsb
 835          *
 836          * %g1 = vaddr, zero-extended on 32-bit kernel
 837          * %g2 = sfmmup
 838          *
 839          * assumes TSBE_TAG = 0
 840          */
 841         srln    %g1, MMU_PAGESHIFT, %g1
 842         slln    %g1, MMU_PAGESHIFT, %g1                 /* g1 = vaddr */
 843         mov     %o0, %g3
 844         mov     %o1, %g4
 845         mov     %o2, %g5
 846         mov     %g1, %o0                        /* vaddr */
 847 
 848         SFMMU_CPU_CNUM(%g2, %o1, %g6)   /* %o1 = sfmmu cnum on this CPU */
 849 
 850         mov     MAP_ITLB | MAP_DTLB, %o2
 851         ta      MMU_UNMAP_ADDR
 852         brz,pt  %o0, 1f
 853         nop
 854           ba    ptl1_panic
 855         mov     PTL1_BAD_HCALL, %g1
 856 1:
 857         mov     %g5, %o2
 858         mov     %g4, %o1
 859         mov     %g3, %o0
 860         membar #Sync
 861         retry
 862         SET_SIZE(vtag_flushpage_tl1)
 863 
 864         ENTRY_NP(vtag_flush_pgcnt_tl1)
 865         /*
 866          * x-trap to flush pgcnt MMU_PAGESIZE pages from tlb
 867          *
 868          * %g1 = vaddr, zero-extended on 32-bit kernel
 869          * %g2 = <sfmmup58|pgcnt6>, (pgcnt - 1) is pass'ed in via pgcnt6 bits.
 870          *
 871          * NOTE: this handler relies on the fact that no
 872          *      interrupts or traps can occur during the loop
 873          *      issuing the TLB_DEMAP operations. It is assumed
 874          *      that interrupts are disabled and this code is
 875          *      fetching from the kernel locked text address.
 876          *
 877          * assumes TSBE_TAG = 0
 878          */
 879         srln    %g1, MMU_PAGESHIFT, %g1
 880         slln    %g1, MMU_PAGESHIFT, %g1         /* g1 = vaddr */
 881         mov     %o0, %g3
 882         mov     %o1, %g4
 883         mov     %o2, %g5
 884 
 885         and     %g2, SFMMU_PGCNT_MASK, %g7      /* g7 = pgcnt - 1 */
 886         add     %g7, 1, %g7                     /* g7 = pgcnt */
 887 
 888         andn    %g2, SFMMU_PGCNT_MASK, %o0      /* %o0 = sfmmup */
 889 
 890         SFMMU_CPU_CNUM(%o0, %g2, %g6)    /* %g2 = sfmmu cnum on this CPU */
 891 
 892         set     MMU_PAGESIZE, %g6               /* g6 = pgsize */
 893 
 894 1:      
 895         mov     %g1, %o0                        /* vaddr */
 896         mov     %g2, %o1                        /* cnum */
 897         mov     MAP_ITLB | MAP_DTLB, %o2
 898         ta      MMU_UNMAP_ADDR
 899         brz,pt  %o0, 2f
 900           nop
 901         ba      ptl1_panic
 902           mov   PTL1_BAD_HCALL, %g1
 903 2:
 904         deccc   %g7                             /* decr pgcnt */
 905         bnz,pt  %icc,1b
 906           add   %g1, %g6, %g1                   /* go to nextpage */
 907 
 908         mov     %g5, %o2
 909         mov     %g4, %o1
 910         mov     %g3, %o0
 911         membar #Sync
 912         retry
 913         SET_SIZE(vtag_flush_pgcnt_tl1)
 914 
 915         ! Not implemented on US1/US2
 916         ENTRY_NP(vtag_flushall_tl1)
 917         mov     %o0, %g3
 918         mov     %o1, %g4
 919         mov     %o2, %g5
 920         mov     %o3, %g6        ! XXXQ not used?
 921         mov     %o5, %g7
 922         mov     %g0, %o0        ! XXX no cpu list yet
 923         mov     %g0, %o1        ! XXX no cpu list yet
 924         mov     MAP_ITLB | MAP_DTLB, %o2
 925         mov     MMU_DEMAP_ALL, %o5
 926         ta      FAST_TRAP
 927         brz,pt  %o0, 1f
 928           nop
 929         ba      ptl1_panic
 930           mov   PTL1_BAD_HCALL, %g1
 931 1:
 932         mov     %g7, %o5
 933         mov     %g6, %o3        ! XXXQ not used?
 934         mov     %g5, %o2
 935         mov     %g4, %o1
 936         mov     %g3, %o0
 937         retry
 938         SET_SIZE(vtag_flushall_tl1)
 939 
 940 /*
 941  * flush_instr_mem:
 942  *      Flush a portion of the I-$ starting at vaddr
 943  *      %o0 vaddr
 944  *      %o1 bytes to be flushed
 945  */
 946 
 947         ENTRY(flush_instr_mem)
 948         membar  #StoreStore                             ! Ensure the stores
 949                                                         ! are globally visible
 950 1:
 951         flush   %o0
 952         subcc   %o1, ICACHE_FLUSHSZ, %o1                ! bytes = bytes-0x20
 953         bgu,pt  %ncc, 1b
 954           add   %o0, ICACHE_FLUSHSZ, %o0                ! vaddr = vaddr+0x20
 955 
 956         retl
 957           nop
 958         SET_SIZE(flush_instr_mem)
 959 
 960 #if !defined(CUSTOM_FPZERO)
 961 
 962 /*
 963  * fp_zero() - clear all fp data registers and the fsr
 964  */
 965 
 966 .global fp_zero_zero
 967 .align 8
 968 fp_zero_zero:
 969         .xword  0
 970 
 971         ENTRY_NP(fp_zero)
 972         sethi   %hi(fp_zero_zero), %o0
 973         ldx     [%o0 + %lo(fp_zero_zero)], %fsr
 974         ldd     [%o0 + %lo(fp_zero_zero)], %f0
 975         fmovd   %f0, %f2
 976         fmovd   %f0, %f4
 977         fmovd   %f0, %f6
 978         fmovd   %f0, %f8
 979         fmovd   %f0, %f10
 980         fmovd   %f0, %f12
 981         fmovd   %f0, %f14
 982         fmovd   %f0, %f16
 983         fmovd   %f0, %f18
 984         fmovd   %f0, %f20
 985         fmovd   %f0, %f22
 986         fmovd   %f0, %f24
 987         fmovd   %f0, %f26
 988         fmovd   %f0, %f28
 989         fmovd   %f0, %f30
 990         fmovd   %f0, %f32
 991         fmovd   %f0, %f34
 992         fmovd   %f0, %f36
 993         fmovd   %f0, %f38
 994         fmovd   %f0, %f40
 995         fmovd   %f0, %f42
 996         fmovd   %f0, %f44
 997         fmovd   %f0, %f46
 998         fmovd   %f0, %f48
 999         fmovd   %f0, %f50
1000         fmovd   %f0, %f52
1001         fmovd   %f0, %f54
1002         fmovd   %f0, %f56
1003         fmovd   %f0, %f58
1004         fmovd   %f0, %f60
1005         retl
1006         fmovd   %f0, %f62
1007         SET_SIZE(fp_zero)
1008 
1009 #endif  /* CUSTOM_FPZERO */