/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include "assym.h"

/*
 * General assembly language routines.
 * This file is intended to contain routines that are specific to the
 * CPU architecture.
 */

/*
 * WARNING: If you add a fast trap handler which can be invoked by a
 * non-privileged user, you may have to use the FAST_TRAP_DONE macro
 * instead of the "done" instruction to return to user mode.  See the
 * comments for the "fast_trap_done" entry point for more information.
 */
#define FAST_TRAP_DONE  \
        ba,a    fast_trap_done

#include <sys/machclock.h>
#include <sys/clock.h>


#include <sys/asm_linkage.h>
#include <sys/privregs.h>
#include <vm/hat_sfmmu.h>
#include <sys/machparam.h>        /* To get SYSBASE and PAGESIZE */
#include <sys/machthread.h>
#include <sys/clock.h>
#include <sys/intreg.h>
#include <sys/psr_compat.h>
#include <sys/isa_defs.h>
#include <sys/dditypes.h>
#include <sys/intr.h>
#include <sys/hypervisor_api.h>

#include "assym.h"

#define ICACHE_FLUSHSZ  0x20

/*
 * A softint is generated when the counter field of the tick register
 * matches the value field of the tick_cmpr register.
 */
        ENTRY_NP(tickcmpr_set)
        ! get 64-bit clock_cycles interval
        mov     %o0, %o2
        mov     8, %o3                  ! A reasonable initial step size
1:
        WR_TICKCMPR(%o2,%o4,%o5,__LINE__)       ! Write to TICK_CMPR

        GET_NATIVE_TIME(%o0,%o4,%o5,__LINE__)   ! Read %tick to confirm the
                                                ! value we wrote was in the
                                                ! future.

        cmp     %o2, %o0                ! If the value we wrote was in the
        bg,pt   %xcc, 2f                !   future, then blow out of here.
          sllx  %o3, 1, %o3             ! If not, then double our step size,
        ba,pt   %xcc, 1b                !   and take another lap.
          add   %o0, %o3, %o2           !
2:
        retl
          nop
        SET_SIZE(tickcmpr_set)
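
/*
 * For illustration only: a rough C sketch (not part of the build) of the
 * write-and-verify loop above.  The helpers wr_tickcmpr() and rd_tick()
 * are hypothetical stand-ins for the WR_TICKCMPR() and GET_NATIVE_TIME()
 * macros.
 *
 *	void
 *	tickcmpr_set_sketch(uint64_t target)
 *	{
 *		uint64_t step = 8;		// reasonable initial step size
 *
 *		for (;;) {
 *			wr_tickcmpr(target);	// program TICK_CMPR
 *			uint64_t now = rd_tick();
 *			if ((int64_t)target > (int64_t)now)
 *				break;		// still in the future -- done
 *			step <<= 1;		// fell behind: double the step
 *			target = now + step;	//   and take another lap
 *		}
 *	}
 */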

        ENTRY_NP(tickcmpr_disable)
        mov     1, %g1
        sllx    %g1, TICKINT_DIS_SHFT, %o0
        WR_TICKCMPR(%o0,%o4,%o5,__LINE__)       ! Write to TICK_CMPR
        retl
          nop
        SET_SIZE(tickcmpr_disable)

        .seg    ".text"
tick_write_delta_panic:
        .asciz  "tick_write_delta: not supported, delta: 0x%lx"

/*
 * tick_write_delta() is intended to increment %stick by the specified delta,
 * but %stick is only writeable in hyperprivileged mode and at present there
 * is no provision for this.  tick_write_delta() is called by the cyclic
 * subsystem if a negative %stick delta is observed when cyclic processing is
 * resumed after an event such as an OS suspend/resume.  On sun4v, the
 * suspend/resume routines should adjust the %stick offset, preventing the
 * cyclic subsystem from detecting a negative delta.  If a negative delta is
 * detected anyway, panic the system; it could be caused by improper %stick
 * synchronization after a suspend/resume.
 */
        ENTRY_NP(tick_write_delta)
        sethi   %hi(tick_write_delta_panic), %o1
        save    %sp, -SA(MINFRAME), %sp ! get a new window to preserve caller
        mov     %i0, %o1
        call    panic
          or    %i1, %lo(tick_write_delta_panic), %o0
        /*NOTREACHED*/
        retl
          nop

        ENTRY_NP(tickcmpr_disabled)
        RD_TICKCMPR(%g1,%o0,%o1,__LINE__)
        retl
          srlx  %g1, TICKINT_DIS_SHFT, %o0
        SET_SIZE(tickcmpr_disabled)

/*
 * Get current tick
 */

        ENTRY(gettick)
        ALTENTRY(randtick)
        GET_NATIVE_TIME(%o0,%o2,%o3,__LINE__)
        retl
          nop
        SET_SIZE(randtick)
        SET_SIZE(gettick)

/*
 * Get current tick. For trapstat use only.
 */
        ENTRY(rdtick)
        retl
        RD_TICK_PHYSICAL(%o0)
        SET_SIZE(rdtick)


/*
 * Return the counter portion of the tick register.
 */

        ENTRY_NP(gettick_counter)
        RD_TICK(%o0,%o1,%o2,__LINE__)
        retl
        nop
        SET_SIZE(gettick_counter)

        ENTRY_NP(gettick_npt)
        RD_TICK_PHYSICAL(%o0)
        retl
        srlx    %o0, 63, %o0
        SET_SIZE(gettick_npt)

        ENTRY_NP(getstick_npt)
        RD_STICK_PHYSICAL(%o0)
        retl
        srlx    %o0, 63, %o0
        SET_SIZE(getstick_npt)

/*
 * Provide a C-callable interface to the trap that reads the hi-res timer.
 * Returns a 64-bit nanosecond timestamp in %o0 and %o1.
 */

        ENTRY_NP(gethrtime)
        GET_HRTIME(%g1,%o0,%o1,%o2,%o3,%o4,%o5,%g2,__LINE__)
                                                        ! %g1 = hrtime
        retl
          mov   %g1, %o0
        SET_SIZE(gethrtime)

        ENTRY_NP(gethrtime_unscaled)
        GET_NATIVE_TIME(%g1,%o2,%o3,__LINE__)   ! %g1 = native time
        retl
          mov   %g1, %o0
        SET_SIZE(gethrtime_unscaled)

        ENTRY_NP(gethrtime_waitfree)
        ALTENTRY(dtrace_gethrtime)
        GET_NATIVE_TIME(%g1,%o2,%o3,__LINE__)   ! %g1 = native time
        NATIVE_TIME_TO_NSEC(%g1, %o2, %o3)
        retl
          mov   %g1, %o0
        SET_SIZE(dtrace_gethrtime)
        SET_SIZE(gethrtime_waitfree)

        ENTRY(gethrtime_max)
        NATIVE_TIME_MAX(%g1)
        NATIVE_TIME_TO_NSEC(%g1, %o0, %o1)

        ! hrtime_t's are signed, max hrtime_t must be positive
        mov     -1, %o2
        brlz,a  %g1, 1f
          srlx  %o2, 1, %g1
1:
        retl
          mov   %g1, %o0
        SET_SIZE(gethrtime_max)

        ENTRY(scalehrtime)
        ldx     [%o0], %o1
        NATIVE_TIME_TO_NSEC(%o1, %o2, %o3)
        retl
          stx   %o1, [%o0]
        SET_SIZE(scalehrtime)
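
/*
 * Typical C usage of the two routines above (a sketch, not part of this
 * file): take cheap unscaled timestamps on the hot path and convert to
 * nanoseconds only when the result is actually needed.
 *
 *	hrtime_t delta = gethrtime_unscaled();
 *	... critical section ...
 *	delta = gethrtime_unscaled() - delta;
 *	scalehrtime(&delta);		// delta is now in nanoseconds
 */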

/*
 * Fast trap to return a timestamp, uses trap window, leaves traps
 * disabled.  Returns a 64-bit nanosecond timestamp in %o0 and %o1.
 *
 * This is the handler for the ST_GETHRTIME trap.
 */

        ENTRY_NP(get_timestamp)
        GET_HRTIME(%g1,%g2,%g3,%g4,%g5,%o0,%o1,%o2,__LINE__)
        ! %g1 = hrtime
        srlx    %g1, 32, %o0                            ! %o0 = hi32(%g1)
        srl     %g1, 0, %o1                             ! %o1 = lo32(%g1)
        FAST_TRAP_DONE
        SET_SIZE(get_timestamp)

/*
 * Macro to convert GET_HRESTIME() bits into a timestamp.
 *
 * We use two separate macros so that the platform-dependent GET_HRESTIME()
 * can be as small as possible; CONV_HRESTIME() implements the generic part.
 */
#define CONV_HRESTIME(hrestsec, hrestnsec, adj, nslt, nano) \
        brz,pt  adj, 3f;                /* no adjustments, it's easy */ \
        add     hrestnsec, nslt, hrestnsec; /* hrest.tv_nsec += nslt */ \
        brlz,pn adj, 2f;                /* if hrestime_adj negative */  \
          srlx  nslt, ADJ_SHIFT, nslt;  /* delay: nslt >>= 4 */         \
        subcc   adj, nslt, %g0;         /* hrestime_adj - nslt/16 */    \
        movg    %xcc, nslt, adj;        /* adj by min(adj, nslt/16) */  \
        ba      3f;                     /* go convert to sec/nsec */    \
          add   hrestnsec, adj, hrestnsec; /* delay: apply adjustment */ \
2:      addcc   adj, nslt, %g0;         /* hrestime_adj + nslt/16 */    \
        bge,a,pt %xcc, 3f;              /* is adj less negative? */     \
          add   hrestnsec, adj, hrestnsec; /* yes: hrest.nsec += adj */ \
        sub     hrestnsec, nslt, hrestnsec; /* no: hrest.nsec -= nslt/16 */ \
3:      cmp     hrestnsec, nano;        /* more than a billion? */      \
        bl,pt   %xcc, 4f;               /* if not, we're done */        \
          nop;                          /* delay: do nothing :( */      \
        add     hrestsec, 1, hrestsec;  /* hrest.tv_sec++; */           \
        sub     hrestnsec, nano, hrestnsec; /* hrest.tv_nsec -= NANOSEC; */ \
        ba,a    3b;                     /* check >= billion again */ \
4:
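
/*
 * For illustration only: a rough C sketch (not part of the build) of what
 * CONV_HRESTIME() computes.  "nslt" is the number of nanoseconds elapsed
 * since the last tick, "adj" is hrestime_adj, and ADJ_SHIFT/NANOSEC have
 * their usual kernel values.
 *
 *	void
 *	conv_hrestime_sketch(long *sec, long *nsec, int64_t adj, int64_t nslt)
 *	{
 *		*nsec += nslt;			// advance past the last tick
 *		if (adj != 0) {
 *			int64_t lim = nslt >> ADJ_SHIFT;	// nslt/16
 *			if (adj > 0)		// clamp to +/- nslt/16 so the
 *				*nsec += (adj < lim) ? adj : lim;
 *			else			// clock slews, never steps
 *				*nsec += (adj > -lim) ? adj : -lim;
 *		}
 *		while (*nsec >= NANOSEC) {	// normalize into [0, NANOSEC)
 *			(*sec)++;
 *			*nsec -= NANOSEC;
 *		}
 *	}
 */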

        ENTRY_NP(gethrestime)
        GET_HRESTIME(%o1,%o2,%o3,%o4,%o5,%g1,%g2,%g3,%g4,__LINE__)
        CONV_HRESTIME(%o1, %o2, %o3, %o4, %o5)
        stn     %o1, [%o0]
        retl
          stn   %o2, [%o0 + CLONGSIZE]
        SET_SIZE(gethrestime)

/*
 * Similar to gethrestime(), but gethrestime_sec() returns current hrestime
 * seconds.
 */
        ENTRY_NP(gethrestime_sec)
        GET_HRESTIME(%o0,%o2,%o3,%o4,%o5,%g1,%g2,%g3,%g4,__LINE__)
        CONV_HRESTIME(%o0, %o2, %o3, %o4, %o5)
        retl                                    ! %o0 current hrestime seconds
          nop
        SET_SIZE(gethrestime_sec)

/*
 * Returns the hrestime on the last tick.  This is simpler than gethrestime()
 * and gethrestime_sec():  no conversion is required.  gethrestime_lasttick()
 * follows the same locking algorithm as GET_HRESTIME and GET_HRTIME,
 * outlined in detail in clock.h.  (Unlike GET_HRESTIME/GET_HRTIME, we don't
 * rely on load dependencies to effect the membar #LoadLoad, instead declaring
 * it explicitly.)
 */
        ENTRY_NP(gethrestime_lasttick)
        sethi   %hi(hres_lock), %o1
0:
        lduw    [%o1 + %lo(hres_lock)], %o2     ! Load lock value
        membar  #LoadLoad                       ! Load of lock must complete
        andn    %o2, 1, %o2                     ! Mask off lowest bit
        ldn     [%o1 + %lo(hrestime)], %g1      ! Seconds.
        add     %o1, %lo(hrestime), %o4
        ldn     [%o4 + CLONGSIZE], %g2          ! Nanoseconds.
        membar  #LoadLoad                       ! All loads must complete
        lduw    [%o1 + %lo(hres_lock)], %o3     ! Reload lock value
        cmp     %o3, %o2                        ! If lock is locked or has
        bne     0b                              !   changed, retry.
          stn   %g1, [%o0]                      ! Delay: store seconds
        retl
          stn   %g2, [%o0 + CLONGSIZE]          ! Delay: store nanoseconds
        SET_SIZE(gethrestime_lasttick)
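
/*
 * For illustration only: a rough C sketch (not part of the build) of the
 * retry loop above.  hres_lock is odd while hres_tick() holds it and is
 * bumped again on release, so a reader retries whenever the value it
 * sampled (with the low bit masked off) differs from the value it sees
 * after loading hrestime.
 *
 *	void
 *	gethrestime_lasttick_sketch(timestruc_t *tp)
 *	{
 *		uint32_t lock;
 *
 *		do {
 *			lock = hres_lock & ~1;	// mask off the "held" bit
 *			membar_consumer();	// lock load before data loads
 *			tp->tv_sec = hrestime.tv_sec;
 *			tp->tv_nsec = hrestime.tv_nsec;
 *			membar_consumer();	// data loads before re-check
 *		} while (hres_lock != lock);
 *	}
 */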

/*
 * Fast trap for gettimeofday().  Returns a timestruc_t in %o0 and %o1.
 *
 * This is the handler for the ST_GETHRESTIME trap.
 */

        ENTRY_NP(get_hrestime)
        GET_HRESTIME(%o0,%o1,%g1,%g2,%g3,%g4,%g5,%o2,%o3,__LINE__)
        CONV_HRESTIME(%o0, %o1, %g1, %g2, %g3)
        FAST_TRAP_DONE
        SET_SIZE(get_hrestime)

/*
 * Fast trap to return lwp virtual time, uses trap window, leaves traps
 * disabled.  Returns a 64-bit number in %o0:%o1, which is the number
 * of nanoseconds consumed.
 *
 * This is the handler for the ST_GETHRVTIME trap.
 *
 * Register usage:
 *      %o0, %o1 = return lwp virtual time
 *      %o2 = CPU/thread
 *      %o3 = lwp
 *      %g1 = scratch
 *      %g5 = scratch
 */
        ENTRY_NP(get_virtime)
        GET_NATIVE_TIME(%g5,%g1,%g2,__LINE__)   ! %g5 = native time in ticks
        CPU_ADDR(%g2, %g3)                      ! CPU struct ptr to %g2
        ldn     [%g2 + CPU_THREAD], %g2         ! thread pointer to %g2
        ldn     [%g2 + T_LWP], %g3              ! lwp pointer to %g3

        /*
         * Subtract start time of current microstate from time
         * of day to get increment for lwp virtual time.
         */
        ldx     [%g3 + LWP_STATE_START], %g1    ! ms_state_start
        sub     %g5, %g1, %g5

        /*
         * Add current value of ms_acct[LMS_USER]
         */
        ldx     [%g3 + LWP_ACCT_USER], %g1      ! ms_acct[LMS_USER]
        add     %g5, %g1, %g5
        NATIVE_TIME_TO_NSEC(%g5, %g1, %o0)

        srl     %g5, 0, %o1                     ! %o1 = lo32(%g5)
        srlx    %g5, 32, %o0                    ! %o0 = hi32(%g5)

        FAST_TRAP_DONE
        SET_SIZE(get_virtime)



        .seg    ".text"
hrtime_base_panic:
        .asciz  "hrtime_base stepping back"


        ENTRY_NP(hres_tick)
        save    %sp, -SA(MINFRAME), %sp ! get a new window

        sethi   %hi(hrestime), %l4
        ldstub  [%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5  ! try locking
7:      tst     %l5
        bz,pt   %xcc, 8f                        ! if we got it, drive on
          ld    [%l4 + %lo(nsec_scale)], %l5    ! delay: %l5 = scaling factor
        ldub    [%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
9:      tst     %l5
        bz,a,pn %xcc, 7b
          ldstub        [%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
        ba,pt   %xcc, 9b
          ldub  [%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
8:
        membar  #StoreLoad|#StoreStore

        !
        ! update hres_last_tick.  %l5 has the scaling factor (nsec_scale).
        !
        ldx     [%l4 + %lo(hrtime_base)], %g1   ! load current hrtime_base
        GET_NATIVE_TIME(%l0,%l3,%l6,__LINE__)   ! current native time
        stx     %l0, [%l4 + %lo(hres_last_tick)]! prev = current
        ! convert native time to nsecs
        NATIVE_TIME_TO_NSEC_SCALE(%l0, %l5, %l2, NSEC_SHIFT)

        sub     %l0, %g1, %i1                   ! get accurate nsec delta

        ldx     [%l4 + %lo(hrtime_base)], %l1
        cmp     %l1, %l0
        bg,pn   %xcc, 9f
          nop

        stx     %l0, [%l4 + %lo(hrtime_base)]   ! update hrtime_base

        !
        ! apply adjustment, if any
        !
        ldx     [%l4 + %lo(hrestime_adj)], %l0  ! %l0 = hrestime_adj
        brz     %l0, 2f
                                                ! hrestime_adj == 0 ?
                                                ! yes, skip adjustments
          clr   %l5                             ! delay: set adj to zero
        tst     %l0                             ! is hrestime_adj >= 0 ?
        bge,pt  %xcc, 1f                        ! yes, go handle positive case
          srl   %i1, ADJ_SHIFT, %l5             ! delay: %l5 = adj

        addcc   %l0, %l5, %g0                   ! hrestime_adj < -adj ?
        bl,pt   %xcc, 2f                        ! yes, use current adj
          neg   %l5                             ! delay: %l5 = -adj
        ba,pt   %xcc, 2f
          mov   %l0, %l5                        ! no, so set adj = hrestime_adj
1:
        subcc   %l0, %l5, %g0                   ! hrestime_adj < adj ?
        bl,a,pt %xcc, 2f                        ! yes, set adj = hrestime_adj
          mov   %l0, %l5                        ! delay: adj = hrestime_adj
2:
        ldx     [%l4 + %lo(timedelta)], %l0     ! %l0 = timedelta
        sub     %l0, %l5, %l0                   ! timedelta -= adj

        stx     %l0, [%l4 + %lo(timedelta)]     ! store new timedelta
        stx     %l0, [%l4 + %lo(hrestime_adj)]  ! hrestime_adj = timedelta

        or      %l4, %lo(hrestime), %l2
        ldn     [%l2], %i2                      ! %i2:%i3 = hrestime sec:nsec
        ldn     [%l2 + CLONGSIZE], %i3
        add     %i3, %l5, %i3                   ! hrestime.nsec += adj
        add     %i3, %i1, %i3                   ! hrestime.nsec += nslt

        set     NANOSEC, %l5                    ! %l5 = NANOSEC
        cmp     %i3, %l5
        bl,pt   %xcc, 5f                        ! if hrestime.tv_nsec < NANOSEC
          sethi %hi(one_sec), %i1               ! delay
        add     %i2, 0x1, %i2                   ! hrestime.tv_sec++
        sub     %i3, %l5, %i3                   ! hrestime.tv_nsec - NANOSEC
        mov     0x1, %l5
        st      %l5, [%i1 + %lo(one_sec)]
5:
        stn     %i2, [%l2]
        stn     %i3, [%l2 + CLONGSIZE]          ! store the new hrestime

        membar  #StoreStore

        ld      [%l4 + %lo(hres_lock)], %i1
        inc     %i1                             ! release lock
        st      %i1, [%l4 + %lo(hres_lock)]     ! clear hres_lock

        ret
        restore

9:
        !
        ! release hres_lock
        !
        ld      [%l4 + %lo(hres_lock)], %i1
        inc     %i1
        st      %i1, [%l4 + %lo(hres_lock)]

        sethi   %hi(hrtime_base_panic), %o0
        call    panic
          or    %o0, %lo(hrtime_base_panic), %o0

        SET_SIZE(hres_tick)
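
/*
 * For illustration only: a rough C sketch (not part of the build) of the
 * adjustment bookkeeping above, where "nslt" is the nanoseconds that
 * elapsed since the previous tick (new hrtime_base - old hrtime_base).
 *
 *	int64_t adj, lim = nslt >> ADJ_SHIFT;	// at most nslt/16 per tick
 *
 *	if (hrestime_adj == 0)
 *		adj = 0;
 *	else if (hrestime_adj > 0)
 *		adj = (hrestime_adj < lim) ? hrestime_adj : lim;
 *	else
 *		adj = (hrestime_adj > -lim) ? hrestime_adj : -lim;
 *
 *	timedelta -= adj;		// consume part of the pending delta
 *	hrestime_adj = timedelta;	// remainder is applied on later ticks
 *	hrestime.tv_nsec += nslt + adj;
 *	if (hrestime.tv_nsec >= NANOSEC) {	// at most one wrap per tick
 *		hrestime.tv_sec++;
 *		hrestime.tv_nsec -= NANOSEC;
 *		one_sec = 1;		// tell clock() a second has passed
 *	}
 */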

        .seg    ".text"
kstat_q_panic_msg:
        .asciz  "kstat_q_exit: qlen == 0"

        ENTRY(kstat_q_panic)
        save    %sp, -SA(MINFRAME), %sp
        sethi   %hi(kstat_q_panic_msg), %o0
        call    panic
          or    %o0, %lo(kstat_q_panic_msg), %o0
        /*NOTREACHED*/
        SET_SIZE(kstat_q_panic)

#define BRZPN   brz,pn
#define BRZPT   brz,pt

#define KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE) \
        ld      [%o0 + QTYPE/**/CNT], %o1;      /* %o1 = old qlen */    \
        QOP     %o1, 1, %o2;                    /* %o2 = new qlen */    \
        QBR     %o1, QZERO;                     /* done if qlen == 0 */ \
        st      %o2, [%o0 + QTYPE/**/CNT];      /* delay: save qlen */  \
        ldx     [%o0 + QTYPE/**/LASTUPDATE], %o3;                       \
        ldx     [%o0 + QTYPE/**/TIME], %o4;     /* %o4 = old time */    \
        ldx     [%o0 + QTYPE/**/LENTIME], %o5;  /* %o5 = old lentime */ \
        sub     %g1, %o3, %o2;                  /* %o2 = time delta */  \
        mulx    %o1, %o2, %o3;                  /* %o3 = cur lentime */ \
        add     %o4, %o2, %o4;                  /* %o4 = new time */    \
        add     %o5, %o3, %o5;                  /* %o5 = new lentime */ \
        stx     %o4, [%o0 + QTYPE/**/TIME];     /* save time */         \
        stx     %o5, [%o0 + QTYPE/**/LENTIME];  /* save lentime */      \
QRETURN;                                                                \
        stx     %g1, [%o0 + QTYPE/**/LASTUPDATE]; /* lastupdate = now */

#if !defined(DEBUG)
/*
 * Same as KSTAT_Q_UPDATE but without the
 *      QBR     %o1, QZERO;
 * check.  For use only in non-DEBUG builds; this mimics ASSERT(), which
 * expands to nothing when DEBUG is not defined.
 */
#define KSTAT_Q_UPDATE_ND(QOP, QRETURN, QTYPE) \
        ld      [%o0 + QTYPE/**/CNT], %o1;      /* %o1 = old qlen */    \
        QOP     %o1, 1, %o2;                    /* %o2 = new qlen */    \
        st      %o2, [%o0 + QTYPE/**/CNT];      /* save qlen */         \
        ldx     [%o0 + QTYPE/**/LASTUPDATE], %o3;                       \
        ldx     [%o0 + QTYPE/**/TIME], %o4;     /* %o4 = old time */    \
        ldx     [%o0 + QTYPE/**/LENTIME], %o5;  /* %o5 = old lentime */ \
        sub     %g1, %o3, %o2;                  /* %o2 = time delta */  \
        mulx    %o1, %o2, %o3;                  /* %o3 = cur lentime */ \
        add     %o4, %o2, %o4;                  /* %o4 = new time */    \
        add     %o5, %o3, %o5;                  /* %o5 = new lentime */ \
        stx     %o4, [%o0 + QTYPE/**/TIME];     /* save time */         \
        stx     %o5, [%o0 + QTYPE/**/LENTIME];  /* save lentime */      \
QRETURN;                                                                \
        stx     %g1, [%o0 + QTYPE/**/LASTUPDATE]; /* lastupdate = now */
#endif
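
/*
 * For illustration only: a rough C sketch (not part of the build) of the
 * accumulation both macros perform for one queue.  The field names here are
 * generic; the real ones are the w- and r-prefixed members of kstat_io_t.
 *
 *	void
 *	kstat_q_update_sketch(hrtime_t now, uint_t *cnt, hrtime_t *lastupdate,
 *	    hrtime_t *time, hrtime_t *lentime, int step)
 *	{
 *		uint_t oldcnt = *cnt;
 *		hrtime_t delta = now - *lastupdate;
 *
 *		*cnt = oldcnt + step;		// enqueue: +1, dequeue: -1
 *		if (oldcnt != 0) {
 *			*time += delta;		// time the queue was non-empty
 *			*lentime += (hrtime_t)oldcnt * delta; // length-weighted
 *		}
 *		*lastupdate = now;
 *	}
 */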

        .align 16
        ENTRY(kstat_waitq_enter)
        GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__)
        KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
        SET_SIZE(kstat_waitq_enter)

        .align 16
        ENTRY(kstat_waitq_exit)
        GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__)
#if defined(DEBUG)
        KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_W)
#else
        KSTAT_Q_UPDATE_ND(sub, retl, KSTAT_IO_W)
#endif
        SET_SIZE(kstat_waitq_exit)

        .align 16
        ENTRY(kstat_runq_enter)
        GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__)
        KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
        SET_SIZE(kstat_runq_enter)

        .align 16
        ENTRY(kstat_runq_exit)
        GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__)
#if defined(DEBUG)
        KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_R)
#else
        KSTAT_Q_UPDATE_ND(sub, retl, KSTAT_IO_R)
#endif
        SET_SIZE(kstat_runq_exit)

        .align 16
        ENTRY(kstat_waitq_to_runq)
        GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__)
#if defined(DEBUG)
        KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_W)
#else
        KSTAT_Q_UPDATE_ND(sub, 1:, KSTAT_IO_W)
#endif
        KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
        SET_SIZE(kstat_waitq_to_runq)

        .align 16
        ENTRY(kstat_runq_back_to_waitq)
        GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__)
#if defined(DEBUG)
        KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_R)
#else
        KSTAT_Q_UPDATE_ND(sub, 1:, KSTAT_IO_R)
#endif
        KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
        SET_SIZE(kstat_runq_back_to_waitq)

        /*
         *  -- WARNING --
         *
         * The following variables MUST be together on a 128-byte boundary.
         * In addition to the primary performance motivation (having them all
         * on the same cache line(s)), code here and in the GET*TIME() macros
         * assumes that they all have the same high 22 address bits (so
         * there's only one sethi).
         */
        .seg    ".data"
        .global timedelta, hres_last_tick, hrestime, hrestime_adj
        .global hres_lock, nsec_scale, hrtime_base, traptrace_use_stick
        .global nsec_shift, adj_shift, native_tick_offset, native_stick_offset

        /* XXX - above comment claims 128-bytes is necessary */
        .align  64
timedelta:
        .word   0, 0            /* int64_t */
hres_last_tick:
        .word   0, 0            /* hrtime_t */
hrestime:
        .nword  0, 0            /* 2 longs */
hrestime_adj:
        .word   0, 0            /* int64_t */
hres_lock:
        .word   0
nsec_scale:
        .word   0
hrtime_base:
        .word   0, 0
traptrace_use_stick:
        .word   0
nsec_shift:
        .word   NSEC_SHIFT
adj_shift:
        .word   ADJ_SHIFT
        .align  8
native_tick_offset:
        .word   0, 0
        .align  8
native_stick_offset:
        .word   0, 0


/*
 * drv_usecwait(clock_t n)      [DDI/DKI - section 9F]
 * usec_delay(int n)            [compatibility - should go one day]
 * Delay by spinning.
 *
 * Delay for n microseconds; values <= 0 delay for 1 usec.
 *
 * With UltraSPARC-III the combination of supporting mixed-speed CPUs
 * and variable clock rate for power management requires that we
 * use %stick to implement this routine.
 */

        ENTRY(drv_usecwait)
        ALTENTRY(usec_delay)
        brlez,a,pn %o0, 0f
          mov   1, %o0
0:
        sethi   %hi(sticks_per_usec), %o1
        lduw    [%o1 + %lo(sticks_per_usec)], %o1
        mulx    %o1, %o0, %o1           ! Scale usec to ticks
        inc     %o1                     ! We don't start on a tick edge
        GET_NATIVE_TIME(%o2,%o3,%o4,__LINE__)
        add     %o1, %o2, %o1

1:      cmp     %o1, %o2
        GET_NATIVE_TIME(%o2,%o3,%o4,__LINE__)
        bgeu,pt %xcc, 1b
          nop
        retl
          nop
        SET_SIZE(usec_delay)
        SET_SIZE(drv_usecwait)
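
/*
 * For illustration only: a rough C sketch (not part of the build) of the
 * spin loop above.  rd_stick() is a hypothetical stand-in for the
 * GET_NATIVE_TIME() macro; sticks_per_usec is the real global used here.
 *
 *	void
 *	drv_usecwait_sketch(clock_t n)
 *	{
 *		if (n <= 0)
 *			n = 1;			// minimum delay of 1 usec
 *		// +1 because we don't start on a tick edge
 *		uint64_t end = rd_stick() + (uint64_t)sticks_per_usec * n + 1;
 *		while (rd_stick() <= end)	// burn cycles until the deadline
 *			;
 *	}
 */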

/*
 * Level-14 interrupt prologue.
 */
        ENTRY_NP(pil14_interrupt)
        CPU_ADDR(%g1, %g2)
        rdpr    %pil, %g6                       ! %g6 = interrupted PIL
        stn     %g6, [%g1 + CPU_PROFILE_PIL]    ! record interrupted PIL
        rdpr    %tstate, %g6
        rdpr    %tpc, %g5
        btst    TSTATE_PRIV, %g6                ! trap from supervisor mode?
        bnz,a,pt %xcc, 1f
          stn   %g5, [%g1 + CPU_PROFILE_PC]     ! if so, record kernel PC
        stn     %g5, [%g1 + CPU_PROFILE_UPC]    ! if not, record user PC
        ba      pil_interrupt_common            ! must be large-disp branch
          stn   %g0, [%g1 + CPU_PROFILE_PC]     ! zero kernel PC
1:      ba      pil_interrupt_common            ! must be large-disp branch
          stn   %g0, [%g1 + CPU_PROFILE_UPC]    ! zero user PC
        SET_SIZE(pil14_interrupt)

        ENTRY_NP(tick_rtt)
        !
        ! Load TICK_COMPARE into %o5; if bit 63 is set, then TICK_COMPARE is
        ! disabled.  If TICK_COMPARE is enabled, we know that we need to
        ! reenqueue the interrupt request structure.  We'll then check TICKINT
        ! in SOFTINT; if it's set, then we know that we were in a TICK_COMPARE
        ! interrupt.  In this case, TICK_COMPARE may have been rewritten
        ! recently; we'll compare %o5 to the current time to verify that it's
        ! in the future.
        !
        ! Note that %o5 is live until after 1f.
        ! XXX - there is a subroutine call while %o5 is live!
        !
        RD_TICKCMPR(%o5,%g1,%g2,__LINE__)
        srlx    %o5, TICKINT_DIS_SHFT, %g1
        brnz,pt %g1, 2f
          nop

        rdpr    %pstate, %g5
        andn    %g5, PSTATE_IE, %g1
        wrpr    %g0, %g1, %pstate               ! Disable vec interrupts

        sethi   %hi(cbe_level14_inum), %o1
        ldx     [%o1 + %lo(cbe_level14_inum)], %o1
        call    intr_enqueue_req ! preserves %o5 and %g5
          mov   PIL_14, %o0

        ! Check SOFTINT for TICKINT/STICKINT
        rd      SOFTINT, %o4
        set     (TICK_INT_MASK | STICK_INT_MASK), %o0
        andcc   %o4, %o0, %g0
        bz,a,pn %icc, 2f
          wrpr  %g0, %g5, %pstate               ! Enable vec interrupts

        ! clear TICKINT/STICKINT
        wr      %o0, CLEAR_SOFTINT

        !
        ! Now that we've cleared TICKINT, we can reread %tick and confirm
        ! that the value we programmed is still in the future.  If it isn't,
        ! we need to reprogram TICK_COMPARE to fire as soon as possible.
        !
        GET_NATIVE_TIME(%o0,%g1,%g2,__LINE__)   ! %o0 = tick
        cmp     %o5, %o0                        ! In the future?
        bg,a,pt %xcc, 2f                        ! Yes, drive on.
          wrpr  %g0, %g5, %pstate               !   delay: enable vec intr

        !
        ! If we're here, then we have programmed TICK_COMPARE with a %tick
        ! which is in the past; we'll now load an initial step size, and loop
        ! until we've managed to program TICK_COMPARE to fire in the future.
        !
        mov     8, %o4                          ! 8 = arbitrary initial step
1:      add     %o0, %o4, %o5                   ! Add the step
        WR_TICKCMPR(%o5,%g1,%g2,__LINE__)       ! Write to TICK_CMPR
        GET_NATIVE_TIME(%o0,%g1,%g2,__LINE__)   ! %o0 = tick
        cmp     %o5, %o0                        ! In the future?
        bg,a,pt %xcc, 2f                        ! Yes, drive on.
          wrpr  %g0, %g5, %pstate               !    delay: enable vec intr
        ba      1b                              ! No, try again.
          sllx  %o4, 1, %o4                     !    delay: double step size

2:      ba      current_thread_complete
          nop
        SET_SIZE(tick_rtt)

/*
 * Level-15 interrupt prologue.
 */
        ENTRY_NP(pil15_interrupt)
        CPU_ADDR(%g1, %g2)
        rdpr    %tstate, %g6
        rdpr    %tpc, %g5
        btst    TSTATE_PRIV, %g6                ! trap from supervisor mode?
        bnz,a,pt %xcc, 1f
          stn   %g5, [%g1 + CPU_CPCPROFILE_PC]  ! if so, record kernel PC
        stn     %g5, [%g1 + CPU_CPCPROFILE_UPC] ! if not, record user PC
        ba      pil15_epilogue                  ! must be large-disp branch
          stn   %g0, [%g1 + CPU_CPCPROFILE_PC]  ! zero kernel PC
1:      ba      pil15_epilogue                  ! must be large-disp branch
          stn   %g0, [%g1 + CPU_CPCPROFILE_UPC] ! zero user PC
        SET_SIZE(pil15_interrupt)

/*
 * Prefetch a page_t for write or read; this assumes a linear
 * scan of sequential page_t's.
 */
/* XXXQ These should be inline templates, not functions */
        ENTRY(prefetch_page_w)
        retl
          nop
        SET_SIZE(prefetch_page_w)

        ENTRY(prefetch_page_r)
        retl
          nop
        SET_SIZE(prefetch_page_r)

/*
 * Prefetch struct smap for write.
 */
/* XXXQ These should be inline templates, not functions */
        ENTRY(prefetch_smap_w)
        retl
          nop
        SET_SIZE(prefetch_smap_w)

/*
 * Generic sun4v MMU and Cache operations.
 */

        ENTRY_NP(vtag_flushpage)
        /*
         * flush page from the tlb
         *
         * %o0 = vaddr
         * %o1 = sfmmup
         */
        SFMMU_CPU_CNUM(%o1, %g1, %g2)   /* %g1 = sfmmu cnum on this CPU */

        mov     %g1, %o1
        mov     MAP_ITLB | MAP_DTLB, %o2
        ta      MMU_UNMAP_ADDR
        brz,pt  %o0, 1f
          nop
        ba      panic_bad_hcall
          mov   MMU_UNMAP_ADDR, %o1
1:
        retl
          nop
        SET_SIZE(vtag_flushpage)

        ENTRY_NP(vtag_flushall)
        mov     %g0, %o0        ! XXX no cpu list yet
        mov     %g0, %o1        ! XXX no cpu list yet
        mov     MAP_ITLB | MAP_DTLB, %o2
        mov     MMU_DEMAP_ALL, %o5
        ta      FAST_TRAP
        brz,pt  %o0, 1f
          nop
        ba      panic_bad_hcall
          mov   MMU_DEMAP_ALL, %o1
1:
        retl
          nop
        SET_SIZE(vtag_flushall)

        ENTRY_NP(vtag_unmap_perm_tl1)
        /*
         * x-trap to unmap perm map entry
         * %g1 = vaddr
         * %g2 = ctxnum (KCONTEXT only)
         */
        mov     %o0, %g3
        mov     %o1, %g4
        mov     %o2, %g5
        mov     %o5, %g6
        mov     %g1, %o0
        mov     %g2, %o1
        mov     MAP_ITLB | MAP_DTLB, %o2
        mov     UNMAP_PERM_ADDR, %o5
        ta      FAST_TRAP
        brz,pt  %o0, 1f
          nop

        mov     PTL1_BAD_HCALL, %g1

        cmp     %o0, H_ENOMAP
        move    %xcc, PTL1_BAD_HCALL_UNMAP_PERM_ENOMAP, %g1

        cmp     %o0, H_EINVAL
        move    %xcc, PTL1_BAD_HCALL_UNMAP_PERM_EINVAL, %g1

        ba,a    ptl1_panic
1:
        mov     %g6, %o5
        mov     %g5, %o2
        mov     %g4, %o1
        mov     %g3, %o0
        retry
        SET_SIZE(vtag_unmap_perm_tl1)

        ENTRY_NP(vtag_flushpage_tl1)
        /*
         * x-trap to flush page from tlb and tsb
         *
         * %g1 = vaddr, zero-extended on 32-bit kernel
         * %g2 = sfmmup
         *
         * assumes TSBE_TAG = 0
         */
        srln    %g1, MMU_PAGESHIFT, %g1
        slln    %g1, MMU_PAGESHIFT, %g1                 /* g1 = vaddr */
        mov     %o0, %g3
        mov     %o1, %g4
        mov     %o2, %g5
        mov     %g1, %o0                        /* vaddr */

        SFMMU_CPU_CNUM(%g2, %o1, %g6)   /* %o1 = sfmmu cnum on this CPU */

        mov     MAP_ITLB | MAP_DTLB, %o2
        ta      MMU_UNMAP_ADDR
        brz,pt  %o0, 1f
          nop
        ba      ptl1_panic
          mov   PTL1_BAD_HCALL, %g1
1:
        mov     %g5, %o2
        mov     %g4, %o1
        mov     %g3, %o0
        membar #Sync
        retry
        SET_SIZE(vtag_flushpage_tl1)

        ENTRY_NP(vtag_flush_pgcnt_tl1)
        /*
         * x-trap to flush pgcnt MMU_PAGESIZE pages from tlb
         *
         * %g1 = vaddr, zero-extended on 32-bit kernel
         * %g2 = <sfmmup58|pgcnt6>, (pgcnt - 1) is passed in via pgcnt6 bits.
         *
         * NOTE: this handler relies on the fact that no
         *      interrupts or traps can occur during the loop
         *      issuing the TLB_DEMAP operations. It is assumed
         *      that interrupts are disabled and this code is
         *      fetching from the kernel locked text address.
         *
         * assumes TSBE_TAG = 0
         */
        srln    %g1, MMU_PAGESHIFT, %g1
        slln    %g1, MMU_PAGESHIFT, %g1         /* g1 = vaddr */
        mov     %o0, %g3
        mov     %o1, %g4
        mov     %o2, %g5

        and     %g2, SFMMU_PGCNT_MASK, %g7      /* g7 = pgcnt - 1 */
        add     %g7, 1, %g7                     /* g7 = pgcnt */

        andn    %g2, SFMMU_PGCNT_MASK, %o0      /* %o0 = sfmmup */

        SFMMU_CPU_CNUM(%o0, %g2, %g6)    /* %g2 = sfmmu cnum on this CPU */

        set     MMU_PAGESIZE, %g6               /* g6 = pgsize */

1:
        mov     %g1, %o0                        /* vaddr */
        mov     %g2, %o1                        /* cnum */
        mov     MAP_ITLB | MAP_DTLB, %o2
        ta      MMU_UNMAP_ADDR
        brz,pt  %o0, 2f
          nop
        ba      ptl1_panic
          mov   PTL1_BAD_HCALL, %g1
2:
        deccc   %g7                             /* decr pgcnt */
        bnz,pt  %icc,1b
          add   %g1, %g6, %g1                   /* go to nextpage */

        mov     %g5, %o2
        mov     %g4, %o1
        mov     %g3, %o0
        membar #Sync
        retry
        SET_SIZE(vtag_flush_pgcnt_tl1)

        ! Not implemented on US1/US2
        ENTRY_NP(vtag_flushall_tl1)
        mov     %o0, %g3
        mov     %o1, %g4
        mov     %o2, %g5
        mov     %o3, %g6        ! XXXQ not used?
        mov     %o5, %g7
        mov     %g0, %o0        ! XXX no cpu list yet
        mov     %g0, %o1        ! XXX no cpu list yet
        mov     MAP_ITLB | MAP_DTLB, %o2
        mov     MMU_DEMAP_ALL, %o5
        ta      FAST_TRAP
        brz,pt  %o0, 1f
          nop
        ba      ptl1_panic
          mov   PTL1_BAD_HCALL, %g1
1:
        mov     %g7, %o5
        mov     %g6, %o3        ! XXXQ not used?
        mov     %g5, %o2
        mov     %g4, %o1
        mov     %g3, %o0
        retry
        SET_SIZE(vtag_flushall_tl1)

/*
 * flush_instr_mem:
 *      Flush a portion of the I-$ starting at vaddr
 *      %o0 vaddr
 *      %o1 bytes to be flushed
 */

        ENTRY(flush_instr_mem)
        membar  #StoreStore                             ! Ensure the stores
                                                        ! are globally visible
1:
        flush   %o0
        subcc   %o1, ICACHE_FLUSHSZ, %o1                ! bytes = bytes-0x20
        bgu,pt  %ncc, 1b
          add   %o0, ICACHE_FLUSHSZ, %o0                ! vaddr = vaddr+0x20

        retl
          nop
        SET_SIZE(flush_instr_mem)
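
/*
 * For illustration only: a rough C rendering (not part of the build) of the
 * loop above.  flush_insn() is a hypothetical wrapper around the SPARC
 * "flush" instruction; membar_producer() corresponds to membar #StoreStore.
 *
 *	void
 *	flush_instr_mem_sketch(caddr_t vaddr, size_t len)
 *	{
 *		membar_producer();	// make prior stores globally visible
 *		do {
 *			flush_insn(vaddr);	// flush one 0x20-byte block
 *			vaddr += ICACHE_FLUSHSZ;
 *			len -= ICACHE_FLUSHSZ;
 *		} while ((ssize_t)len > 0);
 *	}
 */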

#if !defined(CUSTOM_FPZERO)

/*
 * fp_zero() - clear all fp data registers and the fsr
 */

.global fp_zero_zero
.align 8
fp_zero_zero:
        .xword  0

        ENTRY_NP(fp_zero)
        sethi   %hi(fp_zero_zero), %o0
        ldx     [%o0 + %lo(fp_zero_zero)], %fsr
        ldd     [%o0 + %lo(fp_zero_zero)], %f0
        fmovd   %f0, %f2
        fmovd   %f0, %f4
        fmovd   %f0, %f6
        fmovd   %f0, %f8
        fmovd   %f0, %f10
        fmovd   %f0, %f12
        fmovd   %f0, %f14
        fmovd   %f0, %f16
        fmovd   %f0, %f18
        fmovd   %f0, %f20
        fmovd   %f0, %f22
        fmovd   %f0, %f24
        fmovd   %f0, %f26
        fmovd   %f0, %f28
        fmovd   %f0, %f30
        fmovd   %f0, %f32
        fmovd   %f0, %f34
        fmovd   %f0, %f36
        fmovd   %f0, %f38
        fmovd   %f0, %f40
        fmovd   %f0, %f42
        fmovd   %f0, %f44
        fmovd   %f0, %f46
        fmovd   %f0, %f48
        fmovd   %f0, %f50
        fmovd   %f0, %f52
        fmovd   %f0, %f54
        fmovd   %f0, %f56
        fmovd   %f0, %f58
        fmovd   %f0, %f60
        retl
        fmovd   %f0, %f62
        SET_SIZE(fp_zero)

#endif  /* CUSTOM_FPZERO */