1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 #if !defined(lint)
  26 #include "assym.h"
  27 #endif  /* !lint */
  28 
  29 /*
  30  * General assembly language routines.
  31  * It is the intent of this file to contain routines that are
  32  * specific to cpu architecture.
  33  */
  34 
  35 /*
  36  * WARNING: If you add a fast trap handler which can be invoked by a
  37  * non-privileged user, you may have to use the FAST_TRAP_DONE macro
  38  * instead of the "done" instruction to return to user mode. See the
  39  * comments for the "fast_trap_done" entry point for more information.
      *
      * FAST_TRAP_DONE expands to a single annulled branch-always (ba,a) to
      * fast_trap_done, so no delay-slot instruction follows it.  The
      * fast_trap_done entry point is not defined in this part of the file.
  40  */
  41 #define FAST_TRAP_DONE  \
  42         ba,a    fast_trap_done
  43 
  44 /*
  45  * Override GET_NATIVE_TIME for the cpu module code.  This is not
  46  * guaranteed to be exactly one instruction, be careful of using
  47  * the macro in delay slots.
  48  *
  49  * Do not use any instruction that modifies condition codes as the 
  50  * caller may depend on these to remain unchanged across the macro.
  51  */
  52 #if defined(CHEETAH) || defined(OLYMPUS_C)
  53 
     /*
      * CHEETAH / OLYMPUS_C variants: native time is read from and written to
      * the STICK register, and the compare value lives in STICK_COMPARE.
      * Every access is a plain rd/wr, so the scratch-register and label
      * arguments are accepted only to keep the same macro signatures as the
      * other variants; they are not used here.
      *
      * DELTA_NATIVE_TIME leaves the new (STICK + delta) value in "reg".
      */
  54 #define GET_NATIVE_TIME(out, scr1, scr2) \
  55         rd      STICK, out
  56 #define DELTA_NATIVE_TIME(delta, reg, scr1, scr2, scr3) \
  57         rd      STICK, reg;             \
  58         add     reg, delta, reg;        \
  59         wr      reg, STICK
  60 #define RD_TICKCMPR(out, scr)           \
  61         rd      STICK_COMPARE, out
  62 #define WR_TICKCMPR(in, scr1, scr2, label) \
  63         wr      in, STICK_COMPARE
  64 
  65 #elif defined(HUMMINGBIRD)
  66 #include <sys/spitregs.h>
  67 
  68 /*
  69  * the current hummingbird version of %stick and %stick_cmp
  70  * were both implemented as (2) 32-bit locations in ASI_IO space;
  71  * the hardware should support atomic r/w; meanwhile: ugly alert! ...
  72  *
  73  * 64-bit opcodes are required, but move only 32-bits:
  74  *
  75  * ldxa [phys]ASI_IO, %dst      reads  the low 32-bits from phys into %dst
  76  * stxa %src, [phys]ASI_IO      writes the low 32-bits from %src into phys
  77  *
  78  * reg equivalent               [phys]ASI_IO
  79  * ------------------           ---------------
  80  * %stick_cmp  low-32           0x1FE.0000.F060
  81  * %stick_cmp high-32           0x1FE.0000.F068
  82  * %stick      low-32           0x1FE.0000.F070
  83  * %stick     high-32           0x1FE.0000.F078
      *
      * The HSTC_ and HST_ constants below are the low byte of those addresses
      * (the "xx" that SETL41() ORs into 0x1FE.0000.F0xx); HST_DIFF is the
      * distance from a low-32 location to its high-32 partner.
  84  */
  85 #define HSTC_LOW        0x60                    /* stick_cmp low  32-bits */
  86 #define HSTC_HIGH       0x68                    /* stick_cmp high 32-bits */
  87 #define HST_LOW         0x70                    /* stick low  32-bits */
  88 #define HST_HIGH        0x78                    /* stick high 32-bits */
  89 #define HST_DIFF        0x08                    /* low<-->high diff */
  90 
  91 /*
  92  * SETL41(reg, byte) builds the address 0x1FE.0000.F000 | byte in reg.
  93  * Any change in the number of instructions in SETL41()
  94  * will affect SETL41_OFF
      */
  95 #define SETL41(reg, byte) \
  96         sethi   %hi(0x1FE00000), reg;           /* 0000.0000.1FE0.0000 */ \
  97         or      reg, 0xF, reg;                  /* 0000.0000.1FE0.000F */ \
  98         sllx    reg, 12, reg;                   /* 0000.01FE.0000.F000 */ \
  99         or      reg, byte, reg;                 /* 0000.01FE.0000.F0xx */
 100 
 101 /*
 102  * SETL41_OFF is used to calculate the relative PC value when a
 103  * branch instruction needs to go over the SETL41() macro.  It is the
      * size of SETL41() in bytes: four instructions of 4 bytes each.
 104  */
 105 #define SETL41_OFF  16
 106 
 107 /*
 108  * reading stick requires 2 loads, and there could be an intervening
 109  * low-to-high 32-bit rollover resulting in a return value that is
 110  * off by about (2 ^ 32); this rare case is prevented by re-reading
 111  * the low-32 bits after the high-32 and verifying the "after" value
 112  * is >= the "before" value; if not, increment the high-32 value.
 113  *
 114  * this method is limited to 1 rollover, and based on the fixed
 115  * stick-frequency (5555555), requires the loads to complete within
 116  * 773 seconds; incrementing the high-32 value will not overflow for
 117  * about 52644 years.
 118  *
 119  * writing stick requires 2 stores; if the old/new low-32 value is
 120  * near 0xffffffff, there could be another rollover (also rare).
 121  * to prevent this, we first write a 0 to the low-32, then write
 122  * new values to the high-32 then the low-32.
 123  *
 124  * When we detect a carry in the lower %stick register, we need to
 125  * read HST_HIGH again. However at the point where we detect this,
 126  * we need to rebuild the register address HST_HIGH. This involves more
 127  * than one instruction and a branch is unavoidable. However, most of
 128  * the time, there is no carry. So we take the penalty of a branch
 129  * instruction only when there is carry (less frequent).
 130  *
 131  * For GET_NATIVE_TIME(), we start afresh and branch to SETL41().
 132  * For DELTA_NATIVE_TIME(), we branch to just after SETL41() since
 133  * addr already points to HST_LOW.
 134  *
 135  * NOTE: this method requires disabling interrupts before using
 136  * DELTA_NATIVE_TIME.
      *
      * Branch targets are PC-relative byte offsets and assume 4-byte
      * instructions.  In GET_NATIVE_TIME() the brlz is the 11th instruction
      * of the expansion, so ".-(SETL41_OFF+24)" (10 instructions back)
      * lands on the first instruction of SETL41().  In DELTA_NATIVE_TIME()
      * ".-24" goes back 6 instructions, to the first ldxa after SETL41().
      * As written, a detected rollover therefore repeats the whole
      * low/high/low read sequence rather than adjusting the high word in
      * place.  Adding or removing an instruction between SETL41() and the
      * brlz requires updating these offsets.
      *
      * RD_TICKCMPR() reads the low word and then the high word with no
      * rollover check.  WR_TICKCMPR() stores the high 32 bits first and
      * the low 32 bits second; its "label" argument is unused here.
 137  */
 138 #define GET_NATIVE_TIME(out, scr, tmp)  \
 139         SETL41(scr, HST_LOW);           \
 140         ldxa    [scr]ASI_IO, tmp;       \
 141         inc     HST_DIFF, scr;          \
 142         ldxa    [scr]ASI_IO, out;       \
 143         dec     HST_DIFF, scr;          \
 144         ldxa    [scr]ASI_IO, scr;       \
 145         sub     scr, tmp, tmp;          \
 146         brlz,pn tmp, .-(SETL41_OFF+24); \
 147         sllx    out, 32, out;           \
 148         or      out, scr, out
 149 #define DELTA_NATIVE_TIME(delta, addr, high, low, tmp) \
 150         SETL41(addr, HST_LOW);          \
 151         ldxa    [addr]ASI_IO, tmp;      \
 152         inc     HST_DIFF, addr;         \
 153         ldxa    [addr]ASI_IO, high;     \
 154         dec     HST_DIFF, addr;         \
 155         ldxa    [addr]ASI_IO, low;      \
 156         sub     low, tmp, tmp;          \
 157         brlz,pn tmp, .-24;              \
 158         sllx    high, 32, high;         \
 159         or      high, low, high;        \
 160         add     high, delta, high;      \
 161         srl     high, 0, low;           \
 162         srlx    high, 32, high;         \
 163         stxa    %g0, [addr]ASI_IO;      \
 164         inc     HST_DIFF, addr;         \
 165         stxa    high, [addr]ASI_IO;     \
 166         dec     HST_DIFF, addr;         \
 167         stxa    low, [addr]ASI_IO
 168 #define RD_TICKCMPR(out, scr)           \
 169         SETL41(scr, HSTC_LOW);          \
 170         ldxa    [scr]ASI_IO, out;       \
 171         inc     HST_DIFF, scr;          \
 172         ldxa    [scr]ASI_IO, scr;       \
 173         sllx    scr, 32, scr;           \
 174         or      scr, out, out
 175 #define WR_TICKCMPR(in, scra, scrd, label) \
 176         SETL41(scra, HSTC_HIGH);        \
 177         srlx    in, 32, scrd;           \
 178         stxa    scrd, [scra]ASI_IO;     \
 179         dec     HST_DIFF, scra;         \
 180         stxa    in, [scra]ASI_IO
 181 
 182 #else   /* !CHEETAH && !HUMMINGBIRD */
 183 
     /*
      * Default variants: native time is the privileged %tick register,
      * accessed with rdpr/wrpr, and the compare register is TICK_COMPARE,
      * accessed with rd/wr.  The scratch arguments are unused; "label" is
      * used only by the BB_ERRATA_1 form of WR_TICKCMPR.
      */
 184 #define GET_NATIVE_TIME(out, scr1, scr2) \
 185         rdpr    %tick, out
 186 #define DELTA_NATIVE_TIME(delta, reg, scr1, scr2, scr3) \
 187         rdpr    %tick, reg;             \
 188         add     reg, delta, reg;        \
 189         wrpr    reg, %tick
 190 #define RD_TICKCMPR(out, scr)           \
 191         rd      TICK_COMPARE, out
 192 #ifdef BB_ERRATA_1 /* writes to TICK_COMPARE may fail */
 193 /*
 194  * Writes to the TICK_COMPARE register sometimes fail on blackbird modules.
 195  * The failure occurs only when the following instruction decodes to wr or
 196  * wrpr.  The workaround is to immediately follow writes to TICK_COMPARE
 197  * with a read, thus stalling the pipe and keeping following instructions
 198  * from causing data corruption.  Aligning to a quadword will ensure these
 199  * two instructions are not split due to i$ misses.
      *
      * The macro actually aligns to 64 bytes (.align 64), which is stricter
      * than the quadword mentioned above.  The leading "ba,a" jumps over any
      * padding the alignment inserts.  "label" is pasted into the local
      * label name so that each expansion gets its own label; the callers in
      * this file pass __LINE__.
 200  */
 201 #define WR_TICKCMPR(cmpr,scr1,scr2,label)       \
 202         ba,a    .bb_errata_1.label              ;\
 203         .align  64                              ;\
 204 .bb_errata_1.label:                             ;\
 205         wr      cmpr, TICK_COMPARE              ;\
 206         rd      TICK_COMPARE, %g0
 207 #else   /* BB_ERRATA_1 */
 208 #define WR_TICKCMPR(in,scr1,scr2,label)         \
 209         wr      in, TICK_COMPARE
 210 #endif  /* BB_ERRATA_1 */
 211 
 212 #endif  /* !CHEETAH && !HUMMINGBIRD */
 213 
 214 #include <sys/clock.h>
 215 
 216 #if defined(lint)
 217 #include <sys/types.h>
 218 #include <sys/scb.h>
 219 #include <sys/systm.h>
 220 #include <sys/regset.h>
 221 #include <sys/sunddi.h>
 222 #include <sys/lockstat.h>
 223 #endif  /* lint */
 224 
 225 
 226 #include <sys/asm_linkage.h>
 227 #include <sys/privregs.h>
 228 #include <sys/machparam.h>        /* To get SYSBASE and PAGESIZE */
 229 #include <sys/machthread.h>
 230 #include <sys/clock.h>
 231 #include <sys/intreg.h>
 232 #include <sys/psr_compat.h>
 233 #include <sys/isa_defs.h>
 234 #include <sys/dditypes.h>
 235 #include <sys/intr.h>
 236 
 237 #if !defined(lint)
 238 #include "assym.h"
 239 #endif  /* !lint */
 240 
 241 #if defined(lint)
 242 
 243 uint_t
 244 get_impl(void)
 245 { return (0); }
 246 
 247 #else   /* lint */
 248 
     /*
      * uint_t get_impl(void)
      *
      * Leaf routine: expands GET_CPU_IMPL with %o0 as its argument and
      * returns with %o0 as the result register.  GET_CPU_IMPL is not
      * defined in the visible part of this file; presumably it extracts the
      * CPU implementation number -- confirm against its definition.
      * The lint branch above is only a placeholder definition.
      */
 249         ENTRY(get_impl)
 250         GET_CPU_IMPL(%o0)
 251         retl
 252         nop
 253         SET_SIZE(get_impl)
 254 
 255 #endif  /* lint */
 256 
 257 #if defined(lint)
 258 /*
 259  * Softint generated when counter field of tick reg matches value field
 260  * of tick_cmpr reg
 261  */
 262 /*ARGSUSED*/
 263 void
 264 tickcmpr_set(uint64_t clock_cycles)
 265 {}
 266 
 267 #else   /* lint */
 268 
     /*
      * void tickcmpr_set(uint64_t clock_cycles)
      *
      * Program the tick-compare register with the value passed in %o0 and
      * make sure that value is still in the future after the write:
      *
      *   - write the target (%o2) with WR_TICKCMPR;
      *   - read the current native time and clear its top bit with the
      *     sllx/srlx-by-1 pair;
      *   - if target > now (signed 64-bit compare), return;
      *   - otherwise set target = now + step and try again.
      *
      * The step (%o3) starts at 8, but it is doubled in the delay slot of
      * the "bg" before it is ever added, so the first retry uses 16 and
      * each later retry doubles it again.  That delay slot is not annulled,
      * so the doubling also happens (harmlessly) on the exit path.
      *
      * Clobbers %o0, %o2 and %o3, plus %o4/%o5 in the variants of the time
      * macros that use their scratch arguments.
      */
 269         ENTRY_NP(tickcmpr_set)
 270         ! get 64-bit clock_cycles interval
 271         mov     %o0, %o2
 272         mov     8, %o3                  ! A reasonable initial step size
 273 1:
 274         WR_TICKCMPR(%o2,%o4,%o5,__LINE__)       ! Write to TICK_CMPR
 275 
 276         GET_NATIVE_TIME(%o0, %o4, %o5)  ! Read %tick to confirm the
 277         sllx    %o0, 1, %o0             !   value we wrote was in the future.
 278         srlx    %o0, 1, %o0
 279 
 280         cmp     %o2, %o0                ! If the value we wrote was in the
 281         bg,pt   %xcc, 2f                !   future, then blow out of here.
 282         sllx    %o3, 1, %o3             ! If not, then double our step size,
 283         ba,pt   %xcc, 1b                !   and take another lap.
 284         add     %o0, %o3, %o2           !
 285 2:
 286         retl
 287         nop
 288         SET_SIZE(tickcmpr_set)
 289 
 290 #endif  /* lint */
 291 
 292 #if defined(lint)
 293 
 294 void
 295 tickcmpr_disable(void)
 296 {}
 297 
 298 #else   /* lint */
 299 
     /*
      * void tickcmpr_disable(void)
      *
      * Writes the single-bit value (1 << TICKINT_DIS_SHFT) to the
      * tick-compare register through WR_TICKCMPR; every other bit of the
      * register is written as zero.  Clobbers %g1 and %o0, plus %o4/%o5 in
      * the variants of WR_TICKCMPR that use their scratch arguments.
      */
 300         ENTRY_NP(tickcmpr_disable)
 301         mov     1, %g1
 302         sllx    %g1, TICKINT_DIS_SHFT, %o0
 303         WR_TICKCMPR(%o0,%o4,%o5,__LINE__)       ! Write to TICK_CMPR
 304         retl
 305         nop
 306         SET_SIZE(tickcmpr_disable)
 307 
 308 #endif  /* lint */
 309 
 310 #if defined(lint)
 311 
 312 /*
 313  * tick_write_delta() increments %tick by the specified delta.  This should
 314  * only be called after a CPR event to assure that gethrtime() continues to
 315  * increase monotonically.  Obviously, writing %tick needs to be done very
 316  * carefully to avoid introducing unnecessary %tick skew across CPUs.  For
 317  * this reason, we make sure we're i-cache hot before actually writing to
 318  * %tick.
      *
      * The caller's %pstate is saved in %g1, PSTATE_IE is flipped with a
      * wrpr (which XORs its two source operands) to disable interrupts, and
      * the saved %pstate is written back in the delay slot of the return.
      * On DEBUG kernels the routine panics if PSTATE_IE was already clear on
      * entry.  The delta is copied from %o0 to %o2, and the update itself is
      * DELTA_NATIVE_TIME, placed right after a 16-byte-aligned nop.
 319  */
 320 /*ARGSUSED*/
 321 void
 322 tick_write_delta(uint64_t delta)
 323 {}
 324 
 325 #else   /* lint */
 326 
 327 #ifdef DEBUG
 328         .seg    ".text"
 329 tick_write_panic:
 330         .asciz  "tick_write_delta: interrupts already disabled on entry"
 331 #endif  /* DEBUG */
 332 
 333         ENTRY_NP(tick_write_delta)
 334         rdpr    %pstate, %g1
 335 #ifdef DEBUG
 336         andcc   %g1, PSTATE_IE, %g0     ! If DEBUG, check that interrupts
 337         bnz     0f                      ! aren't already disabled.
 338         sethi   %hi(tick_write_panic), %o1
 339         save    %sp, -SA(MINFRAME), %sp ! get a new window to preserve caller
 340         call    panic
 341         or      %i1, %lo(tick_write_panic), %o0
 342 #endif  /* DEBUG */
 343 0:      wrpr    %g1, PSTATE_IE, %pstate ! Disable interrupts
 344         mov     %o0, %o2
 345         ba      0f                      ! Branch to cache line-aligned instr.
 346         nop
 347         .align  16
 348 0:      nop                             ! The next 3 instructions are now hot.
 349         DELTA_NATIVE_TIME(%o2, %o3, %o4, %o5, %g2)      ! read/inc/write %tick
 350 
 351         retl                            ! Return
 352         wrpr    %g0, %g1, %pstate       !     delay: Re-enable interrupts
             /* close the symbol, as every other routine in this file does */
             SET_SIZE(tick_write_delta)
 353 #endif  /* lint */
 354 
 355 #if defined(lint)
 356 /*
 357  *  return 1 if disabled
 358  */
 359 
 360 int
 361 tickcmpr_disabled(void)
 362 { return (0); }
 363 
 364 #else   /* lint */
 365 
     /*
      * int tickcmpr_disabled(void)
      *
      * Reads the tick-compare register into %g1 (using %o0 as scratch) and
      * returns it logically shifted right by TICKINT_DIS_SHFT; the shift is
      * done in the delay slot of the return.  The result is exactly 1 only
      * if no bit above TICKINT_DIS_SHFT is set -- presumably the disable
      * bit is the top bit; confirm against the definition of
      * TICKINT_DIS_SHFT.
      */
 366         ENTRY_NP(tickcmpr_disabled)
 367         RD_TICKCMPR(%g1, %o0)
 368         retl
 369         srlx    %g1, TICKINT_DIS_SHFT, %o0
 370         SET_SIZE(tickcmpr_disabled)
 371 
 372 #endif  /* lint */
 373 
 374 /*
 375  * Get current tick
      *
      * gettick() and randtick() are two names for the same code: both
      * return the value produced by GET_NATIVE_TIME in %o0.  Unlike
      * gettick_counter(), the top bit of the value is not masked off here.
      * %o2 and %o3 are handed to the macro as scratch registers.
 376  */
 377 #if defined(lint)
 378 
 379 u_longlong_t
 380 gettick(void)
 381 { return (0); }
 382 
 383 u_longlong_t
 384 randtick(void)
 385 { return (0); }
 386 
 387 #else   /* lint */
 388 
 389         ENTRY(gettick)
 390         ALTENTRY(randtick)
 391         GET_NATIVE_TIME(%o0, %o2, %o3)
 392         retl
 393         nop
 394         SET_SIZE(randtick)
 395         SET_SIZE(gettick)
 396 
 397 #endif  /* lint */
 398 
 399 
 400 /*
 401  * Return the counter portion of the tick register.
      *
      * This always reads %tick with rdpr; it does not go through the
      * GET_NATIVE_TIME override, so it reads %tick even in builds where
      * GET_NATIVE_TIME uses a different time source.  The top bit (npt) is
      * removed by shifting left and then right by one; the right shift
      * executes in the delay slot of the return.
 402  */
 403 
 404 #if defined(lint)
 405 
 406 uint64_t
 407 gettick_counter(void)
 408 { return(0); }
 409 
 410 #else   /* lint */
 411 
 412         ENTRY_NP(gettick_counter)
 413         rdpr    %tick, %o0
 414         sllx    %o0, 1, %o0
 415         retl
 416         srlx    %o0, 1, %o0             ! shake off npt bit
 417         SET_SIZE(gettick_counter)
 418 #endif  /* lint */
 419 
 420 /*
 421  * Provide a C callable interface to the trap that reads the hi-res timer.
 422  * Returns 64-bit nanosecond timestamp in %o0 and %o1.
 423  */
 424 
 425 #if defined(lint)
 426 
     /*
      * Lint-only placeholders.  When "lint" is defined these C definitions
      * stand in for the assembly routines in the #else branch below; they
      * return zero or store zeros and contain no timekeeping logic.
      */
 427 hrtime_t
 428 gethrtime(void)
 429 {
 430         return ((hrtime_t)0);
 431 }
 432 
 433 hrtime_t
 434 gethrtime_unscaled(void)
 435 {
 436         return ((hrtime_t)0);
 437 }
 438 
 439 hrtime_t
 440 gethrtime_max(void)
 441 {
 442         return ((hrtime_t)0);
 443 }
 444 
 445 void
 446 scalehrtime(hrtime_t *hrt)
 447 {
 448         *hrt = 0;
 449 }
 450 
 451 void
 452 gethrestime(timespec_t *tp)
 453 {
 454         tp->tv_sec = 0;
 455         tp->tv_nsec = 0;
 456 }
 457 
 458 time_t
 459 gethrestime_sec(void)
 460 {
 461         return (0);
 462 }
 463 
 464 void
 465 gethrestime_lasttick(timespec_t *tp)
 466 {
 467         tp->tv_sec = 0;
 468         tp->tv_nsec = 0;
 469 }
 470 
 471 /*ARGSUSED*/
 472 void
 473 hres_tick(void)
 474 {
 475 }
 476 
 477 void
 478 panic_hres_tick(void)
 479 {
 480 }
 481 
 482 #else   /* lint */
 483 
 484         ENTRY_NP(gethrtime)
             /*
              * hrtime_t gethrtime(void)
              *
              * Expands GET_HRTIME, which (per the comment below) leaves the
              * result in %g1, then returns it whole in %o0; the move is in
              * the delay slot of the return.  %o0-%o5 and %g2 are handed to
              * the macro -- presumably as scratch registers; GET_HRTIME is
              * not defined in this file.
              */
 485         GET_HRTIME(%g1, %o0, %o1, %o2, %o3, %o4, %o5, %g2)
 486                                                         ! %g1 = hrtime
 487         retl
 488         mov     %g1, %o0
 489         SET_SIZE(gethrtime)
 490 
 491         ENTRY_NP(gethrtime_unscaled)
             /*
              * hrtime_t gethrtime_unscaled(void)
              *
              * Returns the raw value from GET_NATIVE_TIME in %o0, with no
              * conversion to nanoseconds.  Clobbers %g1; %o2/%o3 are the
              * macro's scratch arguments.
              */
 492         GET_NATIVE_TIME(%g1, %o2, %o3)                  ! %g1 = native time
 493         retl
 494         mov     %g1, %o0
 495         SET_SIZE(gethrtime_unscaled)
 496 
 497         ENTRY_NP(gethrtime_waitfree)
 498         ALTENTRY(dtrace_gethrtime)
             /*
              * gethrtime_waitfree() and dtrace_gethrtime() are two names for
              * the same code.  It reads the native time and converts it with
              * NATIVE_TIME_TO_NSEC, returning the result in %o0.  Unlike
              * gethrtime(), it does not go through GET_HRTIME.
              */
 499         GET_NATIVE_TIME(%g1, %o2, %o3)                  ! %g1 = native time
 500         NATIVE_TIME_TO_NSEC(%g1, %o2, %o3)
 501         retl
 502         mov     %g1, %o0
 503         SET_SIZE(dtrace_gethrtime)
 504         SET_SIZE(gethrtime_waitfree)
 505 
 506         ENTRY(gethrtime_max)
             /*
              * hrtime_t gethrtime_max(void)
              *
              * Runs the value produced by NATIVE_TIME_MAX through
              * NATIVE_TIME_TO_NSEC and returns it in %o0.  If the converted
              * value is negative as a signed 64-bit number, it is replaced
              * by (-1 >> 1 logical), the largest positive 64-bit value.
              * The replacement sits in an annulled delay slot, so it
              * executes only when the brlz is taken.
              */
 507         NATIVE_TIME_MAX(%g1)
 508         NATIVE_TIME_TO_NSEC(%g1, %o0, %o1)
 509 
 510         ! hrtime_t's are signed, max hrtime_t must be positive
 511         mov     -1, %o2
 512         brlz,a  %g1, 1f
 513         srlx    %o2, 1, %g1
 514 1:
 515         retl
 516         mov     %g1, %o0
 517         SET_SIZE(gethrtime_max)
 518 
 519         ENTRY(scalehrtime)
             /*
              * void scalehrtime(hrtime_t *hrt)
              *
              * Converts the 64-bit value at *hrt in place: load it, apply
              * NATIVE_TIME_TO_NSEC, and store it back in the delay slot of
              * the return.  Clobbers %o1-%o3.
              */
 520         ldx     [%o0], %o1
 521         NATIVE_TIME_TO_NSEC(%o1, %o2, %o3)
 522         retl
 523         stx     %o1, [%o0]
 524         SET_SIZE(scalehrtime)
 525 
 526 /*
 527  * Fast trap to return a timestamp, uses trap window, leaves traps
 528  * disabled.  Returns a 64-bit nanosecond timestamp in %o0 and %o1.
 529  *
 530  * This is the handler for the ST_GETHRTIME trap.
      *
      * The 64-bit result from GET_HRTIME (%g1) is split for the caller:
      * %o0 receives the upper 32 bits and %o1 the lower 32 bits.  The
      * handler exits through FAST_TRAP_DONE rather than a "done"
      * instruction.
 531  */
 532 
 533         ENTRY_NP(get_timestamp)
 534         GET_HRTIME(%g1, %g2, %g3, %g4, %g5, %o0, %o1, %o2)      ! %g1 = hrtime
 535         srlx    %g1, 32, %o0                            ! %o0 = hi32(%g1)
 536         srl     %g1, 0, %o1                             ! %o1 = lo32(%g1)
 537         FAST_TRAP_DONE
 538         SET_SIZE(get_timestamp)
 539 
 540 /*
 541  * Macro to convert GET_HRESTIME() bits into a timestamp.
 542  *
 543  * We use two separate macros so that the platform-dependent GET_HRESTIME()
 544  * can be as small as possible; CONV_HRESTIME() implements the generic part.
      *
      * Arguments (all registers):
      *      hrestsec, hrestnsec     seconds/nanoseconds, updated in place
      *      adj                     pending adjustment; may be overwritten
      *      nslt                    nanoseconds to add; overwritten with
      *                              (nslt >> ADJ_SHIFT)
      *      nano                    the value compared against and
      *                              subtracted in the final loop (NANOSEC,
      *                              per the comments below)
      *
      * hrestnsec always receives nslt.  When adj is positive, the smaller of
      * adj and (nslt >> ADJ_SHIFT) is added as well; when adj is negative,
      * adj is added if it is no more negative than -(nslt >> ADJ_SHIFT),
      * otherwise (nslt >> ADJ_SHIFT) is subtracted.  The loop at label 3
      * then moves whole multiples of nano from hrestnsec into hrestsec.
      *
      * The macro modifies the condition codes and uses the numeric local
      * labels 2, 3 and 4.  The inline comments write the shift as ">>= 4"
      * and "/16"; the code uses ADJ_SHIFT, whose value is defined elsewhere.
 545  */
 546 #define CONV_HRESTIME(hrestsec, hrestnsec, adj, nslt, nano) \
 547         brz,pt  adj, 3f;                /* no adjustments, it's easy */ \
 548         add     hrestnsec, nslt, hrestnsec; /* hrest.tv_nsec += nslt */ \
 549         brlz,pn adj, 2f;                /* if hrestime_adj negative */  \
 550         srlx    nslt, ADJ_SHIFT, nslt;  /* delay: nslt >>= 4 */           \
 551         subcc   adj, nslt, %g0;         /* hrestime_adj - nslt/16 */    \
 552         movg    %xcc, nslt, adj;        /* adj by min(adj, nslt/16) */  \
 553         ba      3f;                     /* go convert to sec/nsec */    \
 554         add     hrestnsec, adj, hrestnsec; /* delay: apply adjustment */ \
 555 2:      addcc   adj, nslt, %g0;         /* hrestime_adj + nslt/16 */    \
 556         bge,a,pt %xcc, 3f;              /* is adj less negative? */     \
 557         add     hrestnsec, adj, hrestnsec; /* yes: hrest.nsec += adj */ \
 558         sub     hrestnsec, nslt, hrestnsec; /* no: hrest.nsec -= nslt/16 */ \
 559 3:      cmp     hrestnsec, nano;        /* more than a billion? */      \
 560         bl,pt   %xcc, 4f;               /* if not, we're done */        \
 561         nop;                            /* delay: do nothing :( */      \
 562         add     hrestsec, 1, hrestsec;  /* hrest.tv_sec++; */           \
 563         sub     hrestnsec, nano, hrestnsec; /* hrest.tv_nsec -= NANOSEC; */ \
 564         ba,a    3b;                     /* check >= billion again */ \
 565 4:
 566 
 567         ENTRY_NP(gethrestime)
             /*
              * void gethrestime(timespec_t *tp)
              *
              * GET_HRESTIME followed by CONV_HRESTIME leaves seconds in %o1
              * and nanoseconds in %o2.  They are stored at [%o0] and
              * [%o0 + CLONGSIZE]; the second store is in the delay slot of
              * the return.  %o3-%o5 and %g1-%g4 are passed to the macros as
              * working registers.
              */
 568         GET_HRESTIME(%o1, %o2, %o3, %o4, %o5, %g1, %g2, %g3, %g4)
 569         CONV_HRESTIME(%o1, %o2, %o3, %o4, %o5)
 570         stn     %o1, [%o0]
 571         retl
 572         stn     %o2, [%o0 + CLONGSIZE]
 573         SET_SIZE(gethrestime)
 574 
 575 /*
 576  * Similar to gethrestime(), but gethrestime_sec() returns current hrestime
 577  * seconds.
      *
      * The same GET_HRESTIME/CONV_HRESTIME pair is used, with the seconds
      * register set to %o0 so that it is already the return value.  The
      * nanosecond part (%o2) is computed and then discarded.
 578  */
 579         ENTRY_NP(gethrestime_sec)
 580         GET_HRESTIME(%o0, %o2, %o3, %o4, %o5, %g1, %g2, %g3, %g4)
 581         CONV_HRESTIME(%o0, %o2, %o3, %o4, %o5)
 582         retl                                    ! %o0 current hrestime seconds
 583         nop
 584         SET_SIZE(gethrestime_sec)
 585 
 586 /*
 587  * Returns the hrestime on the last tick.  This is simpler than gethrestime()
 588  * and gethrestime_sec():  no conversion is required.  gethrestime_lasttick()
 589  * follows the same locking algorithm as GET_HRESTIME and GET_HRTIME,
 590  * outlined in detail in clock.h.  (Unlike GET_HRESTIME/GET_HRTIME, we don't
 591  * rely on load dependencies to effect the membar #LoadLoad, instead declaring
 592  * it explicitly.)
      *
      * Sequence: load hres_lock and clear its lowest bit, load the seconds
      * and nanoseconds of hrestime, reload hres_lock, and retry from the
      * top if the reloaded value differs from the masked first value.
      * The store of the seconds is in the (non-annulled) delay slot of the
      * retry branch, so it is performed on every pass, including passes
      * that are retried; the nanoseconds are stored in the delay slot of
      * the return.
      *
      * hrestime is addressed as %hi(hres_lock) + %lo(hrestime), which only
      * works if the two symbols share the same upper address bits --
      * presumably guaranteed where they are defined; confirm.
 593  */
 594         ENTRY_NP(gethrestime_lasttick)
 595         sethi   %hi(hres_lock), %o1
 596 0:
 597         lduw    [%o1 + %lo(hres_lock)], %o2     ! Load lock value
 598         membar  #LoadLoad                       ! Load of lock must complete
 599         andn    %o2, 1, %o2                     ! Mask off lowest bit   
 600         ldn     [%o1 + %lo(hrestime)], %g1      ! Seconds.
 601         add     %o1, %lo(hrestime), %o4
 602         ldn     [%o4 + CLONGSIZE], %g2          ! Nanoseconds.
 603         membar  #LoadLoad                       ! All loads must complete
 604         lduw    [%o1 + %lo(hres_lock)], %o3     ! Reload lock value
 605         cmp     %o3, %o2                        ! If lock is locked or has
 606         bne     0b                              !   changed, retry.
 607         stn     %g1, [%o0]                      ! Delay: store seconds
 608         retl
 609         stn     %g2, [%o0 + CLONGSIZE]          ! Delay: store nanoseconds
 610         SET_SIZE(gethrestime_lasttick)
 611 
 612 /*
 613  * Fast trap for gettimeofday().  Returns a timestruc_t in %o0 and %o1.
 614  *
 615  * This is the handler for the ST_GETHRESTIME trap.
      *
      * %o0 is the seconds register and %o1 the nanoseconds register passed
      * to GET_HRESTIME/CONV_HRESTIME; %g1-%g5, %o2 and %o3 are passed as
      * working registers.  The handler exits through FAST_TRAP_DONE.
 616  */
 617 
 618         ENTRY_NP(get_hrestime)
 619         GET_HRESTIME(%o0, %o1, %g1, %g2, %g3, %g4, %g5, %o2, %o3)
 620         CONV_HRESTIME(%o0, %o1, %g1, %g2, %g3)
 621         FAST_TRAP_DONE
 622         SET_SIZE(get_hrestime)
 623 
 624 /*
 625  * Fast trap to return lwp virtual time, uses trap window, leaves traps
 626  * disabled.  Returns a 64-bit number in %o0:%o1, which is the number
 627  * of nanoseconds consumed.
 628  *
 629  * This is the handler for the ST_GETHRVTIME trap.
 630  *
 631  * Register usage:
 632  *      %o0, %o1 = return lwp virtual time (%o0 = high 32, %o1 = low 32)
 633  *      %g2 = CPU pointer, then thread pointer
 634  *      %g3 = lwp pointer (also passed to CPU_ADDR as its second argument)
 635  *      %g1 = scratch
 636  *      %g5 = running time value (native time, then the result)
      *
      * %o0 is also handed to NATIVE_TIME_TO_NSEC as a scratch argument
      * before it receives the high half of the result.
 637  */
 638         ENTRY_NP(get_virtime)
 639         GET_NATIVE_TIME(%g5, %g1, %g2)  ! %g5 = native time in ticks
 640         CPU_ADDR(%g2, %g3)                      ! CPU struct ptr to %g2
 641         ldn     [%g2 + CPU_THREAD], %g2         ! thread pointer to %g2
 642         ldn     [%g2 + T_LWP], %g3              ! lwp pointer to %g3
 643 
 644         /*
 645          * Subtract start time of current microstate from time
 646          * of day to get increment for lwp virtual time.
 647          */
 648         ldx     [%g3 + LWP_STATE_START], %g1    ! ms_state_start
 649         sub     %g5, %g1, %g5
 650 
 651         /*
 652          * Add current value of ms_acct[LMS_USER]
 653          */
 654         ldx     [%g3 + LWP_ACCT_USER], %g1      ! ms_acct[LMS_USER]
 655         add     %g5, %g1, %g5
 656         NATIVE_TIME_TO_NSEC(%g5, %g1, %o0) 
 657         
 658         srl     %g5, 0, %o1                     ! %o1 = lo32(%g5)
 659         srlx    %g5, 32, %o0                    ! %o0 = hi32(%g5)
 660 
 661         FAST_TRAP_DONE
 662         SET_SIZE(get_virtime)
 663 
 664 
 665 
 666         .seg    ".text"
 667 hrtime_base_panic:
 668         .asciz  "hrtime_base stepping back"
 669 
 670 
     /*
      * void hres_tick(void)
      *
      * Advance hrtime_base and hrestime by the time elapsed since the
      * previous update, holding hres_lock while doing so.
      *
      *  1. Take the lock with ldstub on the byte at
      *     hres_lock + HRES_LOCK_OFFSET.  While it is held elsewhere, spin
      *     on plain ldub reads and retry the ldstub only after the byte
      *     reads zero.  nsec_scale is loaded into %l5 in the delay slot of
      *     the branch that leaves the loop.
      *  2. Read the native time, record it in hres_last_tick, and scale it
      *     to nanoseconds with NATIVE_TIME_TO_NSEC_SCALE.
      *  3. %i1 = new nanosecond time - previous hrtime_base.  If
      *     hrtime_base is greater than the new time, release the lock and
      *     panic with hrtime_base_panic; otherwise store the new value.
      *  4. Choose adj: zero if hrestime_adj is zero, otherwise hrestime_adj
      *     limited in magnitude to (%i1 >> ADJ_SHIFT).  The shift is a
      *     32-bit srl, so only the low 32 bits of %i1 take part.
      *  5. timedelta -= adj, and hrestime_adj is set to the new timedelta.
      *  6. hrestime.tv_nsec += adj + %i1.  If the sum is not below NANOSEC,
      *     one second is carried into tv_sec (a single carry, not a loop)
      *     and one_sec is set to 1.
      *  7. membar #StoreStore, then release the lock by incrementing the
      *     32-bit hres_lock word.
      *
      * Every variable is addressed as %hi(hrestime) + %lo(symbol), which
      * only works if all of them share the same upper address bits --
      * presumably guaranteed where they are defined; confirm.
      *
      * The numeric label 9 is defined twice: "9b" in the spin loop refers
      * to the first definition and "9f" in the step-back check refers to
      * the panic path at the end.
      */
 671         ENTRY_NP(hres_tick)
 672         save    %sp, -SA(MINFRAME), %sp ! get a new window
 673 
 674         sethi   %hi(hrestime), %l4
 675         ldstub  [%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5  ! try locking
 676 7:      tst     %l5
 677         bz,pt   %xcc, 8f                        ! if we got it, drive on
 678         ld      [%l4 + %lo(nsec_scale)], %l5    ! delay: %l5 = scaling factor
 679         ldub    [%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
 680 9:      tst     %l5
 681         bz,a,pn %xcc, 7b
 682         ldstub  [%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
 683         ba,pt   %xcc, 9b
 684         ldub    [%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
 685 8:
 686         membar  #StoreLoad|#StoreStore
 687 
 688         !
 689         ! update hres_last_tick.  %l5 has the scaling factor (nsec_scale).
 690         !
 691         ldx     [%l4 + %lo(hrtime_base)], %g1   ! load current hrtime_base
 692         GET_NATIVE_TIME(%l0, %l3, %l6)          ! current native time
 693         stx     %l0, [%l4 + %lo(hres_last_tick)]! prev = current
 694         ! convert native time to nsecs
 695         NATIVE_TIME_TO_NSEC_SCALE(%l0, %l5, %l2, NSEC_SHIFT)
 696 
 697         sub     %l0, %g1, %i1                   ! get accurate nsec delta
 698 
 699         ldx     [%l4 + %lo(hrtime_base)], %l1   
 700         cmp     %l1, %l0
 701         bg,pn   %xcc, 9f
 702         nop
 703 
 704         stx     %l0, [%l4 + %lo(hrtime_base)]   ! update hrtime_base
 705 
 706         !
 707         ! apply adjustment, if any
 708         !
 709         ldx     [%l4 + %lo(hrestime_adj)], %l0  ! %l0 = hrestime_adj
 710         brz     %l0, 2f
 711                                                 ! hrestime_adj == 0 ?
 712                                                 ! yes, skip adjustments
 713         clr     %l5                             ! delay: set adj to zero
 714         tst     %l0                             ! is hrestime_adj >= 0 ?
 715         bge,pt  %xcc, 1f                        ! yes, go handle positive case
 716         srl     %i1, ADJ_SHIFT, %l5             ! delay: %l5 = adj
 717 
 718         addcc   %l0, %l5, %g0                   ! hrestime_adj < -adj ?
 719         bl,pt   %xcc, 2f                        ! yes, use current adj
 720         neg     %l5                             ! delay: %l5 = -adj
 721         ba,pt   %xcc, 2f
 722         mov     %l0, %l5                        ! no, so set adj = hrestime_adj
 723 1:
 724         subcc   %l0, %l5, %g0                   ! hrestime_adj < adj ?
 725         bl,a,pt %xcc, 2f                        ! yes, set adj = hrestime_adj
 726         mov     %l0, %l5                        ! delay: adj = hrestime_adj
 727 2:
 728         ldx     [%l4 + %lo(timedelta)], %l0     ! %l0 = timedelta
 729         sub     %l0, %l5, %l0                   ! timedelta -= adj
 730 
 731         stx     %l0, [%l4 + %lo(timedelta)]     ! store new timedelta
 732         stx     %l0, [%l4 + %lo(hrestime_adj)]  ! hrestime_adj = timedelta
 733 
 734         or      %l4, %lo(hrestime), %l2
 735         ldn     [%l2], %i2                      ! %i2:%i3 = hrestime sec:nsec
 736         ldn     [%l2 + CLONGSIZE], %i3
 737         add     %i3, %l5, %i3                   ! hrestime.nsec += adj
 738         add     %i3, %i1, %i3                   ! hrestime.nsec += nslt
 739 
 740         set     NANOSEC, %l5                    ! %l5 = NANOSEC
 741         cmp     %i3, %l5
 742         bl,pt   %xcc, 5f                        ! if hrestime.tv_nsec < NANOSEC
 743         sethi   %hi(one_sec), %i1               ! delay
 744         add     %i2, 0x1, %i2                   ! hrestime.tv_sec++
 745         sub     %i3, %l5, %i3                   ! hrestime.tv_nsec - NANOSEC
 746         mov     0x1, %l5
 747         st      %l5, [%i1 + %lo(one_sec)]
 748 5:
 749         stn     %i2, [%l2]
 750         stn     %i3, [%l2 + CLONGSIZE]          ! store the new hrestime
 751 
 752         membar  #StoreStore
 753 
 754         ld      [%l4 + %lo(hres_lock)], %i1
 755         inc     %i1                             ! release lock
 756         st      %i1, [%l4 + %lo(hres_lock)]     ! clear hres_lock
 757 
 758         ret
 759         restore
 760 
 761 9:
 762         !
 763         ! release hres_lock
 764         !
 765         ld      [%l4 + %lo(hres_lock)], %i1
 766         inc     %i1
 767         st      %i1, [%l4 + %lo(hres_lock)]
 768 
 769         sethi   %hi(hrtime_base_panic), %o0
 770         call    panic
 771         or      %o0, %lo(hrtime_base_panic), %o0
 772 
 773         SET_SIZE(hres_tick)
 774 
 775 #endif  /* lint */
 776 
 777 #if !defined(lint) && !defined(__lint)
 778 
 779         .seg    ".text"
 780 kstat_q_panic_msg:
 781         .asciz  "kstat_q_exit: qlen == 0"
 782 
     /*
      * kstat_q_panic: branch target used by the DEBUG forms of the kstat
      * queue-exit routines below when the old queue length is zero.  It
      * opens a new register window and calls panic() with
      * kstat_q_panic_msg; the low part of the message address is filled in
      * during the delay slot of the call.  Nothing follows the call, per
      * the NOTREACHED marker.
      */
 783         ENTRY(kstat_q_panic)
 784         save    %sp, -SA(MINFRAME), %sp
 785         sethi   %hi(kstat_q_panic_msg), %o0
 786         call    panic
 787         or      %o0, %lo(kstat_q_panic_msg), %o0
 788         /*NOTREACHED*/
 789         SET_SIZE(kstat_q_panic)
 790 
 791 #define BRZPN   brz,pn
 792 #define BRZPT   brz,pt
 793 
     /*
      * KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE)
      *
      * Update one queue's counters in the structure pointed to by %o0.
      * On entry %g1 must hold the current time; the routines below load it
      * with GET_NATIVE_TIME.  Clobbers %o1-%o5.
      *
      *      QOP      "add" or "sub": new qlen = old qlen QOP 1
      *      QBR      branch opcode (BRZPN or BRZPT) testing old qlen == 0
      *      QZERO    target of that branch
      *      QRETURN  text placed immediately before the final store.  The
      *               callers pass "retl" (making that store the delay slot
      *               of the return), "1:retl", or just "1:"
      *      QTYPE    field-offset prefix; QTYPE/ * * /CNT and friends rely
      *               on the preprocessor removing the empty comment to join
      *               the two tokens
      *
      * The new qlen is stored in the delay slot of QBR, so it is written
      * whether or not the branch is taken.  When the old qlen is non-zero,
      * the time since LASTUPDATE is added to TIME and (old qlen * that
      * delta) is added to LENTIME.  LASTUPDATE is then set to %g1.
      */
 794 #define KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE) \
 795         ld      [%o0 + QTYPE/**/CNT], %o1;      /* %o1 = old qlen */    \
 796         QOP     %o1, 1, %o2;                    /* %o2 = new qlen */    \
 797         QBR     %o1, QZERO;                     /* done if qlen == 0 */ \
 798         st      %o2, [%o0 + QTYPE/**/CNT];      /* delay: save qlen */  \
 799         ldx     [%o0 + QTYPE/**/LASTUPDATE], %o3;                       \
 800         ldx     [%o0 + QTYPE/**/TIME], %o4;     /* %o4 = old time */    \
 801         ldx     [%o0 + QTYPE/**/LENTIME], %o5;  /* %o5 = old lentime */ \
 802         sub     %g1, %o3, %o2;                  /* %o2 = time delta */  \
 803         mulx    %o1, %o2, %o3;                  /* %o3 = cur lentime */ \
 804         add     %o4, %o2, %o4;                  /* %o4 = new time */    \
 805         add     %o5, %o3, %o5;                  /* %o5 = new lentime */ \
 806         stx     %o4, [%o0 + QTYPE/**/TIME];     /* save time */         \
 807         stx     %o5, [%o0 + QTYPE/**/LENTIME];  /* save lentime */      \
 808 QRETURN;                                                                \
 809         stx     %g1, [%o0 + QTYPE/**/LASTUPDATE]; /* lastupdate = now */
 810 
 811 #if !defined(DEBUG)
 812 /*
 813  * same as KSTAT_Q_UPDATE but without:
 814  * QBR     %o1, QZERO;
 815  * to be used only with non-debug build. mimics ASSERT() behaviour.
      *
      * With no zero-length test, the time and lentime accounting always
      * runs.  The store of the new qlen is an ordinary instruction here;
      * it is not in a delay slot, because this variant has no branch.
 816  */
 817 #define KSTAT_Q_UPDATE_ND(QOP, QRETURN, QTYPE) \
 818         ld      [%o0 + QTYPE/**/CNT], %o1;      /* %o1 = old qlen */    \
 819         QOP     %o1, 1, %o2;                    /* %o2 = new qlen */    \
 820         st      %o2, [%o0 + QTYPE/**/CNT];      /* save new qlen */     \
 821         ldx     [%o0 + QTYPE/**/LASTUPDATE], %o3;                       \
 822         ldx     [%o0 + QTYPE/**/TIME], %o4;     /* %o4 = old time */    \
 823         ldx     [%o0 + QTYPE/**/LENTIME], %o5;  /* %o5 = old lentime */ \
 824         sub     %g1, %o3, %o2;                  /* %o2 = time delta */  \
 825         mulx    %o1, %o2, %o3;                  /* %o3 = cur lentime */ \
 826         add     %o4, %o2, %o4;                  /* %o4 = new time */    \
 827         add     %o5, %o3, %o5;                  /* %o5 = new lentime */ \
 828         stx     %o4, [%o0 + QTYPE/**/TIME];     /* save time */         \
 829         stx     %o5, [%o0 + QTYPE/**/LENTIME];  /* save lentime */      \
 830 QRETURN;                                                                \
 831         stx     %g1, [%o0 + QTYPE/**/LASTUPDATE]; /* lastupdate = now */
 832 #endif
 833 
 834         .align 16
             /*
              * kstat_waitq_enter: a request is entering the wait queue.
              *
              * %o0 is the base address for the KSTAT_IO_W* field offsets
              * (presumably a kstat_io_t pointer -- confirm against the C
              * prototype).  %g1 receives the current native time, with %g2
              * and %g3 as scratch.  KSTAT_Q_UPDATE then adds 1 to the queue
              * length, folds the time since the last update into the
              * time/lentime accumulators and stores lastupdate = now.
              *
              * QBR/QZERO are BRZPT/1f and QRETURN is "1:retl", so an old
              * queue length of zero is expected to branch straight to the
              * retl (BRZPT is defined elsewhere; presumably branch-on-zero,
              * predict taken).  The lastupdate store is the retl delay slot
              * on both paths.
              */
 835         ENTRY(kstat_waitq_enter)
 836         GET_NATIVE_TIME(%g1, %g2, %g3)
 837         KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
 838         SET_SIZE(kstat_waitq_enter)
 839 
 840         .align 16
             /*
              * kstat_waitq_exit: a request is leaving the wait queue.
              *
              * %o0 is the base address for the KSTAT_IO_W* field offsets;
              * %g1 receives the current native time (%g2/%g3 scratch).  The
              * queue length is decremented (QOP = sub) and the time/lentime
              * accumulators and lastupdate are brought up to date; the
              * lastupdate store is the retl delay slot.
              *
              * DEBUG builds pass BRZPN/kstat_q_panic so that an old queue
              * length of zero diverts to kstat_q_panic.  Non-DEBUG builds
              * use KSTAT_Q_UPDATE_ND, which performs no such check.
              */
 841         ENTRY(kstat_waitq_exit)
 842         GET_NATIVE_TIME(%g1, %g2, %g3)
 843 #if defined(DEBUG)
 844         KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_W)
 845 #else
 846         KSTAT_Q_UPDATE_ND(sub, retl, KSTAT_IO_W)
 847 #endif
 848         SET_SIZE(kstat_waitq_exit)
 849 
 850         .align 16
             /*
              * kstat_runq_enter: a request is entering the run queue.
              *
              * Identical in shape to kstat_waitq_enter but operates on the
              * KSTAT_IO_R* fields of the structure at %o0: %g1 = current
              * native time (%g2/%g3 scratch), queue length incremented,
              * time/lentime accumulated, lastupdate = now stored in the
              * retl delay slot.  An old queue length of zero is expected to
              * branch directly to the "1:retl" return.
              */
 851         ENTRY(kstat_runq_enter)
 852         GET_NATIVE_TIME(%g1, %g2, %g3)
 853         KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
 854         SET_SIZE(kstat_runq_enter)
 855 
 856         .align 16
             /*
              * kstat_runq_exit: a request is leaving the run queue.
              *
              * Identical in shape to kstat_waitq_exit but operates on the
              * KSTAT_IO_R* fields of the structure at %o0.  DEBUG builds
              * divert to kstat_q_panic when the old queue length is zero;
              * non-DEBUG builds use the unchecked KSTAT_Q_UPDATE_ND.  The
              * lastupdate store is the retl delay slot.
              */
 857         ENTRY(kstat_runq_exit)
 858         GET_NATIVE_TIME(%g1, %g2, %g3)
 859 #if defined(DEBUG)
 860         KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_R)
 861 #else
 862         KSTAT_Q_UPDATE_ND(sub, retl, KSTAT_IO_R)
 863 #endif
 864         SET_SIZE(kstat_runq_exit)
 865 
 866         .align 16
             /*
              * kstat_waitq_to_runq: move one request from the wait queue to
              * the run queue, using a single time sample in %g1 for both
              * updates.
              *
              * The first update decrements the KSTAT_IO_W* queue.  Its
              * QRETURN argument is just the label "1:", so after storing
              * lastupdate execution falls through into the second update.
              * In DEBUG builds an old wait-queue length of zero diverts to
              * kstat_q_panic.
              *
              * The second update increments the KSTAT_IO_R* queue and
              * returns through "1:retl"; its "1f" refers to that later
              * label, not to the "1:" emitted by the first update.
              */
 867         ENTRY(kstat_waitq_to_runq)
 868         GET_NATIVE_TIME(%g1, %g2, %g3)
 869 #if defined(DEBUG)
 870         KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_W)
 871 #else
 872         KSTAT_Q_UPDATE_ND(sub, 1:, KSTAT_IO_W)
 873 #endif
 874         KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
 875         SET_SIZE(kstat_waitq_to_runq)
 876 
 877         .align 16
             /*
              * kstat_runq_back_to_waitq: move one request from the run queue
              * back to the wait queue, using a single time sample in %g1 for
              * both updates.
              *
              * Mirror image of kstat_waitq_to_runq: the first update
              * decrements the KSTAT_IO_R* queue (DEBUG builds divert to
              * kstat_q_panic if it was already zero) and falls through at
              * its "1:" label; the second increments the KSTAT_IO_W* queue
              * and returns through "1:retl".
              */
 878         ENTRY(kstat_runq_back_to_waitq)
 879         GET_NATIVE_TIME(%g1, %g2, %g3)
 880 #if defined(DEBUG)
 881         KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_R)
 882 #else
 883         KSTAT_Q_UPDATE_ND(sub, 1:, KSTAT_IO_R)
 884 #endif
 885         KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
 886         SET_SIZE(kstat_runq_back_to_waitq)
 887 
 888 #endif  /* !(lint || __lint) */
 889 
 890 #ifdef lint     
 891 
 892 int64_t timedelta;
 893 hrtime_t hres_last_tick;
 894 volatile timestruc_t hrestime;
 895 int64_t hrestime_adj;
 896 volatile int hres_lock;
 897 uint_t nsec_scale;
 898 hrtime_t hrtime_base;
 899 int traptrace_use_stick;
 900 
 901 #else   /* lint */
 902         /*
 903          *  -- WARNING --
 904          *
 905          * The following variables MUST be together on a 128-byte boundary.
 906          * In addition to the primary performance motivation (having them all
 907          * on the same cache line(s)), code here and in the GET*TIME() macros
 908          * assumes that they all have the same high 22 address bits (so
 909          * there's only one sethi).
              *
              * sethi supplies the upper 22 bits of a 32-bit value and %lo() the
              * remaining 10, so sharing one sethi requires every variable to
              * lie inside the same 1 KB-aligned region.  With 8-byte .nword
              * entries the block below is 68 bytes long (adj_shift is at
              * offset 64).  A 64-byte alignment would therefore allow the tail
              * of the block to spill into the next 1 KB region whenever the
              * block starts 960 bytes into one; a 128-byte alignment cannot
              * (the last possible start is 896, and 896 + 68 < 1024).
 910          */
 911         .seg    ".data"
 912         .global timedelta, hres_last_tick, hrestime, hrestime_adj
 913         .global hres_lock, nsec_scale, hrtime_base, traptrace_use_stick
 914         .global nsec_shift, adj_shift
 915 
 916         /* 128-byte alignment, as required by the WARNING above */
 917         .align  128
 918 timedelta:
 919         .word   0, 0            /* int64_t */
 920 hres_last_tick:
 921         .word   0, 0            /* hrtime_t */
 922 hrestime:
 923         .nword  0, 0            /* 2 longs */
 924 hrestime_adj:
 925         .word   0, 0            /* int64_t */
 926 hres_lock:
 927         .word   0
 928 nsec_scale:
 929         .word   0
 930 hrtime_base:
 931         .word   0, 0
 932 traptrace_use_stick:
 933         .word   0
 934 nsec_shift:
 935         .word   NSEC_SHIFT
 936 adj_shift:
 937         .word   ADJ_SHIFT
 938 
 939 #endif  /* lint */
 940 
 941 
 942 /*
 943  * drv_usecwait(clock_t n)      [DDI/DKI - section 9F]
 944  * usec_delay(int n)            [compatibility - should go one day]
 945  * Delay by spinning.
 946  *
 947  * delay for n microseconds.  numbers <= 0 delay 1 usec
 948  *
 949  * With UltraSPARC-III the combination of supporting mixed-speed CPUs
 950  * and variable clock rate for power management requires that we
 951  * use %stick to implement this routine.
 952  *
 953  * For OPL platforms that support the "sleep" instruction, we
 954  * conditionally (ifdef'ed) insert a "sleep" instruction in
 955  * the loop. Note that theoretically we should have moved (duplicated)
 956  * the code down to spitfire/us3/opl specific asm files - but this
 957  * is a lot of code duplication just to add one "sleep" instruction.
 958  * We chose less code duplication for this.
 959  */
 960 
 961 #if defined(lint)
 962 
 963 /*ARGSUSED*/
 964 void
 965 drv_usecwait(clock_t n)
 966 {}
 967 
 968 /*ARGSUSED*/
 969 void
 970 usec_delay(int n)
 971 {}
 972 
 973 #else   /* lint */
 974 
 975         ENTRY(drv_usecwait)
 976         ALTENTRY(usec_delay)
             /*
              * The branch is annulled, so the "mov 1, %o0" in its delay slot
              * only executes when the branch is taken, i.e. when n <= 0.
              */
 977         brlez,a,pn %o0, 0f
 978         mov     1, %o0
 979 0:
             /*
              * %o1 = deadline = (n * sticks_per_usec + 1) + time at entry.
              * %o2 holds the most recent time sample; %o3/%o4 are scratch
              * for GET_NATIVE_TIME.
              */
 980         sethi   %hi(sticks_per_usec), %o1
 981         lduw    [%o1 + %lo(sticks_per_usec)], %o1
 982         mulx    %o1, %o0, %o1           ! Scale usec to ticks
 983         inc     %o1                     ! We don't start on a tick edge
 984         GET_NATIVE_TIME(%o2, %o3, %o4)
 985         add     %o1, %o2, %o1
 986 
 987 1:
 988 #ifdef  _OPL
 989         .word 0x81b01060                ! insert "sleep" instruction
 990 #endif /* _OPL */                       ! use byte code for now
             /*
              * The compare uses the previous time sample; GET_NATIVE_TIME
              * then refreshes %o2 for the next pass.  This ordering relies
              * on GET_NATIVE_TIME leaving the condition codes untouched, as
              * its definition requires.  Loop while deadline >= sample
              * (unsigned, 64-bit).
              */
 991         cmp     %o1, %o2
 992         GET_NATIVE_TIME(%o2, %o3, %o4)
 993         bgeu,pt %xcc, 1b
 994         nop
 995         retl
 996         nop
 997         SET_SIZE(usec_delay)
 998         SET_SIZE(drv_usecwait)
 999 #endif  /* lint */
1000 
1001 #if defined(lint)
1002 
1003 /* ARGSUSED */
1004 void
1005 pil14_interrupt(int level)
1006 {}
1007 
1008 #else   /* lint */
1009 
1010 /*
1011  * Level-14 interrupt prologue.
      *
      * Records profiling state in the structure whose address CPU_ADDR
      * leaves in %g1 (presumably the current cpu_t, with %g2 as scratch --
      * confirm against the macro), then branches to pil_interrupt_common:
      *
      *   CPU_PROFILE_PIL  <- %pil at the time of the interrupt
      *   CPU_PROFILE_PC   <- %tpc if TSTATE_PRIV is set in %tstate, else 0
      *   CPU_PROFILE_UPC  <- %tpc if TSTATE_PRIV is clear, else 0
      *
      * The bnz is annulled: its delay-slot store to CPU_PROFILE_PC only
      * executes when the branch to 1f is taken (privileged case).  Each
      * "ba" zeroes the other PC field from its own delay slot.
      *
      * Uses %g1, %g2, %g5 and %g6.
1012  */
1013         ENTRY_NP(pil14_interrupt)
1014         CPU_ADDR(%g1, %g2)
1015         rdpr    %pil, %g6                       ! %g6 = interrupted PIL
1016         stn     %g6, [%g1 + CPU_PROFILE_PIL]    ! record interrupted PIL
1017         rdpr    %tstate, %g6
1018         rdpr    %tpc, %g5
1019         btst    TSTATE_PRIV, %g6                ! trap from supervisor mode?
1020         bnz,a,pt %xcc, 1f
1021         stn     %g5, [%g1 + CPU_PROFILE_PC]     ! if so, record kernel PC
1022         stn     %g5, [%g1 + CPU_PROFILE_UPC]    ! if not, record user PC
1023         ba      pil_interrupt_common            ! must be large-disp branch
1024         stn     %g0, [%g1 + CPU_PROFILE_PC]     ! zero kernel PC
1025 1:      ba      pil_interrupt_common            ! must be large-disp branch
1026         stn     %g0, [%g1 + CPU_PROFILE_UPC]    ! zero user PC
1027         SET_SIZE(pil14_interrupt)
1028 
1029         ENTRY_NP(tick_rtt)
             /*
              * Register usage in this routine:
              *
              *   %o5  TICK_COMPARE value read by RD_TICKCMPR; later the value
              *        most recently written by WR_TICKCMPR.
              *   %g5  %pstate as it was before PSTATE_IE was cleared; written
              *        back to %pstate (from an annulled delay slot) on every
              *        branch to 2f taken after interrupts were disabled.
              *   %o4  SOFTINT contents, then the retry step, which starts at
              *        8 and doubles after each failed attempt.
              *   %o0  PIL_14 argument for intr_enqueue_req, then the
              *        TICK_INT_MASK | STICK_INT_MASK mask, then the current
              *        time with its top bit cleared.
              *   %g1, %g2  scratch.
              *
              * Every path leaves through current_thread_complete at 2:.
              */
1030         !
1031         ! Load TICK_COMPARE into %o5; if bit 63 is set, then TICK_COMPARE is
1032         ! disabled.  If TICK_COMPARE is enabled, we know that we need to
1033         ! reenqueue the interrupt request structure.  We'll then check TICKINT
1034         ! in SOFTINT; if it's set, then we know that we were in a TICK_COMPARE
1035         ! interrupt.  In this case, TICK_COMPARE may have been rewritten
1036         ! recently; we'll compare %o5 to the current time to verify that it's
1037         ! in the future.  
1038         !
1039         ! Note that %o5 is live until after 1f.
1040         ! XXX - there is a subroutine call while %o5 is live!
1041         !
1042         RD_TICKCMPR(%o5, %g1)
1043         srlx    %o5, TICKINT_DIS_SHFT, %g1
1044         brnz,pt %g1, 2f
1045         nop
1046 
1047         rdpr    %pstate, %g5
1048         andn    %g5, PSTATE_IE, %g1
1049         wrpr    %g0, %g1, %pstate               ! Disable vec interrupts
1050 
1051         sethi   %hi(cbe_level14_inum), %o1
1052         ldx     [%o1 + %lo(cbe_level14_inum)], %o1
1053         call    intr_enqueue_req ! preserves %o5 and %g5
1054         mov     PIL_14, %o0
1055 
1056         ! Check SOFTINT for TICKINT/STICKINT
1057         rd      SOFTINT, %o4
1058         set     (TICK_INT_MASK | STICK_INT_MASK), %o0
1059         andcc   %o4, %o0, %g0
1060         bz,a,pn %icc, 2f
1061         wrpr    %g0, %g5, %pstate               ! Enable vec interrupts
1062 
1063         ! clear TICKINT/STICKINT
1064         wr      %o0, CLEAR_SOFTINT
1065 
1066         !
1067         ! Now that we've cleared TICKINT, we can reread %tick and confirm
1068         ! that the value we programmed is still in the future.  If it isn't,
1069         ! we need to reprogram TICK_COMPARE to fire as soon as possible.
1070         !
1071         GET_NATIVE_TIME(%o0, %g1, %g2)          ! %o0 = tick
1072         sllx    %o0, 1, %o0                     ! Clear the DIS bit
1073         srlx    %o0, 1, %o0
1074         cmp     %o5, %o0                        ! In the future?
1075         bg,a,pt %xcc, 2f                        ! Yes, drive on.
1076         wrpr    %g0, %g5, %pstate               !   delay: enable vec intr
1077 
1078         !
1079         ! If we're here, then we have programmed TICK_COMPARE with a %tick
1080         ! which is in the past; we'll now load an initial step size, and loop
1081         ! until we've managed to program TICK_COMPARE to fire in the future.
1082         !
1083         mov     8, %o4                          ! 8 = arbitrary inital step
1084 1:      add     %o0, %o4, %o5                   ! Add the step
1085         WR_TICKCMPR(%o5,%g1,%g2,__LINE__)       ! Write to TICK_CMPR
1086         GET_NATIVE_TIME(%o0, %g1, %g2)          ! %o0 = tick
1087         sllx    %o0, 1, %o0                     ! Clear the DIS bit
1088         srlx    %o0, 1, %o0
1089         cmp     %o5, %o0                        ! In the future?
1090         bg,a,pt %xcc, 2f                        ! Yes, drive on.
1091         wrpr    %g0, %g5, %pstate               !    delay: enable vec intr
1092         ba      1b                              ! No, try again.
1093         sllx    %o4, 1, %o4                     !    delay: double step size
1094 
1095 2:      ba      current_thread_complete
1096         nop
1097         SET_SIZE(tick_rtt)
1098 
1099 #endif  /* lint */
1100 
1101 #if defined(lint)
1102 
1103 /* ARGSUSED */
1104 void
1105 pil15_interrupt(int level)
1106 {}
1107 
1108 #else  /* lint */
1109 
1110 /*
1111  * Level-15 interrupt prologue.
      *
      * Stores %tpc into exactly one of two fields of the structure whose
      * address CPU_ADDR leaves in %g1 (presumably the current cpu_t, with
      * %g2 as scratch -- confirm against the macro), zeroes the other, and
      * branches to pil15_epilogue:
      *
      *   CPU_CPCPROFILE_PC   <- %tpc if TSTATE_PRIV is set in %tstate, else 0
      *   CPU_CPCPROFILE_UPC  <- %tpc if TSTATE_PRIV is clear, else 0
      *
      * The bnz is annulled: its delay-slot store to CPU_CPCPROFILE_PC only
      * executes when the branch to 1f is taken (privileged case).
      *
      * Uses %g1, %g2, %g5 and %g6.
1112  */
1113        ENTRY_NP(pil15_interrupt)
1114        CPU_ADDR(%g1, %g2)
1115        rdpr    %tstate, %g6
1116        rdpr    %tpc, %g5
1117        btst    TSTATE_PRIV, %g6                ! trap from supervisor mode?
1118        bnz,a,pt %xcc, 1f
1119        stn     %g5, [%g1 + CPU_CPCPROFILE_PC]  ! if so, record kernel PC
1120        stn     %g5, [%g1 + CPU_CPCPROFILE_UPC] ! if not, record user PC
1121        ba      pil15_epilogue                  ! must be large-disp branch
1122        stn     %g0, [%g1 + CPU_CPCPROFILE_PC]  ! zero kernel PC
1123 1:     ba      pil15_epilogue                  ! must be large-disp branch
1124        stn     %g0, [%g1 + CPU_CPCPROFILE_UPC] ! zero user PC
1125        SET_SIZE(pil15_interrupt)
1126 
1127 #endif /* lint */
1128 
1129 #if defined(lint) || defined(__lint)
1130 
1131 /* ARGSUSED */
1132 uint64_t
1133 find_cpufrequency(volatile uchar_t *clock_ptr)
1134 {
1135         return (0);
1136 }
1137 
1138 #else   /* lint */
1139 
     /*
      * find_cpufrequency(clock_ptr)
      *
      * Returns the number of native-time ticks that elapse between two
      * successive changes of the byte at *clock_ptr (%o0), which the
      * comments below treat as a seconds register.
      *
      *   1. Disable interrupts.
      *   2. Poll the byte until it changes, sampling the time into %o3 on
      *      every pass; %o3 ends up holding the sample from the pass that
      *      saw the change.
      *   3. If the new value is zero, go back to step 2 (label 3).
      *   4. Poll until the byte changes again, sampling into %o5.
      *   5. Restore %pstate.  If the new value is zero, start over from
      *      label 0, which disables interrupts again.
      *   6. Return %o5 - %o3.
      *
      * wrpr writes the XOR of its two source operands, so
      * "wrpr %g1, PSTATE_IE, %pstate" toggles PSTATE_IE relative to the
      * saved %pstate in %g1.  That clears the bit only because interrupts
      * are enabled on entry; DEBUG builds panic if they are not.
      * NOTE(review): in a non-DEBUG build a caller that enters with
      * interrupts disabled would have them enabled here -- confirm no such
      * caller exists.
      *
      * Uses %g1, %g4, %g5 and %o1 through %o5.
      */
1140 #ifdef DEBUG
1141         .seg    ".text"
1142 find_cpufreq_panic:
1143         .asciz  "find_cpufrequency: interrupts already disabled on entry"
1144 #endif  /* DEBUG */
1145 
1146         ENTRY_NP(find_cpufrequency)
1147         rdpr    %pstate, %g1
1148 
1149 #ifdef DEBUG
1150         andcc   %g1, PSTATE_IE, %g0     ! If DEBUG, check that interrupts
1151         bnz     0f                      ! are currently enabled
1152         sethi   %hi(find_cpufreq_panic), %o1
1153         call    panic
1154         or      %o1, %lo(find_cpufreq_panic), %o0
1155 #endif  /* DEBUG */
1156 
1157 0:
1158         wrpr    %g1, PSTATE_IE, %pstate ! Disable interrupts
1159 3:
1160         ldub    [%o0], %o1              ! Read the number of seconds
1161         mov     %o1, %o2                ! remember initial value in %o2
1162 1:
1163         GET_NATIVE_TIME(%o3, %g4, %g5)
1164         cmp     %o1, %o2                ! did the seconds register roll over?
1165         be,pt   %icc, 1b                ! branch back if unchanged
1166         ldub    [%o0], %o2              !   delay: load the new seconds val
1167 
1168         brz,pn  %o2, 3b                 ! if the minutes just rolled over,
1169                                         ! the last second could have been
1170                                         ! inaccurate; try again.
1171         mov     %o2, %o4                !   delay: store init. val. in %o4
1172 2:
1173         GET_NATIVE_TIME(%o5, %g4, %g5)
1174         cmp     %o2, %o4                ! did the seconds register roll over?
1175         be,pt   %icc, 2b                ! branch back if unchanged
1176         ldub    [%o0], %o4              !   delay: load the new seconds val
1177 
1178         brz,pn  %o4, 0b                 ! if the minutes just rolled over,
1179                                         ! the last second could have been
1180                                         ! inaccurate; try again.
1181         wrpr    %g0, %g1, %pstate       !   delay: re-enable interrupts
1182 
1183         retl
1184         sub     %o5, %o3, %o0           ! return the difference in ticks
1185         SET_SIZE(find_cpufrequency)
1186 
1187 #endif  /* lint */
1188 
1189 #if defined(lint)
1190 /*
1191  * Prefetch a page_t for write or read.  This assumes a linear
1192  * scan of sequential page_t's.
      *
      * These empty bodies exist only for lint; the real routines are the
      * per-CPU assembly versions in the non-lint branch of this conditional.
1193  */
1194 /*ARGSUSED*/
1195 void
1196 prefetch_page_w(void *pp)
1197 {}
1198 
1199 /*ARGSUSED*/
1200 void
1201 prefetch_page_r(void *pp)
1202 {}
1203 #else   /* lint */
1204 
1205 #if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
1206         defined(SERRANO)
1207         !
1208         ! On US-III, the prefetch instruction queue is 8 entries deep.
1209         ! Also, prefetches for write put data in the E$, which has
1210         ! lines of 512 bytes for an 8MB cache. Each E$ line is further
1211         ! subblocked into 64 byte chunks.
1212         !
1213         ! Since prefetch can only bring in 64 bytes at a time (See Sparc
1214         ! v9 Architecture Manual pp.204) and a page_t is 128 bytes,
1215         ! then 2 prefetches are required in order to bring an entire
1216         ! page into the E$.
1217         !
1218         ! Since the prefetch queue is 8 entries deep, we currently can
1219         ! only have 4 prefetches for page_t's outstanding. Thus, we
1220         ! prefetch n+4 ahead of where we are now: 
1221         !
1222         !      4 * sizeof(page_t)     -> 512
1223         !      4 * sizeof(page_t) +64 -> 576
1224         ! 
1225         ! Example
1226         ! =======
1227         ! contiguous page array in memory...
1228         !
1229         ! |AAA1|AAA2|BBB1|BBB2|CCC1|CCC2|DDD1|DDD2|XXX1|XXX2|YYY1|YYY2|...
1230         ! ^         ^         ^         ^         ^    ^
1231         ! pp                                      |    pp+4*sizeof(page)+64
1232         !                                         |
1233         !                                         pp+4*sizeof(page)
1234         !
1235         !  Prefetch
1236         !   Queue
1237         ! +-------+<--- In this iteration, we're working with pp (AAA1),
1238         ! |Preftch|     but we enqueue prefetch for addr = XXX1
1239         ! | XXX1  | 
1240         ! +-------+<--- this queue slot will be a prefetch instruction
1241         ! |Preftch|     for addr = pp + 4*sizeof(page_t) + 64 (or second
1242         ! | XXX2  |     half of page XXX)
1243         ! +-------+
1244         ! |Preftch|<-+- The next time around this function, we'll be
1245         ! | YYY1  |  |  working with pp = BBB1, but will be enqueueing
1246         ! +-------+  |  prefetches for both halves of page YYY,
1247         ! |Preftch|  |  while both halves of page XXX are in transit,
1248         ! | YYY2  |<-+  making their way into the E$.
1249         ! +-------+
1250         ! |Preftch|
1251         ! | ZZZ1  |
1252         ! +-------+
1253         ! .       .
1254         ! :       :
1255         !
1256         !  E$
1257         ! +============================================...
1258         ! | XXX1 | XXX2 | YYY1 | YYY2 | ZZZ1 | ZZZ2 |
1259         ! +============================================...
1260         ! |      |      |      |      |      |      |
1261         ! +============================================...
1262         ! .
1263         ! :
1264         !
1265         ! So we should expect the first four page accesses to stall
1266         ! while we warm up the cache, after which most of the pages
1267         ! will have their pp ready in the E$.
1268         ! 
1269         ! Also note that if sizeof(page_t) grows beyond 128, then 
1270         ! we'll need an additional prefetch to get an entire page
1271         ! into the E$, thus reducing the number of outstanding page
1272         ! prefetches to 2 (ie. 3 prefetches/page = 6 queue slots)
1273         ! etc.
1274         !
1275         ! Cheetah+
1276         ! ========
1277         ! On Cheetah+ we use "#n_writes" prefetches as these avoid an
1278         ! unnecessary RTS->RTO bus transaction state change and
1279         ! just issue an RTO transaction. (See pp.77 of Cheetah+ Delta
1280         ! PRM). On Cheetah, #n_writes prefetches still incur the
1281         ! RTS->RTO state transition regardless.
1282         !
1283 #define STRIDE1 512
1284 #define STRIDE2 576
     /*
      * STRIDE1 and STRIDE2 are the byte offsets from pp of the two 64-byte
      * halves of the entry four page_t's ahead (4 * 128 and 4 * 128 + 64).
      * The check below breaks the build if PAGE_SIZE stops matching;
      * PAGE_SIZE here is presumably the generated sizeof (page_t) constant
      * rather than the MMU page size -- confirm in assym.h.
      */
1285 
1286 #if     STRIDE1 != (PAGE_SIZE * 4)
1287 #error  "STRIDE1 != (PAGE_SIZE * 4)"
1288 #endif  /* STRIDE1 != (PAGE_SIZE * 4) */
1289 
             /*
              * prefetch_page_w(pp): %o0 = pp.  Issues #n_writes prefetches
              * for pp + STRIDE1 and pp + STRIDE2; the second one is the
              * retl delay slot.
              */
1290         ENTRY(prefetch_page_w)
1291         prefetch        [%o0+STRIDE1], #n_writes
1292         retl
1293         prefetch        [%o0+STRIDE2], #n_writes
1294         SET_SIZE(prefetch_page_w)
1295 
1296         !
1297         ! Note on CHEETAH to prefetch for read, we really use #one_write.
1298         ! This fetches to E$ (general use) rather than P$ (floating point use).
1299         !
             /*
              * prefetch_page_r(pp): %o0 = pp.  Same two addresses as
              * prefetch_page_w, but with the #one_write hint for the reason
              * given above; the second prefetch is the retl delay slot.
              */
1300         ENTRY(prefetch_page_r)
1301         prefetch        [%o0+STRIDE1], #one_write
1302         retl
1303         prefetch        [%o0+STRIDE2], #one_write
1304         SET_SIZE(prefetch_page_r)
1305 
1306 #elif defined(SPITFIRE) || defined(HUMMINGBIRD)
1307 
1308         !
1309         ! UltraSparcII can have up to 3 prefetches outstanding.
1310         ! A page_t is 128 bytes (2 prefetches of 64 bytes each)
1311         ! So prefetch for pp + 1, which is
1312         !
1313         !       pp + sizeof(page_t)
1314         ! and
1315         !       pp + sizeof(page_t) + 64
1316         !
1317 #define STRIDE1 128
1318 #define STRIDE2 192
     /*
      * STRIDE1 and STRIDE2 are the byte offsets from pp of the two 64-byte
      * halves of the next page_t (128 and 128 + 64).  The check below breaks
      * the build if PAGE_SIZE stops matching; PAGE_SIZE here is presumably
      * the generated sizeof (page_t) constant rather than the MMU page
      * size -- confirm in assym.h.
      */
1319 
1320 #if     STRIDE1 != PAGE_SIZE
1321 #error  "STRIDE1 != PAGE_SIZE"
1322 #endif  /* STRIDE1 != PAGE_SIZE */
1323 
             /*
              * prefetch_page_w(pp): %o0 = pp.  Issues #n_writes prefetches
              * for pp + STRIDE1 and pp + STRIDE2; the second one is the
              * retl delay slot.
              */
1324         ENTRY(prefetch_page_w)
1325         prefetch        [%o0+STRIDE1], #n_writes
1326         retl
1327         prefetch        [%o0+STRIDE2], #n_writes
1328         SET_SIZE(prefetch_page_w)
1329 
             /*
              * prefetch_page_r(pp): %o0 = pp.  Same two addresses as
              * prefetch_page_w, using the #n_reads hint.
              */
1330         ENTRY(prefetch_page_r)
1331         prefetch        [%o0+STRIDE1], #n_reads
1332         retl
1333         prefetch        [%o0+STRIDE2], #n_reads
1334         SET_SIZE(prefetch_page_r)
1335 
1336 #elif defined(OLYMPUS_C)
1337         !
1338         ! Prefetch strides for Olympus-C
1339         !
1340 
1341 #define STRIDE1 0x440
1342 #define STRIDE2 0x640
     /*
      * STRIDE1 is 1088 bytes and STRIDE2 is 1600 bytes ahead of pp.
      * NOTE(review): unlike the other CPU variants, these values are not
      * tied to PAGE_SIZE by a compile-time check, and prefetch_page_r uses
      * the same #n_writes hint as prefetch_page_w.  Presumably both are
      * deliberate tuning choices for this CPU -- confirm.
      */
1343         
             /*
              * prefetch_page_w(pp): %o0 = pp.  Issues #n_writes prefetches
              * for pp + STRIDE1 and pp + STRIDE2; the second one is the
              * retl delay slot.
              */
1344         ENTRY(prefetch_page_w)
1345         prefetch        [%o0+STRIDE1], #n_writes
1346         retl
1347         prefetch        [%o0+STRIDE2], #n_writes
1348         SET_SIZE(prefetch_page_w)
1349 
             /*
              * prefetch_page_r(pp): %o0 = pp.  Instruction-for-instruction
              * identical to prefetch_page_w in this variant.
              */
1350         ENTRY(prefetch_page_r)
1351         prefetch        [%o0+STRIDE1], #n_writes
1352         retl
1353         prefetch        [%o0+STRIDE2], #n_writes
1354         SET_SIZE(prefetch_page_r)
1355 #else   /* OLYMPUS_C */
1356 
1357 #error "You need to fix this for your new cpu type."
1358 
1359 #endif  /* OLYMPUS_C */
1360 
1361 #endif  /* lint */
1362 
1363 #if defined(lint)
1364 /*
1365  * Prefetch struct smap for write. 
      *
      * This empty body exists only for lint; the real routine is the
      * assembly version in the non-lint branch of this conditional.
1366  */
1367 /*ARGSUSED*/
1368 void
1369 prefetch_smap_w(void *smp)
1370 {}
1371 #else   /* lint */
1372 
     /*
      * PREFETCH_Q_LEN is chosen per CPU family and feeds the SMAP_STRIDE
      * computation below.  An unrecognised CPU type is a build error.
      */
1373 #if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
1374         defined(SERRANO)
1375 
1376 #define PREFETCH_Q_LEN 8
1377 
1378 #elif defined(SPITFIRE) || defined(HUMMINGBIRD)
1379 
1380 #define PREFETCH_Q_LEN 3
1381 
1382 #elif defined(OLYMPUS_C)
1383         !
1384         ! Use length of one for now.
1385         !
1386 #define PREFETCH_Q_LEN  1
1387 
1388 #else   /* OLYMPUS_C */
1389 
1390 #error You need to fix this for your new cpu type.
1391 
1392 #endif  /* OLYMPUS_C */
1393 
1394 #include <vm/kpm.h>
1395 
1396 #ifdef  SEGKPM_SUPPORT
1397 
     /*
      * With SEGKPM_SUPPORT the smap size is taken to be 72 bytes.  The
      * stride is the number of whole 72-byte objects that fit in
      * PREFETCH_Q_LEN 64-byte blocks, multiplied by 64 bytes.
      * NOTE(review): SMAP_SIZE is hard-coded here rather than derived from
      * the structure definition -- confirm it still matches.
      */
1398 #define SMAP_SIZE 72
1399 #define SMAP_STRIDE (((PREFETCH_Q_LEN * 64) / SMAP_SIZE) * 64)
1400 
1401 #else   /* SEGKPM_SUPPORT */
1402 
1403         !
1404         ! The hardware will prefetch the 64 byte cache aligned block
1405         ! that contains the address specified in the prefetch instruction.
1406         ! Since the size of the smap struct is 48 bytes, issuing 1 prefetch
1407         ! per pass will suffice as long as we prefetch far enough ahead to
1408         ! make sure we don't stall for the cases where the smap object
1409         ! spans multiple hardware prefetch blocks.  Let's prefetch as far
1410         ! ahead as the hardware will allow.
1411         !
1412         ! The smap array is processed with decreasing address pointers.
1413         !
1414 #define SMAP_SIZE 48
1415 #define SMAP_STRIDE (PREFETCH_Q_LEN * SMAP_SIZE)
1416 
1417 #endif  /* SEGKPM_SUPPORT */
1418 
             /*
              * prefetch_smap_w(smp): %o0 = smp.  Issues a single #n_writes
              * prefetch for smp - SMAP_STRIDE from the retl delay slot.
              * The offset is negative because the array is walked towards
              * lower addresses.
              */
1419         ENTRY(prefetch_smap_w)
1420         retl
1421         prefetch        [%o0-SMAP_STRIDE], #n_writes
1422         SET_SIZE(prefetch_smap_w)
1423 
1424 #endif  /* lint */
1425 
1426 #if defined(lint) || defined(__lint)
1427 
1428 /* ARGSUSED */
1429 uint64_t
1430 getidsr(void)
1431 { return 0; }
1432 
1433 #else   /* lint */
1434 
     /*
      * getidsr(): return the 64-bit value read with ldxa from address 0
      * (%g0) in the ASI_INTR_DISPATCH_STATUS address space -- going by the
      * ASI name, presumably the interrupt dispatch status register.  The
      * load is the retl delay slot and leaves its result in %o0.
      */
1435         ENTRY_NP(getidsr)
1436         retl
1437         ldxa    [%g0]ASI_INTR_DISPATCH_STATUS, %o0
1438         SET_SIZE(getidsr)
1439 
1440 #endif  /* lint */