/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include "assym.h"

/*
 * General assembly language routines.
 * It is the intent of this file to contain routines that are
 * specific to cpu architecture.
 */

/*
 * WARNING: If you add a fast trap handler which can be invoked by a
 * non-privileged user, you may have to use the FAST_TRAP_DONE macro
 * instead of the "done" instruction to return to user mode.  See the
 * comments for the "fast_trap_done" entry point for more information.
 */
#define	FAST_TRAP_DONE	\
	ba,a	fast_trap_done

/*
 * Override GET_NATIVE_TIME for the cpu module code.  This macro is not
 * guaranteed to be exactly one instruction, so be careful about using
 * it in delay slots.
 *
 * Do not use any instruction that modifies condition codes, as the
 * caller may depend on those remaining unchanged across the macro.
 */
#if defined(CHEETAH) || defined(OLYMPUS_C)

#define	GET_NATIVE_TIME(out, scr1, scr2) \
	rd	STICK, out
#define	DELTA_NATIVE_TIME(delta, reg, scr1, scr2, scr3) \
	rd	STICK, reg;		\
	add	reg, delta, reg;	\
	wr	reg, STICK
#define	RD_TICKCMPR(out, scr)	\
	rd	STICK_COMPARE, out
#define	WR_TICKCMPR(in, scr1, scr2, label) \
	wr	in, STICK_COMPARE

#elif defined(HUMMINGBIRD)
#include <sys/spitregs.h>

/*
 * The current Hummingbird implementation exposes %stick and %stick_cmp
 * as two 32-bit locations each in ASI_IO space; the hardware should
 * eventually support atomic r/w.  Meanwhile: ugly alert! ...
 *
 * 64-bit opcodes are required, but move only 32 bits:
 *
 *	ldxa	[phys]ASI_IO, %dst	reads the low 32 bits from phys into %dst
 *	stxa	%src, [phys]ASI_IO	writes the low 32 bits from %src into phys
 *
 *	reg equivalent		[phys]ASI_IO
 *	------------------	---------------
 *	%stick_cmp low-32	0x1FE.0000.F060
 *	%stick_cmp high-32	0x1FE.0000.F068
 *	%stick low-32		0x1FE.0000.F070
 *	%stick high-32		0x1FE.0000.F078
 */
#define	HSTC_LOW	0x60		/* stick_cmp low 32-bits */
#define	HSTC_HIGH	0x68		/* stick_cmp high 32-bits */
#define	HST_LOW		0x70		/* stick low 32-bits */
#define	HST_HIGH	0x78		/* stick high 32-bits */
#define	HST_DIFF	0x08		/* low<-->high diff */

/*
 * Any change in the number of instructions in SETL41()
 * will affect SETL41_OFF.
 */
#define	SETL41(reg, byte) \
	sethi	%hi(0x1FE00000), reg;	/* 0000.0000.1FE0.0000 */	\
	or	reg, 0xF, reg;		/* 0000.0000.1FE0.000F */	\
	sllx	reg, 12, reg;		/* 0000.01FE.0000.F000 */	\
	or	reg, byte, reg;		/* 0000.01FE.0000.F0xx */

/*
 * SETL41_OFF is used to calculate the relative PC value when a
 * branch instruction needs to go over the SETL41() macro.
 */
#define	SETL41_OFF	16

/*
 * Reading stick requires 2 loads, and there could be an intervening
 * low-to-high 32-bit rollover resulting in a return value that is
 * off by about (2 ^ 32); this rare case is prevented by re-reading
 * the low-32 bits after the high-32 and verifying that the "after"
 * value is >= the "before" value; if not, the high-32 value is stale.
 *
 * This method is limited to 1 rollover, and based on the fixed
 * stick frequency (5555555 Hz), requires the loads to complete within
 * 773 seconds; incrementing the high-32 value will not overflow for
 * about 52644 years.
 *
 * Writing stick requires 2 stores; if the old/new low-32 value is
 * near 0xffffffff, there could be another rollover (also rare).
 * To prevent this, we first write a 0 to the low-32, then write
 * new values to the high-32 and then the low-32.
 *
 * When we detect a carry in the lower %stick register, we need to
 * read HST_HIGH again.  However, at the point where we detect this,
 * we need to rebuild the register address HST_HIGH.  This takes more
 * than one instruction, so a branch is unavoidable.  Most of the
 * time there is no carry, so we pay the branch penalty only in the
 * (infrequent) carry case.
 *
 * For GET_NATIVE_TIME(), we start afresh and branch back to SETL41().
 * For DELTA_NATIVE_TIME(), we branch to just after SETL41() since
 * addr already points to HST_LOW.
 *
 * NOTE: this method requires disabling interrupts before using
 * DELTA_NATIVE_TIME.
 */
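/*
 * For reference only (not assembled): GET_NATIVE_TIME() below performs
 * the rollover-safe read described above, roughly equivalent to this C
 * sketch, where read32() is a stand-in for the 32-bit ldxa from ASI_IO.
 * (The macro simply retries the whole read when it detects a rollover;
 * the effect is the same as fixing up the stale high word.)
 *
 *	uint64_t
 *	hummingbird_get_stick(void)
 *	{
 *		uint32_t before, high, after;
 *
 *		do {
 *			before = read32(HST_LOW);
 *			high   = read32(HST_HIGH);
 *			after  = read32(HST_LOW);   // re-read low 32 bits
 *		} while (after < before);	    // rollover hit: retry
 *
 *		return (((uint64_t)high << 32) | after);
 *	}
 *
 * A single retry suffices: at 5555555 Hz a second low-word rollover
 * cannot occur within a few loads.
 */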
#define	GET_NATIVE_TIME(out, scr, tmp) \
	SETL41(scr, HST_LOW);		\
	ldxa	[scr]ASI_IO, tmp;	\
	inc	HST_DIFF, scr;		\
	ldxa	[scr]ASI_IO, out;	\
	dec	HST_DIFF, scr;		\
	ldxa	[scr]ASI_IO, scr;	\
	sub	scr, tmp, tmp;		\
	brlz,pn	tmp, .-(SETL41_OFF+24);	\
	sllx	out, 32, out;		\
	or	out, scr, out
#define	DELTA_NATIVE_TIME(delta, addr, high, low, tmp) \
	SETL41(addr, HST_LOW);		\
	ldxa	[addr]ASI_IO, tmp;	\
	inc	HST_DIFF, addr;		\
	ldxa	[addr]ASI_IO, high;	\
	dec	HST_DIFF, addr;		\
	ldxa	[addr]ASI_IO, low;	\
	sub	low, tmp, tmp;		\
	brlz,pn	tmp, .-24;		\
	sllx	high, 32, high;		\
	or	high, low, high;	\
	add	high, delta, high;	\
	srl	high, 0, low;		\
	srlx	high, 32, high;		\
	stxa	%g0, [addr]ASI_IO;	\
	inc	HST_DIFF, addr;		\
	stxa	high, [addr]ASI_IO;	\
	dec	HST_DIFF, addr;		\
	stxa	low, [addr]ASI_IO
#define	RD_TICKCMPR(out, scr) \
	SETL41(scr, HSTC_LOW);		\
	ldxa	[scr]ASI_IO, out;	\
	inc	HST_DIFF, scr;		\
	ldxa	[scr]ASI_IO, scr;	\
	sllx	scr, 32, scr;		\
	or	scr, out, out
#define	WR_TICKCMPR(in, scra, scrd, label) \
	SETL41(scra, HSTC_HIGH);	\
	srlx	in, 32, scrd;		\
	stxa	scrd, [scra]ASI_IO;	\
	dec	HST_DIFF, scra;		\
	stxa	in, [scra]ASI_IO

#else	/* !CHEETAH && !HUMMINGBIRD */

#define	GET_NATIVE_TIME(out, scr1, scr2) \
	rdpr	%tick, out
#define	DELTA_NATIVE_TIME(delta, reg, scr1, scr2, scr3) \
	rdpr	%tick, reg;		\
	add	reg, delta, reg;	\
	wrpr	reg, %tick
#define	RD_TICKCMPR(out, scr) \
	rd	TICK_COMPARE, out
#ifdef BB_ERRATA_1 /* writes to TICK_COMPARE may fail */
/*
 * Writes to the TICK_COMPARE register sometimes fail on blackbird modules.
 * The failure occurs only when the following instruction decodes to wr or
 * wrpr.  The workaround is to immediately follow writes to TICK_COMPARE
 * with a read, thus stalling the pipe and keeping following instructions
 * from causing data corruption.  Aligning to a quadword will ensure these
 * two instructions are not split due to i$ misses.
 */
#define	WR_TICKCMPR(cmpr, scr1, scr2, label)	\
	ba,a	.bb_errata_1.label		;\
	.align	64				;\
.bb_errata_1.label:				;\
	wr	cmpr, TICK_COMPARE		;\
	rd	TICK_COMPARE, %g0
#else	/* BB_ERRATA_1 */
#define	WR_TICKCMPR(in, scr1, scr2, label)	\
	wr	in, TICK_COMPARE
#endif	/* BB_ERRATA_1 */

#endif	/* !CHEETAH && !HUMMINGBIRD */

#include <sys/clock.h>


#include <sys/asm_linkage.h>
#include <sys/privregs.h>
#include <sys/machparam.h>	/* To get SYSBASE and PAGESIZE */
#include <sys/machthread.h>
#include <sys/clock.h>
#include <sys/intreg.h>
#include <sys/psr_compat.h>
#include <sys/isa_defs.h>
#include <sys/dditypes.h>
#include <sys/intr.h>

#include "assym.h"

	ENTRY(get_impl)
	GET_CPU_IMPL(%o0)
	retl
	nop
	SET_SIZE(get_impl)
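/*
 * tickcmpr_set() below must program TICK_COMPARE with a value that is
 * still in the future by the time the write completes.  As an
 * illustrative sketch only (write_tick_compare() and read_tick() are
 * stand-ins for WR_TICKCMPR() and a NPT-masked GET_NATIVE_TIME()),
 * the doubling-step retry logic is roughly:
 *
 *	void
 *	tickcmpr_set(uint64_t when)
 *	{
 *		uint64_t step = 8;		// reasonable initial step
 *
 *		for (;;) {
 *			write_tick_compare(when);
 *			uint64_t t = read_tick();
 *			if (when > t)
 *				break;		// wrote a future value
 *			step <<= 1;		// double the step size,
 *			when = t + step;	//   and take another lap
 *		}
 *	}
 *
 * The same pattern reappears in tick_rtt() further down.
 */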
	ENTRY_NP(tickcmpr_set)
	! get 64-bit clock_cycles interval
	mov	%o0, %o2
	mov	8, %o3			! A reasonable initial step size
1:
	WR_TICKCMPR(%o2,%o4,%o5,__LINE__)	! Write to TICK_CMPR

	GET_NATIVE_TIME(%o0, %o4, %o5)	! Read %tick to confirm the
	sllx	%o0, 1, %o0		!   value we wrote was in the future.
	srlx	%o0, 1, %o0

	cmp	%o2, %o0		! If the value we wrote was in the
	bg,pt	%xcc, 2f		!   future, then blow out of here.
	sllx	%o3, 1, %o3		! If not, then double our step size,
	ba,pt	%xcc, 1b		!   and take another lap.
	add	%o0, %o3, %o2		!
2:
	retl
	nop
	SET_SIZE(tickcmpr_set)

	ENTRY_NP(tickcmpr_disable)
	mov	1, %g1
	sllx	%g1, TICKINT_DIS_SHFT, %o0
	WR_TICKCMPR(%o0,%o4,%o5,__LINE__)	! Write to TICK_CMPR
	retl
	nop
	SET_SIZE(tickcmpr_disable)

#ifdef DEBUG
	.seg	".text"
tick_write_panic:
	.asciz	"tick_write_delta: interrupts already disabled on entry"
#endif	/* DEBUG */

	ENTRY_NP(tick_write_delta)
	rdpr	%pstate, %g1
#ifdef DEBUG
	andcc	%g1, PSTATE_IE, %g0	! If DEBUG, check that interrupts
	bnz	0f			!   aren't already disabled.
	sethi	%hi(tick_write_panic), %o1
	save	%sp, -SA(MINFRAME), %sp	! get a new window to preserve caller
	call	panic
	or	%i1, %lo(tick_write_panic), %o0
#endif	/* DEBUG */
0:	wrpr	%g1, PSTATE_IE, %pstate	! Disable interrupts
	mov	%o0, %o2
	ba	0f			! Branch to cache line-aligned instr.
	nop
	.align	16
0:	nop				! The next 3 instructions are now hot.
	DELTA_NATIVE_TIME(%o2, %o3, %o4, %o5, %g2)	! read/inc/write %tick

	retl				! Return
	wrpr	%g0, %g1, %pstate	!   delay: Re-enable interrupts
	SET_SIZE(tick_write_delta)

	ENTRY_NP(tickcmpr_disabled)
	RD_TICKCMPR(%g1, %o0)
	retl
	srlx	%g1, TICKINT_DIS_SHFT, %o0
	SET_SIZE(tickcmpr_disabled)

/*
 * Get current tick.
 */

	ENTRY(gettick)
	ALTENTRY(randtick)
	GET_NATIVE_TIME(%o0, %o2, %o3)
	retl
	nop
	SET_SIZE(randtick)
	SET_SIZE(gettick)


/*
 * Return the counter portion of the tick register.
 */

	ENTRY_NP(gettick_counter)
	rdpr	%tick, %o0
	sllx	%o0, 1, %o0
	retl
	srlx	%o0, 1, %o0		! shake off npt bit
	SET_SIZE(gettick_counter)

/*
 * Provide a C callable interface to the trap that reads the hi-res timer.
 * Returns 64-bit nanosecond timestamp in %o0 and %o1.
 */

	ENTRY_NP(gethrtime)
	GET_HRTIME(%g1, %o0, %o1, %o2, %o3, %o4, %o5, %g2)
						! %g1 = hrtime
	retl
	mov	%g1, %o0
	SET_SIZE(gethrtime)

	ENTRY_NP(gethrtime_unscaled)
	GET_NATIVE_TIME(%g1, %o2, %o3)	! %g1 = native time
	retl
	mov	%g1, %o0
	SET_SIZE(gethrtime_unscaled)

	ENTRY_NP(gethrtime_waitfree)
	ALTENTRY(dtrace_gethrtime)
	GET_NATIVE_TIME(%g1, %o2, %o3)	! %g1 = native time
	NATIVE_TIME_TO_NSEC(%g1, %o2, %o3)
	retl
	mov	%g1, %o0
	SET_SIZE(dtrace_gethrtime)
	SET_SIZE(gethrtime_waitfree)

	ENTRY(gethrtime_max)
	NATIVE_TIME_MAX(%g1)
	NATIVE_TIME_TO_NSEC(%g1, %o0, %o1)

	! hrtime_t's are signed, max hrtime_t must be positive
	mov	-1, %o2
	brlz,a	%g1, 1f
	srlx	%o2, 1, %g1
1:
	retl
	mov	%g1, %o0
	SET_SIZE(gethrtime_max)

	ENTRY(scalehrtime)
	ldx	[%o0], %o1
	NATIVE_TIME_TO_NSEC(%o1, %o2, %o3)
	retl
	stx	%o1, [%o0]
	SET_SIZE(scalehrtime)

/*
 * Fast trap to return a timestamp; uses the trap window and leaves traps
 * disabled.  Returns a 64-bit nanosecond timestamp in %o0 and %o1.
 *
 * This is the handler for the ST_GETHRTIME trap.
 */

	ENTRY_NP(get_timestamp)
	GET_HRTIME(%g1, %g2, %g3, %g4, %g5, %o0, %o1, %o2)	! %g1 = hrtime
	srlx	%g1, 32, %o0		! %o0 = hi32(%g1)
	srl	%g1, 0, %o1		! %o1 = lo32(%g1)
	FAST_TRAP_DONE
	SET_SIZE(get_timestamp)

/*
 * Macro to convert GET_HRESTIME() bits into a timestamp.
 *
 * We use two separate macros so that the platform-dependent GET_HRESTIME()
 * can be as small as possible; CONV_HRESTIME() implements the generic part.
 */
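/*
 * Illustrative C equivalent only of the CONV_HRESTIME() macro below
 * (adj is the sampled hrestime_adj, nslt is the nanosecond delta since
 * the last tick, and ADJ_SHIFT is 4, so the clamp is nslt/16):
 *
 *	nsec += nslt;				// hrest.tv_nsec += nslt
 *	if (adj != 0) {
 *		long lim = nslt >> ADJ_SHIFT;	// clamp to nslt/16
 *		if (adj > 0)
 *			nsec += (adj > lim) ? lim : adj;
 *		else
 *			nsec += (adj < -lim) ? -lim : adj;
 *	}
 *	while (nsec >= NANOSEC) {		// normalize to sec/nsec
 *		sec++;
 *		nsec -= NANOSEC;
 *	}
 */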
#define	CONV_HRESTIME(hrestsec, hrestnsec, adj, nslt, nano) \
	brz,pt	adj, 3f;		/* no adjustments, it's easy */	\
	add	hrestnsec, nslt, hrestnsec; /* hrest.tv_nsec += nslt */ \
	brlz,pn	adj, 2f;		/* if hrestime_adj negative */	\
	srlx	nslt, ADJ_SHIFT, nslt;	/* delay: nslt >>= 4 */		\
	subcc	adj, nslt, %g0;		/* hrestime_adj - nslt/16 */	\
	movg	%xcc, nslt, adj;	/* adj by min(adj, nslt/16) */	\
	ba	3f;			/* go convert to sec/nsec */	\
	add	hrestnsec, adj, hrestnsec; /* delay: apply adjustment */ \
2:	addcc	adj, nslt, %g0;		/* hrestime_adj + nslt/16 */	\
	bge,a,pt %xcc, 3f;		/* is adj less negative? */	\
	add	hrestnsec, adj, hrestnsec; /* yes: hrest.nsec += adj */	\
	sub	hrestnsec, nslt, hrestnsec; /* no: hrest.nsec -= nslt/16 */ \
3:	cmp	hrestnsec, nano;	/* more than a billion? */	\
	bl,pt	%xcc, 4f;		/* if not, we're done */	\
	nop;				/* delay: do nothing :( */	\
	add	hrestsec, 1, hrestsec;	/* hrest.tv_sec++; */		\
	sub	hrestnsec, nano, hrestnsec; /* hrest.tv_nsec -= NANOSEC; */ \
	ba,a	3b;			/* check >= billion again */	\
4:

	ENTRY_NP(gethrestime)
	GET_HRESTIME(%o1, %o2, %o3, %o4, %o5, %g1, %g2, %g3, %g4)
	CONV_HRESTIME(%o1, %o2, %o3, %o4, %o5)
	stn	%o1, [%o0]
	retl
	stn	%o2, [%o0 + CLONGSIZE]
	SET_SIZE(gethrestime)

/*
 * Similar to gethrestime(), but gethrestime_sec() returns current hrestime
 * seconds.
 */
	ENTRY_NP(gethrestime_sec)
	GET_HRESTIME(%o0, %o2, %o3, %o4, %o5, %g1, %g2, %g3, %g4)
	CONV_HRESTIME(%o0, %o2, %o3, %o4, %o5)
	retl				! %o0 current hrestime seconds
	nop
	SET_SIZE(gethrestime_sec)

/*
 * Returns the hrestime on the last tick.  This is simpler than gethrestime()
 * and gethrestime_sec(): no conversion is required.  gethrestime_lasttick()
 * follows the same locking algorithm as GET_HRESTIME and GET_HRTIME,
 * outlined in detail in clock.h.  (Unlike GET_HRESTIME/GET_HRTIME, we don't
 * rely on load dependencies to effect the membar #LoadLoad, instead declaring
 * it explicitly.)
 */
	ENTRY_NP(gethrestime_lasttick)
	sethi	%hi(hres_lock), %o1
0:
	lduw	[%o1 + %lo(hres_lock)], %o2	! Load lock value
	membar	#LoadLoad			! Load of lock must complete
	andn	%o2, 1, %o2			! Mask off lowest bit
	ldn	[%o1 + %lo(hrestime)], %g1	! Seconds.
	add	%o1, %lo(hrestime), %o4
	ldn	[%o4 + CLONGSIZE], %g2		! Nanoseconds.
	membar	#LoadLoad			! All loads must complete
	lduw	[%o1 + %lo(hres_lock)], %o3	! Reload lock value
	cmp	%o3, %o2			! If lock is locked or has
	bne	0b				!   changed, retry.
	stn	%g1, [%o0]			! Delay: store seconds
	retl
	stn	%g2, [%o0 + CLONGSIZE]		! Delay: store nanoseconds
	SET_SIZE(gethrestime_lasttick)

/*
 * Fast trap for gettimeofday().  Returns a timestruc_t in %o0 and %o1.
 *
 * This is the handler for the ST_GETHRESTIME trap.
 */

	ENTRY_NP(get_hrestime)
	GET_HRESTIME(%o0, %o1, %g1, %g2, %g3, %g4, %g5, %o2, %o3)
	CONV_HRESTIME(%o0, %o1, %g1, %g2, %g3)
	FAST_TRAP_DONE
	SET_SIZE(get_hrestime)

/*
 * Fast trap to return lwp virtual time; uses the trap window and leaves
 * traps disabled.  Returns a 64-bit number in %o0:%o1, which is the number
 * of nanoseconds consumed.
 *
 * This is the handler for the ST_GETHRVTIME trap.
 *
 * Register usage:
 *	%o0, %o1 = return lwp virtual time
 *	%o2 = CPU/thread
 *	%o3 = lwp
 *	%g1 = scratch
 *	%g5 = scratch
 */
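/*
 * Roughly, in illustrative C only (field names follow the assym offsets
 * used below; scale_to_nsec() is a stand-in for NATIVE_TIME_TO_NSEC()):
 *
 *	uint64_t t = native_time();		// GET_NATIVE_TIME
 *	t -= lwp->lwp_mstate.ms_state_start;	// time in current microstate
 *	t += lwp->lwp_mstate.ms_acct[LMS_USER];	// plus accumulated user time
 *	return (scale_to_nsec(t));		// split into %o0:%o1
 */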
	ENTRY_NP(get_virtime)
	GET_NATIVE_TIME(%g5, %g1, %g2)	! %g5 = native time in ticks
	CPU_ADDR(%g2, %g3)		! CPU struct ptr to %g2
	ldn	[%g2 + CPU_THREAD], %g2	! thread pointer to %g2
	ldn	[%g2 + T_LWP], %g3	! lwp pointer to %g3

	/*
	 * Subtract start time of current microstate from time
	 * of day to get increment for lwp virtual time.
	 */
	ldx	[%g3 + LWP_STATE_START], %g1	! ms_state_start
	sub	%g5, %g1, %g5

	/*
	 * Add current value of ms_acct[LMS_USER]
	 */
	ldx	[%g3 + LWP_ACCT_USER], %g1	! ms_acct[LMS_USER]
	add	%g5, %g1, %g5
	NATIVE_TIME_TO_NSEC(%g5, %g1, %o0)

	srl	%g5, 0, %o1		! %o1 = lo32(%g5)
	srlx	%g5, 32, %o0		! %o0 = hi32(%g5)

	FAST_TRAP_DONE
	SET_SIZE(get_virtime)



	.seg	".text"
hrtime_base_panic:
	.asciz	"hrtime_base stepping back"


	ENTRY_NP(hres_tick)
	save	%sp, -SA(MINFRAME), %sp	! get a new window

	sethi	%hi(hrestime), %l4
	ldstub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5	! try locking
7:	tst	%l5
	bz,pt	%xcc, 8f		! if we got it, drive on
	ld	[%l4 + %lo(nsec_scale)], %l5	! delay: %l5 = scaling factor
	ldub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
9:	tst	%l5
	bz,a,pn	%xcc, 7b
	ldstub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
	ba,pt	%xcc, 9b
	ldub	[%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
8:
	membar	#StoreLoad|#StoreStore

	!
	! update hres_last_tick.  %l5 has the scaling factor (nsec_scale).
	!
	ldx	[%l4 + %lo(hrtime_base)], %g1	! load current hrtime_base
	GET_NATIVE_TIME(%l0, %l3, %l6)		! current native time
	stx	%l0, [%l4 + %lo(hres_last_tick)]	! prev = current
	! convert native time to nsecs
	NATIVE_TIME_TO_NSEC_SCALE(%l0, %l5, %l2, NSEC_SHIFT)

	sub	%l0, %g1, %i1		! get accurate nsec delta

	ldx	[%l4 + %lo(hrtime_base)], %l1
	cmp	%l1, %l0
	bg,pn	%xcc, 9f
	nop

	stx	%l0, [%l4 + %lo(hrtime_base)]	! update hrtime_base

	!
	! apply adjustment, if any
	!
	ldx	[%l4 + %lo(hrestime_adj)], %l0	! %l0 = hrestime_adj
	brz	%l0, 2f
					! hrestime_adj == 0 ?
					! yes, skip adjustments
	clr	%l5			! delay: set adj to zero
	tst	%l0			! is hrestime_adj >= 0 ?
	bge,pt	%xcc, 1f		! yes, go handle positive case
	srl	%i1, ADJ_SHIFT, %l5	! delay: %l5 = adj

	addcc	%l0, %l5, %g0		! hrestime_adj < -adj ?
	bl,pt	%xcc, 2f		! yes, use current adj
	neg	%l5			! delay: %l5 = -adj
	ba,pt	%xcc, 2f
	mov	%l0, %l5		! no, so set adj = hrestime_adj
1:
	subcc	%l0, %l5, %g0		! hrestime_adj < adj ?
	bl,a,pt	%xcc, 2f		! yes, set adj = hrestime_adj
	mov	%l0, %l5		! delay: adj = hrestime_adj
2:
	ldx	[%l4 + %lo(timedelta)], %l0	! %l0 = timedelta
	sub	%l0, %l5, %l0		! timedelta -= adj

	stx	%l0, [%l4 + %lo(timedelta)]	! store new timedelta
	stx	%l0, [%l4 + %lo(hrestime_adj)]	! hrestime_adj = timedelta

	or	%l4, %lo(hrestime), %l2
	ldn	[%l2], %i2		! %i2:%i3 = hrestime sec:nsec
	ldn	[%l2 + CLONGSIZE], %i3
	add	%i3, %l5, %i3		! hrestime.nsec += adj
	add	%i3, %i1, %i3		! hrestime.nsec += nslt

	set	NANOSEC, %l5		! %l5 = NANOSEC
	cmp	%i3, %l5
	bl,pt	%xcc, 5f		! if hrestime.tv_nsec < NANOSEC
	sethi	%hi(one_sec), %i1	! delay
	add	%i2, 0x1, %i2		! hrestime.tv_sec++
	sub	%i3, %l5, %i3		! hrestime.tv_nsec - NANOSEC
	mov	0x1, %l5
	st	%l5, [%i1 + %lo(one_sec)]
5:
	stn	%i2, [%l2]
	stn	%i3, [%l2 + CLONGSIZE]	! store the new hrestime

	membar	#StoreStore

	ld	[%l4 + %lo(hres_lock)], %i1
	inc	%i1			! release lock
	st	%i1, [%l4 + %lo(hres_lock)]	! clear hres_lock

	ret
	restore

9:
	!
	! release hres_lock
	!
	ld	[%l4 + %lo(hres_lock)], %i1
	inc	%i1
	st	%i1, [%l4 + %lo(hres_lock)]

	sethi	%hi(hrtime_base_panic), %o0
	call	panic
	or	%o0, %lo(hrtime_base_panic), %o0

	SET_SIZE(hres_tick)

	.seg	".text"
kstat_q_panic_msg:
	.asciz	"kstat_q_exit: qlen == 0"

	ENTRY(kstat_q_panic)
	save	%sp, -SA(MINFRAME), %sp
	sethi	%hi(kstat_q_panic_msg), %o0
	call	panic
	or	%o0, %lo(kstat_q_panic_msg), %o0
	/*NOTREACHED*/
	SET_SIZE(kstat_q_panic)

#define	BRZPN	brz,pn
#define	BRZPT	brz,pt

#define	KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE) \
	ld	[%o0 + QTYPE/**/CNT], %o1;	/* %o1 = old qlen */	\
	QOP	%o1, 1, %o2;			/* %o2 = new qlen */	\
	QBR	%o1, QZERO;			/* done if qlen == 0 */	\
	st	%o2, [%o0 + QTYPE/**/CNT];	/* delay: save qlen */	\
	ldx	[%o0 + QTYPE/**/LASTUPDATE], %o3;			\
	ldx	[%o0 + QTYPE/**/TIME], %o4;	/* %o4 = old time */	\
	ldx	[%o0 + QTYPE/**/LENTIME], %o5;	/* %o5 = old lentime */	\
	sub	%g1, %o3, %o2;			/* %o2 = time delta */	\
	mulx	%o1, %o2, %o3;			/* %o3 = cur lentime */	\
	add	%o4, %o2, %o4;			/* %o4 = new time */	\
	add	%o5, %o3, %o5;			/* %o5 = new lentime */	\
	stx	%o4, [%o0 + QTYPE/**/TIME];	/* save time */		\
	stx	%o5, [%o0 + QTYPE/**/LENTIME];	/* save lentime */	\
	QRETURN;							\
	stx	%g1, [%o0 + QTYPE/**/LASTUPDATE]; /* lastupdate = now */

#if !defined(DEBUG)
/*
 * same as KSTAT_Q_UPDATE but without:
 *	QBR	%o1, QZERO;
 * to be used only with non-debug build.  mimics ASSERT() behaviour.
 */
#define	KSTAT_Q_UPDATE_ND(QOP, QRETURN, QTYPE) \
	ld	[%o0 + QTYPE/**/CNT], %o1;	/* %o1 = old qlen */	\
	QOP	%o1, 1, %o2;			/* %o2 = new qlen */	\
	st	%o2, [%o0 + QTYPE/**/CNT];	/* delay: save qlen */	\
	ldx	[%o0 + QTYPE/**/LASTUPDATE], %o3;			\
	ldx	[%o0 + QTYPE/**/TIME], %o4;	/* %o4 = old time */	\
	ldx	[%o0 + QTYPE/**/LENTIME], %o5;	/* %o5 = old lentime */	\
	sub	%g1, %o3, %o2;			/* %o2 = time delta */	\
	mulx	%o1, %o2, %o3;			/* %o3 = cur lentime */	\
	add	%o4, %o2, %o4;			/* %o4 = new time */	\
	add	%o5, %o3, %o5;			/* %o5 = new lentime */	\
	stx	%o4, [%o0 + QTYPE/**/TIME];	/* save time */		\
	stx	%o5, [%o0 + QTYPE/**/LENTIME];	/* save lentime */	\
	QRETURN;							\
	stx	%g1, [%o0 + QTYPE/**/LASTUPDATE]; /* lastupdate = now */
#endif

	.align 16
	ENTRY(kstat_waitq_enter)
	GET_NATIVE_TIME(%g1, %g2, %g3)
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
	SET_SIZE(kstat_waitq_enter)

	.align 16
	ENTRY(kstat_waitq_exit)
	GET_NATIVE_TIME(%g1, %g2, %g3)
#if defined(DEBUG)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_W)
#else
	KSTAT_Q_UPDATE_ND(sub, retl, KSTAT_IO_W)
#endif
	SET_SIZE(kstat_waitq_exit)

	.align 16
	ENTRY(kstat_runq_enter)
	GET_NATIVE_TIME(%g1, %g2, %g3)
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
	SET_SIZE(kstat_runq_enter)

	.align 16
	ENTRY(kstat_runq_exit)
	GET_NATIVE_TIME(%g1, %g2, %g3)
#if defined(DEBUG)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_R)
#else
	KSTAT_Q_UPDATE_ND(sub, retl, KSTAT_IO_R)
#endif
	SET_SIZE(kstat_runq_exit)

	.align 16
	ENTRY(kstat_waitq_to_runq)
	GET_NATIVE_TIME(%g1, %g2, %g3)
#if defined(DEBUG)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_W)
#else
	KSTAT_Q_UPDATE_ND(sub, 1:, KSTAT_IO_W)
#endif
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
	SET_SIZE(kstat_waitq_to_runq)
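/*
 * For reference only: each KSTAT_Q_UPDATE() expansion above is the
 * assembly form of the standard kstat I/O queue accounting, roughly
 * (C sketch; "now" is the native time sampled into %g1):
 *
 *	delta = now - q->lastupdate;	// time since last transition
 *	if (old_qlen != 0) {
 *		q->time += delta;		// busy time
 *		q->lentime += old_qlen * delta;	// integral of qlen dt
 *	}
 *	q->qlen = old_qlen + 1;		// or - 1 on exit
 *	q->lastupdate = now;
 *
 * so that average queue lengths can later be derived from the
 * accumulated lentime and time sums.
 */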

	.align 16
	ENTRY(kstat_runq_back_to_waitq)
	GET_NATIVE_TIME(%g1, %g2, %g3)
#if defined(DEBUG)
	KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_R)
#else
	KSTAT_Q_UPDATE_ND(sub, 1:, KSTAT_IO_R)
#endif
	KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
	SET_SIZE(kstat_runq_back_to_waitq)

/*
 * -- WARNING --
 *
 * The following variables MUST be together on a 128-byte boundary.
 * In addition to the primary performance motivation (having them all
 * on the same cache line(s)), code here and in the GET*TIME() macros
 * assumes that they all have the same high 22 address bits (so
 * there's only one sethi).
 */
	.seg	".data"
	.global	timedelta, hres_last_tick, hrestime, hrestime_adj
	.global	hres_lock, nsec_scale, hrtime_base, traptrace_use_stick
	.global	nsec_shift, adj_shift

	/* XXX - above comment claims 128-bytes is necessary */
	.align	64
timedelta:
	.word	0, 0		/* int64_t */
hres_last_tick:
	.word	0, 0		/* hrtime_t */
hrestime:
	.nword	0, 0		/* 2 longs */
hrestime_adj:
	.word	0, 0		/* int64_t */
hres_lock:
	.word	0
nsec_scale:
	.word	0
hrtime_base:
	.word	0, 0
traptrace_use_stick:
	.word	0
nsec_shift:
	.word	NSEC_SHIFT
adj_shift:
	.word	ADJ_SHIFT


/*
 * drv_usecwait(clock_t n)	[DDI/DKI - section 9F]
 * usec_delay(int n)		[compatibility - should go one day]
 * Delay by spinning.
 *
 * delay for n microseconds.  numbers <= 0 delay 1 usec
 *
 * With UltraSPARC-III the combination of supporting mixed-speed CPUs
 * and variable clock rate for power management requires that we
 * use %stick to implement this routine.
 *
 * For OPL platforms that support the "sleep" instruction, we
 * conditionally (ifdef'ed) insert a "sleep" instruction in
 * the loop.  Note that theoretically we should have moved (duplicated)
 * the code down to the spitfire/us3/opl specific asm files - but that
 * is a lot of code duplication just to add one "sleep" instruction.
 * We chose less code duplication for this.
 */

	ENTRY(drv_usecwait)
	ALTENTRY(usec_delay)
	brlez,a,pn %o0, 0f
	mov	1, %o0
0:
	sethi	%hi(sticks_per_usec), %o1
	lduw	[%o1 + %lo(sticks_per_usec)], %o1
	mulx	%o1, %o0, %o1		! Scale usec to ticks
	inc	%o1			! We don't start on a tick edge
	GET_NATIVE_TIME(%o2, %o3, %o4)
	add	%o1, %o2, %o1

1:
#ifdef _OPL
	.word 0x81b01060		! insert "sleep" instruction
#endif /* _OPL */			! use byte code for now
	cmp	%o1, %o2
	GET_NATIVE_TIME(%o2, %o3, %o4)
	bgeu,pt	%xcc, 1b
	nop
	retl
	nop
	SET_SIZE(usec_delay)
	SET_SIZE(drv_usecwait)

/*
 * Level-14 interrupt prologue.
 */
	ENTRY_NP(pil14_interrupt)
	CPU_ADDR(%g1, %g2)
	rdpr	%pil, %g6			! %g6 = interrupted PIL
	stn	%g6, [%g1 + CPU_PROFILE_PIL]	! record interrupted PIL
	rdpr	%tstate, %g6
	rdpr	%tpc, %g5
	btst	TSTATE_PRIV, %g6		! trap from supervisor mode?
	bnz,a,pt %xcc, 1f
	stn	%g5, [%g1 + CPU_PROFILE_PC]	! if so, record kernel PC
	stn	%g5, [%g1 + CPU_PROFILE_UPC]	! if not, record user PC
	ba	pil_interrupt_common		! must be large-disp branch
	stn	%g0, [%g1 + CPU_PROFILE_PC]	! zero kernel PC
1:	ba	pil_interrupt_common		! must be large-disp branch
	stn	%g0, [%g1 + CPU_PROFILE_UPC]	! zero user PC
	SET_SIZE(pil14_interrupt)
	ENTRY_NP(tick_rtt)
	!
	! Load TICK_COMPARE into %o5; if bit 63 is set, then TICK_COMPARE is
	! disabled.  If TICK_COMPARE is enabled, we know that we need to
	! reenqueue the interrupt request structure.  We'll then check TICKINT
	! in SOFTINT; if it's set, then we know that we were in a TICK_COMPARE
	! interrupt.  In this case, TICK_COMPARE may have been rewritten
	! recently; we'll compare %o5 to the current time to verify that it's
	! in the future.
	!
	! Note that %o5 is live until after 1f.
	! XXX - there is a subroutine call while %o5 is live!
	!
	RD_TICKCMPR(%o5, %g1)
	srlx	%o5, TICKINT_DIS_SHFT, %g1
	brnz,pt	%g1, 2f
	nop

	rdpr	%pstate, %g5
	andn	%g5, PSTATE_IE, %g1
	wrpr	%g0, %g1, %pstate	! Disable vec interrupts

	sethi	%hi(cbe_level14_inum), %o1
	ldx	[%o1 + %lo(cbe_level14_inum)], %o1
	call	intr_enqueue_req	! preserves %o5 and %g5
	mov	PIL_14, %o0

	! Check SOFTINT for TICKINT/STICKINT
	rd	SOFTINT, %o4
	set	(TICK_INT_MASK | STICK_INT_MASK), %o0
	andcc	%o4, %o0, %g0
	bz,a,pn	%icc, 2f
	wrpr	%g0, %g5, %pstate	! Enable vec interrupts

	! clear TICKINT/STICKINT
	wr	%o0, CLEAR_SOFTINT

	!
	! Now that we've cleared TICKINT, we can reread %tick and confirm
	! that the value we programmed is still in the future.  If it isn't,
	! we need to reprogram TICK_COMPARE to fire as soon as possible.
	!
	GET_NATIVE_TIME(%o0, %g1, %g2)	! %o0 = tick
	sllx	%o0, 1, %o0		! Clear the DIS bit
	srlx	%o0, 1, %o0
	cmp	%o5, %o0		! In the future?
	bg,a,pt	%xcc, 2f		! Yes, drive on.
	wrpr	%g0, %g5, %pstate	!   delay: enable vec intr

	!
	! If we're here, then we have programmed TICK_COMPARE with a %tick
	! which is in the past; we'll now load an initial step size, and loop
	! until we've managed to program TICK_COMPARE to fire in the future.
	!
	mov	8, %o4			! 8 = arbitrary initial step
1:	add	%o0, %o4, %o5		! Add the step
	WR_TICKCMPR(%o5,%g1,%g2,__LINE__)	! Write to TICK_CMPR
	GET_NATIVE_TIME(%o0, %g1, %g2)	! %o0 = tick
	sllx	%o0, 1, %o0		! Clear the DIS bit
	srlx	%o0, 1, %o0
	cmp	%o5, %o0		! In the future?
	bg,a,pt	%xcc, 2f		! Yes, drive on.
	wrpr	%g0, %g5, %pstate	!   delay: enable vec intr
	ba	1b			! No, try again.
	sllx	%o4, 1, %o4		!   delay: double step size

2:	ba	current_thread_complete
	nop
	SET_SIZE(tick_rtt)

/*
 * Level-15 interrupt prologue.
 */
	ENTRY_NP(pil15_interrupt)
	CPU_ADDR(%g1, %g2)
	rdpr	%tstate, %g6
	rdpr	%tpc, %g5
	btst	TSTATE_PRIV, %g6		! trap from supervisor mode?
	bnz,a,pt %xcc, 1f
	stn	%g5, [%g1 + CPU_CPCPROFILE_PC]	! if so, record kernel PC
	stn	%g5, [%g1 + CPU_CPCPROFILE_UPC]	! if not, record user PC
	ba	pil15_epilogue			! must be large-disp branch
	stn	%g0, [%g1 + CPU_CPCPROFILE_PC]	! zero kernel PC
1:	ba	pil15_epilogue			! must be large-disp branch
	stn	%g0, [%g1 + CPU_CPCPROFILE_UPC]	! zero user PC
	SET_SIZE(pil15_interrupt)

#ifdef DEBUG
	.seg	".text"
find_cpufreq_panic:
	.asciz	"find_cpufrequency: interrupts already disabled on entry"
#endif	/* DEBUG */
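/*
 * find_cpufrequency() below counts native ticks across one full second
 * of the TOD clock; %o0 points at the TOD "seconds" register.  Loosely,
 * in illustrative C only (todsec() stands in for the ldub; the real
 * code keeps the tick reads in branch delay slots):
 *
 *	restart:
 *		s = todsec();
 *		do {
 *			t0 = gettick();		// tick at the seconds edge
 *		} while ((s2 = todsec()) == s);	// wait for seconds to change
 *		if (s2 == 0)
 *			goto restart;		// minute rollover; retry
 *		do {
 *			t1 = gettick();
 *		} while ((s3 = todsec()) == s2);  // wait out one full second
 *		if (s3 == 0)
 *			goto restart;		// minute rollover; retry
 *		return (t1 - t0);		// native ticks per second
 */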
	ENTRY_NP(find_cpufrequency)
	rdpr	%pstate, %g1

#ifdef DEBUG
	andcc	%g1, PSTATE_IE, %g0	! If DEBUG, check that interrupts
	bnz	0f			!   are currently enabled
	sethi	%hi(find_cpufreq_panic), %o1
	call	panic
	or	%o1, %lo(find_cpufreq_panic), %o0
#endif	/* DEBUG */

0:
	wrpr	%g1, PSTATE_IE, %pstate	! Disable interrupts
3:
	ldub	[%o0], %o1		! Read the number of seconds
	mov	%o1, %o2		! remember initial value in %o2
1:
	GET_NATIVE_TIME(%o3, %g4, %g5)
	cmp	%o1, %o2		! did the seconds register roll over?
	be,pt	%icc, 1b		! branch back if unchanged
	ldub	[%o0], %o2		!   delay: load the new seconds val

	brz,pn	%o2, 3b			! if the minutes just rolled over,
					! the last second could have been
					! inaccurate; try again.
	mov	%o2, %o4		!   delay: store init. val. in %o4
2:
	GET_NATIVE_TIME(%o5, %g4, %g5)
	cmp	%o2, %o4		! did the seconds register roll over?
	be,pt	%icc, 2b		! branch back if unchanged
	ldub	[%o0], %o4		!   delay: load the new seconds val

	brz,pn	%o4, 0b			! if the minutes just rolled over,
					! the last second could have been
					! inaccurate; try again.
	wrpr	%g0, %g1, %pstate	!   delay: re-enable interrupts

	retl
	sub	%o5, %o3, %o0		! return the difference in ticks
	SET_SIZE(find_cpufrequency)

#if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
	defined(SERRANO)
	!
	! On US-III, the prefetch instruction queue is 8 entries deep.
	! Also, prefetches for write put data in the E$, which has
	! lines of 512 bytes for an 8MB cache.  Each E$ line is further
	! subblocked into 64 byte chunks.
	!
	! Since prefetch can only bring in 64 bytes at a time (see SPARC
	! V9 Architecture Manual, p. 204) and a page_t is 128 bytes,
	! 2 prefetches are required in order to bring an entire
	! page into the E$.
	!
	! Since the prefetch queue is 8 entries deep, we currently can
	! only have 4 prefetches for page_t's outstanding.  Thus, we
	! prefetch n+4 ahead of where we are now:
	!
	!	4 * sizeof(page_t)	-> 512
	!	4 * sizeof(page_t) + 64	-> 576
	!
	! Example
	! =======
	! contiguous page array in memory...
	!
	! |AAA1|AAA2|BBB1|BBB2|CCC1|CCC2|DDD1|DDD2|XXX1|XXX2|YYY1|YYY2|...
	! ^         ^         ^         ^         ^    ^
	! pp                                      |    pp+4*sizeof(page)+64
	!                                         |
	!                                         pp+4*sizeof(page)
	!
	!	Prefetch
	!	Queue
	!	+-------+<--- In this iteration, we're working with pp (AAA1),
	!	|Preftch|     but we enqueue prefetch for addr = XXX1
	!	| XXX1  |
	!	+-------+<--- this queue slot will be a prefetch instruction
	!	|Preftch|     for addr = pp + 4*sizeof(page_t) + 64 (or second
	!	| XXX2  |     half of page XXX)
	!	+-------+
	!	|Preftch|<-+- The next time around this function, we'll be
	!	| YYY1  |  |  working with pp = BBB1, but will be enqueueing
	!	+-------+  |  prefetches for both halves of page YYY,
	!	|Preftch|  |  while both halves of page XXX are in transit,
	!	| YYY2  |<-+  making their way into the E$.
	!	+-------+
	!	|Preftch|
	!	| ZZZ1  |
	!	+-------+
	!	.       .
	!	:       :
	!
	!	E$
	!	+============================================...
	!	| XXX1 | XXX2 | YYY1 | YYY2 | ZZZ1 | ZZZ2 |
	!	+============================================...
	!	|      |      |      |      |      |      |
	!	+============================================...
	!	.
	!	:
	!
	! So we should expect the first four page accesses to stall
	! while we warm up the cache, after which most of the pages
	! will have their pp ready in the E$.
	!
	! Also note that if sizeof(page_t) grows beyond 128, then
	! we'll need an additional prefetch to get an entire page
	! into the E$, thus reducing the number of outstanding page
	! prefetches to 2 (i.e. 3 prefetches/page = 6 queue slots),
	! etc.
	!
	! Cheetah+
	! ========
	! On Cheetah+ we use "#n_write" prefetches, as these avoid
	! unnecessary RTS->RTO bus transaction state changes and
	! just issue RTO transactions (see p. 77 of the Cheetah+ Delta
	! PRM).  On Cheetah, #n_write prefetches are reflected with
	! RTS->RTO state transitions regardless.
	!
#define	STRIDE1	512
#define	STRIDE2	576

#if	STRIDE1 != (PAGE_SIZE * 4)
#error	"STRIDE1 != (PAGE_SIZE * 4)"
#endif	/* STRIDE1 != (PAGE_SIZE * 4) */

	ENTRY(prefetch_page_w)
	prefetch	[%o0+STRIDE1], #n_writes
	retl
	prefetch	[%o0+STRIDE2], #n_writes
	SET_SIZE(prefetch_page_w)

	!
	! Note on CHEETAH to prefetch for read, we really use #one_write.
	! This fetches to E$ (general use) rather than P$ (floating point use).
	!
	ENTRY(prefetch_page_r)
	prefetch	[%o0+STRIDE1], #one_write
	retl
	prefetch	[%o0+STRIDE2], #one_write
	SET_SIZE(prefetch_page_r)

#elif defined(SPITFIRE) || defined(HUMMINGBIRD)

	!
	! UltraSPARC-II can have up to 3 prefetches outstanding.
	! A page_t is 128 bytes (2 prefetches of 64 bytes each).
	! So prefetch for pp + 1, which is
	!
	!	pp + sizeof(page_t)
	! and
	!	pp + sizeof(page_t) + 64
	!
#define	STRIDE1	128
#define	STRIDE2	192

#if	STRIDE1 != PAGE_SIZE
#error	"STRIDE1 != PAGE_SIZE"
#endif	/* STRIDE1 != PAGE_SIZE */

	ENTRY(prefetch_page_w)
	prefetch	[%o0+STRIDE1], #n_writes
	retl
	prefetch	[%o0+STRIDE2], #n_writes
	SET_SIZE(prefetch_page_w)

	ENTRY(prefetch_page_r)
	prefetch	[%o0+STRIDE1], #n_reads
	retl
	prefetch	[%o0+STRIDE2], #n_reads
	SET_SIZE(prefetch_page_r)

#elif defined(OLYMPUS_C)
	!
	! Prefetch strides for Olympus-C.
	!

#define	STRIDE1	0x440
#define	STRIDE2	0x640

	ENTRY(prefetch_page_w)
	prefetch	[%o0+STRIDE1], #n_writes
	retl
	prefetch	[%o0+STRIDE2], #n_writes
	SET_SIZE(prefetch_page_w)

	ENTRY(prefetch_page_r)
	prefetch	[%o0+STRIDE1], #n_writes
	retl
	prefetch	[%o0+STRIDE2], #n_writes
	SET_SIZE(prefetch_page_r)
#else	/* OLYMPUS_C */

#error "You need to fix this for your new cpu type."

#endif	/* OLYMPUS_C */

#if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
	defined(SERRANO)

#define	PREFETCH_Q_LEN	8

#elif defined(SPITFIRE) || defined(HUMMINGBIRD)

#define	PREFETCH_Q_LEN	3

#elif defined(OLYMPUS_C)
	!
	! Use a length of one for now.
	!
#define	PREFETCH_Q_LEN	1

#else	/* OLYMPUS_C */

#error "You need to fix this for your new cpu type."

#endif	/* OLYMPUS_C */

#include <vm/kpm.h>

#ifdef	SEGKPM_SUPPORT

#define	SMAP_SIZE	72
#define	SMAP_STRIDE	(((PREFETCH_Q_LEN * 64) / SMAP_SIZE) * 64)

#else	/* SEGKPM_SUPPORT */

	!
	! The hardware will prefetch the 64 byte cache aligned block
	! that contains the address specified in the prefetch instruction.
	! Since the size of the smap struct is 48 bytes, issuing 1 prefetch
	! per pass will suffice as long as we prefetch far enough ahead to
	! make sure we don't stall for the cases where the smap object
	! spans multiple hardware prefetch blocks.  Let's prefetch as far
	! ahead as the hardware will allow.
	!
	! The smap array is processed with decreasing address pointers.
	!
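	!
	! As a worked example of the stride math (illustrative only):
	! with SMAP_SIZE = 48 and PREFETCH_Q_LEN = 8 (US-III), the
	! stride below is 8 * 48 = 384 bytes, i.e. we prefetch 8 smaps
	! ahead of the one being processed.  In the SEGKPM_SUPPORT case
	! above, SMAP_SIZE = 72 does not divide the 64-byte prefetch
	! block evenly, so the stride is rounded to whole blocks:
	! ((8 * 64) / 72) * 64 = 7 * 64 = 448 bytes.
	!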
#define	SMAP_SIZE	48
#define	SMAP_STRIDE	(PREFETCH_Q_LEN * SMAP_SIZE)

#endif	/* SEGKPM_SUPPORT */

	ENTRY(prefetch_smap_w)
	retl
	prefetch	[%o0-SMAP_STRIDE], #n_writes
	SET_SIZE(prefetch_smap_w)

	ENTRY_NP(getidsr)
	retl
	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %o0
	SET_SIZE(getidsr)