5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 #if !defined(lint)
26 #include "assym.h"
27 #endif /* !lint */
28
29 /*
30 * General assembly language routines.
31 * It is the intent of this file to contain routines that are
32 * specific to cpu architecture.
33 */
34
35 /*
36 * WARNING: If you add a fast trap handler which can be invoked by a
37 * non-privileged user, you may have to use the FAST_TRAP_DONE macro
38 * instead of "done" instruction to return back to the user mode. See
39 * comments for the "fast_trap_done" entry point for more information.
40 */
41 #define FAST_TRAP_DONE \
42 ba,a fast_trap_done
43
44 /*
45 * Override GET_NATIVE_TIME for the cpu module code. This is not
46 * guaranteed to be exactly one instruction, be careful of using
47 * the macro in delay slots.
196 * wrpr. The workaround is to immediately follow writes to TICK_COMPARE
197 * with a read, thus stalling the pipe and keeping following instructions
198 * from causing data corruption. Aligning to a quadword will ensure these
199 * two instructions are not split due to i$ misses.
200 */
201 #define WR_TICKCMPR(cmpr,scr1,scr2,label) \
202 ba,a .bb_errata_1.label ;\
203 .align 64 ;\
204 .bb_errata_1.label: ;\
205 wr cmpr, TICK_COMPARE ;\
206 rd TICK_COMPARE, %g0
207 #else /* BB_ERRATA_1 */
208 #define WR_TICKCMPR(in,scr1,scr2,label) \
209 wr in, TICK_COMPARE
210 #endif /* BB_ERRATA_1 */
211
212 #endif /* !CHEETAH && !HUMMINGBIRD */
213
214 #include <sys/clock.h>
215
216 #if defined(lint)
217 #include <sys/types.h>
218 #include <sys/scb.h>
219 #include <sys/systm.h>
220 #include <sys/regset.h>
221 #include <sys/sunddi.h>
222 #include <sys/lockstat.h>
223 #endif /* lint */
224
225
226 #include <sys/asm_linkage.h>
227 #include <sys/privregs.h>
228 #include <sys/machparam.h> /* To get SYSBASE and PAGESIZE */
229 #include <sys/machthread.h>
230 #include <sys/clock.h>
231 #include <sys/intreg.h>
232 #include <sys/psr_compat.h>
233 #include <sys/isa_defs.h>
234 #include <sys/dditypes.h>
235 #include <sys/intr.h>
236
237 #if !defined(lint)
238 #include "assym.h"
239 #endif /* !lint */
240
241 #if defined(lint)
242
243 uint_t
244 get_impl(void)
245 { return (0); }
246
247 #else /* lint */
248
! uint_t get_impl(void)
! Return the CPU implementation number in %o0 via the platform
! GET_CPU_IMPL macro.
249 ENTRY(get_impl)
250 GET_CPU_IMPL(%o0)
251 retl
252 nop ! delay slot: nothing to do
253 SET_SIZE(get_impl)
254
255 #endif /* lint */
256
257 #if defined(lint)
258 /*
259 * Softint generated when counter field of tick reg matches value field
260 * of tick_cmpr reg
261 */
262 /*ARGSUSED*/
263 void
264 tickcmpr_set(uint64_t clock_cycles)
265 {}
266
267 #else /* lint */
268
! void tickcmpr_set(uint64_t clock_cycles)
! Program TICK_COMPARE to fire at absolute %tick value %o0.  A value
! already in the past would never fire, so each write is verified
! against a fresh %tick read; on failure the target is pushed forward
! by a step size that doubles every lap until the write lands in the
! future.
269 ENTRY_NP(tickcmpr_set)
270 ! get 64-bit clock_cycles interval
271 mov %o0, %o2
272 mov 8, %o3 ! A reasonable initial step size
273 1:
274 WR_TICKCMPR(%o2,%o4,%o5,__LINE__) ! Write to TICK_CMPR
275
276 GET_NATIVE_TIME(%o0, %o4, %o5) ! Read %tick to confirm the
277 sllx %o0, 1, %o0 ! value we wrote was in the future.
278 srlx %o0, 1, %o0 ! (shift pair strips the NPT bit)
279
280 cmp %o2, %o0 ! If the value we wrote was in the
281 bg,pt %xcc, 2f ! future, then blow out of here.
282 sllx %o3, 1, %o3 ! If not, then double our step size,
283 ba,pt %xcc, 1b ! and take another lap.
284 add %o0, %o3, %o2 !
285 2:
286 retl
287 nop
288 SET_SIZE(tickcmpr_set)
289
290 #endif /* lint */
291
292 #if defined(lint)
293
294 void
295 tickcmpr_disable(void)
296 {}
297
298 #else /* lint */
299
! void tickcmpr_disable(void)
! Disable tick-compare interrupts by writing TICK_COMPARE with only
! the INT_DIS bit (bit TICKINT_DIS_SHFT) set.
300 ENTRY_NP(tickcmpr_disable)
301 mov 1, %g1
302 sllx %g1, TICKINT_DIS_SHFT, %o0 ! %o0 = INT_DIS bit only
303 WR_TICKCMPR(%o0,%o4,%o5,__LINE__) ! Write to TICK_CMPR
304 retl
305 nop
306 SET_SIZE(tickcmpr_disable)
307
308 #endif /* lint */
309
310 #if defined(lint)
311
312 /*
313 * tick_write_delta() increments %tick by the specified delta. This should
314 * only be called after a CPR event to assure that gethrtime() continues to
315 * increase monotonically. Obviously, writing %tick needs to be done very
316 * carefully to avoid introducing unnecessary %tick skew across CPUs. For
317 * this reason, we make sure we're i-cache hot before actually writing to
318 * %tick.
319 */
320 /*ARGSUSED*/
321 void
322 tick_write_delta(uint64_t delta)
323 {}
324
325 #else /* lint */
326
327 #ifdef DEBUG
328 .seg ".text"
329 tick_write_panic:
330 .asciz "tick_write_delta: interrupts already disabled on entry"
331 #endif /* DEBUG */
332
! void tick_write_delta(uint64_t delta)
! Increment %tick by 'delta' with interrupts disabled, after first
! branching to a cache-line-aligned sequence so that the
! read/increment/write of %tick runs i-cache hot (minimizing %tick
! skew across CPUs).  DEBUG kernels panic if interrupts are already
! off on entry.
333 ENTRY_NP(tick_write_delta)
334 rdpr %pstate, %g1
335 #ifdef DEBUG
336 andcc %g1, PSTATE_IE, %g0 ! If DEBUG, check that interrupts
337 bnz 0f ! aren't already disabled.
338 sethi %hi(tick_write_panic), %o1
339 save %sp, -SA(MINFRAME), %sp ! get a new window to preserve caller
340 call panic
341 or %i1, %lo(tick_write_panic), %o0 ! %o1 became %i1 across the save
342 #endif /* DEBUG */
343 0: wrpr %g1, PSTATE_IE, %pstate ! Disable interrupts
344 mov %o0, %o2
345 ba 0f ! Branch to cache line-aligned instr.
346 nop
347 .align 16
348 0: nop ! The next 3 instructions are now hot.
349 DELTA_NATIVE_TIME(%o2, %o3, %o4, %o5, %g2) ! read/inc/write %tick
350
351 retl ! Return
352 wrpr %g0, %g1, %pstate ! delay: Re-enable interrupts
	SET_SIZE(tick_write_delta) ! was missing: keep ELF symbol size correct
353 #endif /* lint */
354
355 #if defined(lint)
356 /*
357 * return 1 if disabled
358 */
359
360 int
361 tickcmpr_disabled(void)
362 { return (0); }
363
364 #else /* lint */
365
! int tickcmpr_disabled(void)
! Return nonzero iff the INT_DIS bit is set in TICK_COMPARE.
366 ENTRY_NP(tickcmpr_disabled)
367 RD_TICKCMPR(%g1, %o0)
368 retl
369 srlx %g1, TICKINT_DIS_SHFT, %o0 ! delay: isolate INT_DIS bit
370 SET_SIZE(tickcmpr_disabled)
371
372 #endif /* lint */
373
374 /*
375 * Get current tick
376 */
377 #if defined(lint)
378
379 u_longlong_t
380 gettick(void)
381 { return (0); }
382
383 u_longlong_t
384 randtick(void)
385 { return (0); }
386
387 #else /* lint */
388
! u_longlong_t gettick(void) / randtick(void)
! Return the raw native time source (via GET_NATIVE_TIME) in %o0.
! randtick is an alias entry point for the same code.
389 ENTRY(gettick)
390 ALTENTRY(randtick)
391 GET_NATIVE_TIME(%o0, %o2, %o3)
392 retl
393 nop
394 SET_SIZE(randtick)
395 SET_SIZE(gettick)
396
397 #endif /* lint */
398
399
400 /*
401 * Return the counter portion of the tick register.
402 */
403
404 #if defined(lint)
405
406 uint64_t
407 gettick_counter(void)
408 { return(0); }
409
410 #else /* lint */
411
! uint64_t gettick_counter(void)
! Return the counter field of %tick; the sllx/srlx pair strips the
! high (NPT) bit.
412 ENTRY_NP(gettick_counter)
413 rdpr %tick, %o0
414 sllx %o0, 1, %o0
415 retl
416 srlx %o0, 1, %o0 ! shake off npt bit
417 SET_SIZE(gettick_counter)
418 #endif /* lint */
419
420 /*
421 * Provide a C callable interface to the trap that reads the hi-res timer.
422 * Returns 64-bit nanosecond timestamp in %o0 and %o1.
423 */
424
425 #if defined(lint)
426
427 hrtime_t
428 gethrtime(void)
429 {
430 return ((hrtime_t)0);
431 }
432
433 hrtime_t
434 gethrtime_unscaled(void)
435 {
436 return ((hrtime_t)0);
437 }
438
439 hrtime_t
440 gethrtime_max(void)
441 {
442 return ((hrtime_t)0);
443 }
444
445 void
446 scalehrtime(hrtime_t *hrt)
447 {
448 *hrt = 0;
449 }
450
451 void
452 gethrestime(timespec_t *tp)
453 {
454 tp->tv_sec = 0;
455 tp->tv_nsec = 0;
456 }
457
458 time_t
459 gethrestime_sec(void)
460 {
461 return (0);
462 }
463
464 void
465 gethrestime_lasttick(timespec_t *tp)
466 {
467 tp->tv_sec = 0;
468 tp->tv_nsec = 0;
469 }
470
471 /*ARGSUSED*/
472 void
473 hres_tick(void)
474 {
475 }
476
477 void
478 panic_hres_tick(void)
479 {
480 }
481
482 #else /* lint */
483
! hrtime_t gethrtime(void)
! Scaled high-resolution timestamp computed by the GET_HRTIME macro
! into %g1, then moved to the return register.
484 ENTRY_NP(gethrtime)
485 GET_HRTIME(%g1, %o0, %o1, %o2, %o3, %o4, %o5, %g2)
486 ! %g1 = hrtime
487 retl
488 mov %g1, %o0
489 SET_SIZE(gethrtime)
490
! hrtime_t gethrtime_unscaled(void)
! Return raw (unscaled) native time.
491 ENTRY_NP(gethrtime_unscaled)
492 GET_NATIVE_TIME(%g1, %o2, %o3) ! %g1 = native time
493 retl
494 mov %g1, %o0
495 SET_SIZE(gethrtime_unscaled)
496
! hrtime_t gethrtime_waitfree(void) / dtrace_gethrtime(void)
! Lock-free variant: native time converted to nanoseconds without
! taking hres_lock (usable from probe context).
497 ENTRY_NP(gethrtime_waitfree)
498 ALTENTRY(dtrace_gethrtime)
499 GET_NATIVE_TIME(%g1, %o2, %o3) ! %g1 = native time
500 NATIVE_TIME_TO_NSEC(%g1, %o2, %o3)
501 retl
502 mov %g1, %o0
503 SET_SIZE(dtrace_gethrtime)
755 inc %i1 ! release lock
756 st %i1, [%l4 + %lo(hres_lock)] ! clear hres_lock
757
758 ret
759 restore
760
761 9:
762 !
763 ! release hres_lock
764 !
765 ld [%l4 + %lo(hres_lock)], %i1
766 inc %i1
767 st %i1, [%l4 + %lo(hres_lock)]
768
769 sethi %hi(hrtime_base_panic), %o0
770 call panic
771 or %o0, %lo(hrtime_base_panic), %o0
772
773 SET_SIZE(hres_tick)
774
775 #endif /* lint */
776
777 #if !defined(lint) && !defined(__lint)
778
779 .seg ".text"
780 kstat_q_panic_msg:
781 .asciz "kstat_q_exit: qlen == 0"
782
! void kstat_q_panic(void)
! Common panic path used by the KSTAT_Q_UPDATE macros when a queue
! length would underflow (DEBUG kernels).
783 ENTRY(kstat_q_panic)
784 save %sp, -SA(MINFRAME), %sp ! new window; message goes in our %o0
785 sethi %hi(kstat_q_panic_msg), %o0
786 call panic
787 or %o0, %lo(kstat_q_panic_msg), %o0
788 /*NOTREACHED*/
789 SET_SIZE(kstat_q_panic)
790
791 #define BRZPN brz,pn
792 #define BRZPT brz,pt
793
794 #define KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE) \
795 ld [%o0 + QTYPE/**/CNT], %o1; /* %o1 = old qlen */ \
796 QOP %o1, 1, %o2; /* %o2 = new qlen */ \
797 QBR %o1, QZERO; /* done if qlen == 0 */ \
798 st %o2, [%o0 + QTYPE/**/CNT]; /* delay: save qlen */ \
868 GET_NATIVE_TIME(%g1, %g2, %g3)
869 #if defined(DEBUG)
870 KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_W)
871 #else
872 KSTAT_Q_UPDATE_ND(sub, 1:, KSTAT_IO_W)
873 #endif
874 KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
875 SET_SIZE(kstat_waitq_to_runq)
876
! void kstat_runq_back_to_waitq(kstat_io_t *)
! Move a request accounted on the run queue back to the wait queue:
! decrement run-queue stats (underflow panics on DEBUG) then increment
! wait-queue stats.  %g1 holds the current native time for the macros.
877 .align 16
878 ENTRY(kstat_runq_back_to_waitq)
879 GET_NATIVE_TIME(%g1, %g2, %g3)
880 #if defined(DEBUG)
881 KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_R)
882 #else
883 KSTAT_Q_UPDATE_ND(sub, 1:, KSTAT_IO_R)
884 #endif
885 KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
886 SET_SIZE(kstat_runq_back_to_waitq)
887
888 #endif /* !(lint || __lint) */
889
890 #ifdef lint
891
892 int64_t timedelta;
893 hrtime_t hres_last_tick;
894 volatile timestruc_t hrestime;
895 int64_t hrestime_adj;
896 volatile int hres_lock;
897 uint_t nsec_scale;
898 hrtime_t hrtime_base;
899 int traptrace_use_stick;
900
901 #else /* lint */
902 /*
903 * -- WARNING --
904 *
905 * The following variables MUST be together on a 128-byte boundary.
906 * In addition to the primary performance motivation (having them all
907 * on the same cache line(s)), code here and in the GET*TIME() macros
908 * assumes that they all have the same high 22 address bits (so
909 * there's only one sethi).
910 */
911 .seg ".data"
912 .global timedelta, hres_last_tick, hrestime, hrestime_adj
913 .global hres_lock, nsec_scale, hrtime_base, traptrace_use_stick
914 .global nsec_shift, adj_shift
915
916 /* XXX - above comment claims 128-bytes is necessary */
917 .align 64
! Time-of-day / hires-time state consumed by the GET*TIME() macros;
! see the layout/alignment warning above.
918 timedelta:
919 .word 0, 0 /* int64_t */
920 hres_last_tick:
921 .word 0, 0 /* hrtime_t */
922 hrestime:
923 .nword 0, 0 /* 2 longs */
924 hrestime_adj:
925 .word 0, 0 /* int64_t */
926 hres_lock:
927 .word 0
928 nsec_scale:
929 .word 0
930 hrtime_base:
931 .word 0, 0
932 traptrace_use_stick:
933 .word 0
934 nsec_shift:
935 .word NSEC_SHIFT
936 adj_shift:
937 .word ADJ_SHIFT
938
939 #endif /* lint */
940
941
942 /*
943 * drv_usecwait(clock_t n) [DDI/DKI - section 9F]
944 * usec_delay(int n) [compatibility - should go one day]
945 * Delay by spinning.
946 *
947 * delay for n microseconds. numbers <= 0 delay 1 usec
948 *
949 * With UltraSPARC-III the combination of supporting mixed-speed CPUs
950 * and variable clock rate for power management requires that we
951 * use %stick to implement this routine.
952 *
953 * For OPL platforms that support the "sleep" instruction, we
954 * conditionally (ifdef'ed) insert a "sleep" instruction in
955 * the loop. Note that theoretically we should have moved (duplicated)
956 * the code down to spitfire/us3/opl specific asm files - but this
957 * is a lot of code duplication just to add one "sleep" instruction.
958 * We chose less code duplication for this.
959 */
960
961 #if defined(lint)
962
963 /*ARGSUSED*/
964 void
965 drv_usecwait(clock_t n)
966 {}
967
968 /*ARGSUSED*/
969 void
970 usec_delay(int n)
971 {}
972
973 #else /* lint */
974
! void drv_usecwait(clock_t n) / usec_delay(int n)
! Spin for at least n microseconds (n <= 0 is treated as 1).  The
! count is converted to native ticks using sticks_per_usec and spun
! against GET_NATIVE_TIME until the absolute deadline passes.
975 ENTRY(drv_usecwait)
976 ALTENTRY(usec_delay)
977 brlez,a,pn %o0, 0f ! clamp n <= 0 ...
978 mov 1, %o0 ! ... to 1 usec (annulled unless taken)
979 0:
980 sethi %hi(sticks_per_usec), %o1
981 lduw [%o1 + %lo(sticks_per_usec)], %o1
982 mulx %o1, %o0, %o1 ! Scale usec to ticks
983 inc %o1 ! We don't start on a tick edge
984 GET_NATIVE_TIME(%o2, %o3, %o4)
985 add %o1, %o2, %o1 ! %o1 = absolute deadline
986
987 1:
988 #ifdef _OPL
989 .word 0x81b01060 ! insert "sleep" instruction
990 #endif /* _OPL */ ! use byte code for now
991 cmp %o1, %o2
992 GET_NATIVE_TIME(%o2, %o3, %o4)
993 bgeu,pt %xcc, 1b ! spin until the deadline passes
994 nop
995 retl
996 nop
997 SET_SIZE(usec_delay)
998 SET_SIZE(drv_usecwait)
999 #endif /* lint */
1000
1001 #if defined(lint)
1002
1003 /* ARGSUSED */
1004 void
1005 pil14_interrupt(int level)
1006 {}
1007
1008 #else /* lint */
1009
1010 /*
1011 * Level-14 interrupt prologue.
1012 */
! Level-14 (profiling) interrupt prologue: record the interrupted PIL
! and the interrupted PC -- kernel vs. user chosen on TSTATE_PRIV --
! in the per-CPU profiling fields, then branch to the common handler.
1013 ENTRY_NP(pil14_interrupt)
1014 CPU_ADDR(%g1, %g2) ! %g1 = this CPU's cpu_t
1015 rdpr %pil, %g6 ! %g6 = interrupted PIL
1016 stn %g6, [%g1 + CPU_PROFILE_PIL] ! record interrupted PIL
1017 rdpr %tstate, %g6
1018 rdpr %tpc, %g5
1019 btst TSTATE_PRIV, %g6 ! trap from supervisor mode?
1020 bnz,a,pt %xcc, 1f
1021 stn %g5, [%g1 + CPU_PROFILE_PC] ! if so, record kernel PC
1022 stn %g5, [%g1 + CPU_PROFILE_UPC] ! if not, record user PC
1023 ba pil_interrupt_common ! must be large-disp branch
1024 stn %g0, [%g1 + CPU_PROFILE_PC] ! zero kernel PC
1025 1: ba pil_interrupt_common ! must be large-disp branch
1026 stn %g0, [%g1 + CPU_PROFILE_UPC] ! zero user PC
1027 SET_SIZE(pil14_interrupt)
1028
1029 ENTRY_NP(tick_rtt)
1079 ! If we're here, then we have programmed TICK_COMPARE with a %tick
1080 ! which is in the past; we'll now load an initial step size, and loop
1081 ! until we've managed to program TICK_COMPARE to fire in the future.
1082 !
1083 mov 8, %o4 ! 8 = arbitrary inital step
1084 1: add %o0, %o4, %o5 ! Add the step
1085 WR_TICKCMPR(%o5,%g1,%g2,__LINE__) ! Write to TICK_CMPR
1086 GET_NATIVE_TIME(%o0, %g1, %g2) ! %o0 = tick
1087 sllx %o0, 1, %o0 ! Clear the DIS bit
1088 srlx %o0, 1, %o0
1089 cmp %o5, %o0 ! In the future?
1090 bg,a,pt %xcc, 2f ! Yes, drive on.
1091 wrpr %g0, %g5, %pstate ! delay: enable vec intr
1092 ba 1b ! No, try again.
1093 sllx %o4, 1, %o4 ! delay: double step size
1094
1095 2: ba current_thread_complete
1096 nop
1097 SET_SIZE(tick_rtt)
1098
1099 #endif /* lint */
1100
1101 #if defined(lint)
1102
1103 /* ARGSUSED */
1104 void
1105 pil15_interrupt(int level)
1106 {}
1107
1108 #else /* lint */
1109
1110 /*
1111 * Level-15 interrupt prologue.
1112 */
! Level-15 (CPC overflow) interrupt prologue: record the interrupted
! PC -- kernel vs. user chosen on TSTATE_PRIV -- in the per-CPU CPC
! profiling fields, then branch to the epilogue.
1113 ENTRY_NP(pil15_interrupt)
1114 CPU_ADDR(%g1, %g2) ! %g1 = this CPU's cpu_t
1115 rdpr %tstate, %g6
1116 rdpr %tpc, %g5
1117 btst TSTATE_PRIV, %g6 ! trap from supervisor mode?
1118 bnz,a,pt %xcc, 1f
1119 stn %g5, [%g1 + CPU_CPCPROFILE_PC] ! if so, record kernel PC
1120 stn %g5, [%g1 + CPU_CPCPROFILE_UPC] ! if not, record user PC
1121 ba pil15_epilogue ! must be large-disp branch
1122 stn %g0, [%g1 + CPU_CPCPROFILE_PC] ! zero kernel PC
1123 1: ba pil15_epilogue ! must be large-disp branch
1124 stn %g0, [%g1 + CPU_CPCPROFILE_UPC] ! zero user PC
1125 SET_SIZE(pil15_interrupt)
1126
1127 #endif /* lint */
1128
1129 #if defined(lint) || defined(__lint)
1130
1131 /* ARGSUSED */
1132 uint64_t
1133 find_cpufrequency(volatile uchar_t *clock_ptr)
1134 {
1135 return (0);
1136 }
1137
1138 #else /* lint */
1139
1140 #ifdef DEBUG
1141 .seg ".text"
1142 find_cpufreq_panic:
1143 .asciz "find_cpufrequency: interrupts already disabled on entry"
1144 #endif /* DEBUG */
1145
1146 ENTRY_NP(find_cpufrequency)
1147 rdpr %pstate, %g1
1148
1149 #ifdef DEBUG
1150 andcc %g1, PSTATE_IE, %g0 ! If DEBUG, check that interrupts
1151 bnz 0f ! are currently enabled
1152 sethi %hi(find_cpufreq_panic), %o1
1153 call panic
1154 or %o1, %lo(find_cpufreq_panic), %o0
1155 #endif /* DEBUG */
1156
1157 0:
1158 wrpr %g1, PSTATE_IE, %pstate ! Disable interrupts
1159 3:
1167
1168 brz,pn %o2, 3b ! if the minutes just rolled over,
1169 ! the last second could have been
1170 ! inaccurate; try again.
1171 mov %o2, %o4 ! delay: store init. val. in %o2
1172 2:
1173 GET_NATIVE_TIME(%o5, %g4, %g5)
1174 cmp %o2, %o4 ! did the seconds register roll over?
1175 be,pt %icc, 2b ! branch back if unchanged
1176 ldub [%o0], %o4 ! delay: load the new seconds val
1177
1178 brz,pn %o4, 0b ! if the minutes just rolled over,
1179 ! the last second could have been
1180 ! inaccurate; try again.
1181 wrpr %g0, %g1, %pstate ! delay: re-enable interrupts
1182
1183 retl
1184 sub %o5, %o3, %o0 ! return the difference in ticks
1185 SET_SIZE(find_cpufrequency)
1186
1187 #endif /* lint */
1188
1189 #if defined(lint)
1190 /*
1191 * Prefetch a page_t for write or read, this assumes a linear
1192 * scan of sequential page_t's.
1193 */
1194 /*ARGSUSED*/
1195 void
1196 prefetch_page_w(void *pp)
1197 {}
1198
1199 /*ARGSUSED*/
1200 void
1201 prefetch_page_r(void *pp)
1202 {}
1203 #else /* lint */
1204
1205 #if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
1206 defined(SERRANO)
1207 !
1208 ! On US-III, the prefetch instruction queue is 8 entries deep.
1209 ! Also, prefetches for write put data in the E$, which has
1210 ! lines of 512 bytes for an 8MB cache. Each E$ line is further
1211 ! subblocked into 64 byte chunks.
1212 !
1213 ! Since prefetch can only bring in 64 bytes at a time (See Sparc
1214 ! v9 Architecture Manual pp.204) and a page_t is 128 bytes,
1215 ! then 2 prefetches are required in order to bring an entire
1216 ! page into the E$.
1217 !
1218 ! Since the prefetch queue is 8 entries deep, we currently can
1219 ! only have 4 prefetches for page_t's outstanding. Thus, we
1220 ! prefetch n+4 ahead of where we are now:
1221 !
1222 ! 4 * sizeof(page_t) -> 512
1223 ! 4 * sizeof(page_t) +64 -> 576
1224 !
1270 ! we'll need an additional prefetch to get an entire page
1271 ! into the E$, thus reducing the number of outstanding page
1272 ! prefetches to 2 (ie. 3 prefetches/page = 6 queue slots)
1273 ! etc.
1274 !
1275 ! Cheetah+
1276 ! ========
1277 ! On Cheetah+ we use "#n_write" prefetches as these avoid
1278 ! unnecessary RTS->RTO bus transaction state change, and
1279 ! just issues RTO transaction. (See pp.77 of Cheetah+ Delta
1280 ! PRM). On Cheetah, #n_write prefetches are reflected with
1281 ! RTS->RTO state transition regardless.
1282 !
1283 #define STRIDE1 512
1284 #define STRIDE2 576
1285
1286 #if STRIDE1 != (PAGE_SIZE * 4)
1287 #error "STRIDE1 != (PAGE_SIZE * 4)"
1288 #endif /* STRIDE1 != (PAGE_SIZE * 4) */
1289
! void prefetch_page_w(void *pp)
! US-III family: issue two 64-byte #n_writes prefetches one prefetch
! distance (STRIDE1/STRIDE2 = 4 page_t's) ahead of the current page_t.
1290 ENTRY(prefetch_page_w)
1291 prefetch [%o0+STRIDE1], #n_writes
1292 retl
1293 prefetch [%o0+STRIDE2], #n_writes
1294 SET_SIZE(prefetch_page_w)
1295
1296 !
1297 ! Note on CHEETAH to prefetch for read, we really use #one_write.
1298 ! This fetches to E$ (general use) rather than P$ (floating point use).
1299 !
! void prefetch_page_r(void *pp)
! Read-side prefetch; #one_write is used so data lands in E$ (general
! use) rather than P$ -- see the CHEETAH note above.
1300 ENTRY(prefetch_page_r)
1301 prefetch [%o0+STRIDE1], #one_write
1302 retl
1303 prefetch [%o0+STRIDE2], #one_write
1304 SET_SIZE(prefetch_page_r)
1305
1306 #elif defined(SPITFIRE) || defined(HUMMINGBIRD)
1307
1308 !
1309 ! UltraSparcII can have up to 3 prefetches outstanding.
1341 #define STRIDE1 0x440
1342 #define STRIDE2 0x640
1343
! void prefetch_page_w(void *pp) -- Spitfire/Hummingbird variant.
! Two #n_writes prefetches at the US-II stride distances.
1344 ENTRY(prefetch_page_w)
1345 prefetch [%o0+STRIDE1], #n_writes
1346 retl
1347 prefetch [%o0+STRIDE2], #n_writes
1348 SET_SIZE(prefetch_page_w)
1349
! void prefetch_page_r(void *pp) -- Spitfire/Hummingbird variant.
! NOTE(review): the read-side routine issues #n_writes prefetches,
! identical to prefetch_page_w above; confirm this is intentional for
! US-II rather than a copy-paste of the write variant (#n_reads would
! be the read-intent function code).
1350 ENTRY(prefetch_page_r)
1351 prefetch [%o0+STRIDE1], #n_writes
1352 retl
1353 prefetch [%o0+STRIDE2], #n_writes
1354 SET_SIZE(prefetch_page_r)
1355 #else /* OLYMPUS_C */
1356
1357 #error "You need to fix this for your new cpu type."
1358
1359 #endif /* OLYMPUS_C */
1360
1361 #endif /* lint */
1362
1363 #if defined(lint)
1364 /*
1365 * Prefetch struct smap for write.
1366 */
1367 /*ARGSUSED*/
1368 void
1369 prefetch_smap_w(void *smp)
1370 {}
1371 #else /* lint */
1372
1373 #if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
1374 defined(SERRANO)
1375
1376 #define PREFETCH_Q_LEN 8
1377
1378 #elif defined(SPITFIRE) || defined(HUMMINGBIRD)
1379
1380 #define PREFETCH_Q_LEN 3
1381
1382 #elif defined(OLYMPUS_C)
1383 !
1384 ! Use length of one for now.
1385 !
1386 #define PREFETCH_Q_LEN 1
1387
1388 #else /* OLYMPUS_C */
1389
1390 #error You need to fix this for your new cpu type.
1391
1392 #endif /* OLYMPUS_C */
1399 #define SMAP_STRIDE (((PREFETCH_Q_LEN * 64) / SMAP_SIZE) * 64)
1400
1401 #else /* SEGKPM_SUPPORT */
1402
1403 !
1404 ! The hardware will prefetch the 64 byte cache aligned block
1405 ! that contains the address specified in the prefetch instruction.
1406 ! Since the size of the smap struct is 48 bytes, issuing 1 prefetch
1407 ! per pass will suffice as long as we prefetch far enough ahead to
1408 ! make sure we don't stall for the cases where the smap object
1409 ! spans multiple hardware prefetch blocks. Let's prefetch as far
1410 ! ahead as the hardware will allow.
1411 !
1412 ! The smap array is processed with decreasing address pointers.
1413 !
1414 #define SMAP_SIZE 48
1415 #define SMAP_STRIDE (PREFETCH_Q_LEN * SMAP_SIZE)
1416
1417 #endif /* SEGKPM_SUPPORT */
1418
! void prefetch_smap_w(void *smp)
! Prefetch the smap SMAP_STRIDE bytes *below* %o0 -- the smap array
! is processed with decreasing addresses (see comment above).
1419 ENTRY(prefetch_smap_w)
1420 retl
1421 prefetch [%o0-SMAP_STRIDE], #n_writes
1422 SET_SIZE(prefetch_smap_w)
1423
1424 #endif /* lint */
1425
1426 #if defined(lint) || defined(__lint)
1427
1428 /* ARGSUSED */
1429 uint64_t
1430 getidsr(void)
1431 { return 0; }
1432
1433 #else /* lint */
1434
! uint64_t getidsr(void)
! Return the interrupt dispatch status register.
1435 ENTRY_NP(getidsr)
1436 retl
1437 ldxa [%g0]ASI_INTR_DISPATCH_STATUS, %o0 ! delay: read IDSR
1438 SET_SIZE(getidsr)
1439
1440 #endif /* lint */
|
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 #include "assym.h"
26
27 /*
28 * General assembly language routines.
29 * It is the intent of this file to contain routines that are
30 * specific to cpu architecture.
31 */
32
33 /*
34 * WARNING: If you add a fast trap handler which can be invoked by a
35 * non-privileged user, you may have to use the FAST_TRAP_DONE macro
36 * instead of "done" instruction to return back to the user mode. See
37 * comments for the "fast_trap_done" entry point for more information.
38 */
39 #define FAST_TRAP_DONE \
40 ba,a fast_trap_done
41
42 /*
43 * Override GET_NATIVE_TIME for the cpu module code. This is not
44 * guaranteed to be exactly one instruction, be careful of using
45 * the macro in delay slots.
194 * wrpr. The workaround is to immediately follow writes to TICK_COMPARE
195 * with a read, thus stalling the pipe and keeping following instructions
196 * from causing data corruption. Aligning to a quadword will ensure these
197 * two instructions are not split due to i$ misses.
198 */
199 #define WR_TICKCMPR(cmpr,scr1,scr2,label) \
200 ba,a .bb_errata_1.label ;\
201 .align 64 ;\
202 .bb_errata_1.label: ;\
203 wr cmpr, TICK_COMPARE ;\
204 rd TICK_COMPARE, %g0
205 #else /* BB_ERRATA_1 */
206 #define WR_TICKCMPR(in,scr1,scr2,label) \
207 wr in, TICK_COMPARE
208 #endif /* BB_ERRATA_1 */
209
210 #endif /* !CHEETAH && !HUMMINGBIRD */
211
212 #include <sys/clock.h>
213
214
215 #include <sys/asm_linkage.h>
216 #include <sys/privregs.h>
217 #include <sys/machparam.h> /* To get SYSBASE and PAGESIZE */
218 #include <sys/machthread.h>
219 #include <sys/clock.h>
220 #include <sys/intreg.h>
221 #include <sys/psr_compat.h>
222 #include <sys/isa_defs.h>
223 #include <sys/dditypes.h>
224 #include <sys/intr.h>
225
226 #include "assym.h"
227
! uint_t get_impl(void) -- return the CPU implementation number in %o0.
228 ENTRY(get_impl)
229 GET_CPU_IMPL(%o0)
230 retl
231 nop
232 SET_SIZE(get_impl)
233
234 /*
235 * Softint generated when counter field of tick reg matches value field
236 * of tick_cmpr reg
237 */
! void tickcmpr_set(uint64_t clock_cycles) -- program TICK_COMPARE,
! retrying with a doubling step until the written value is in the
! future (a past value would never fire).
238 ENTRY_NP(tickcmpr_set)
239 ! get 64-bit clock_cycles interval
240 mov %o0, %o2
241 mov 8, %o3 ! A reasonable initial step size
242 1:
243 WR_TICKCMPR(%o2,%o4,%o5,__LINE__) ! Write to TICK_CMPR
244
245 GET_NATIVE_TIME(%o0, %o4, %o5) ! Read %tick to confirm the
246 sllx %o0, 1, %o0 ! value we wrote was in the future.
247 srlx %o0, 1, %o0 ! (shift pair strips the NPT bit)
248
249 cmp %o2, %o0 ! If the value we wrote was in the
250 bg,pt %xcc, 2f ! future, then blow out of here.
251 sllx %o3, 1, %o3 ! If not, then double our step size,
252 ba,pt %xcc, 1b ! and take another lap.
253 add %o0, %o3, %o2 !
254 2:
255 retl
256 nop
257 SET_SIZE(tickcmpr_set)
258
! void tickcmpr_disable(void) -- write TICK_COMPARE with only the
! INT_DIS bit set, disabling tick-compare interrupts.
259 ENTRY_NP(tickcmpr_disable)
260 mov 1, %g1
261 sllx %g1, TICKINT_DIS_SHFT, %o0
262 WR_TICKCMPR(%o0,%o4,%o5,__LINE__) ! Write to TICK_CMPR
263 retl
264 nop
265 SET_SIZE(tickcmpr_disable)
266
267 #ifdef DEBUG
268 .seg ".text"
269 tick_write_panic:
270 .asciz "tick_write_delta: interrupts already disabled on entry"
271 #endif /* DEBUG */
272
273 /*
274 * tick_write_delta() increments %tick by the specified delta. This should
275 * only be called after a CPR event to assure that gethrtime() continues to
276 * increase monotonically. Obviously, writing %tick needs to be done very
277 * carefully to avoid introducing unnecessary %tick skew across CPUs. For
278 * this reason, we make sure we're i-cache hot before actually writing to
279 * %tick.
280 */
! void tick_write_delta(uint64_t delta) -- increment %tick by 'delta'
! with interrupts disabled, running the read/inc/write i-cache hot to
! minimize %tick skew across CPUs (see block comment above).
281 ENTRY_NP(tick_write_delta)
282 rdpr %pstate, %g1
283 #ifdef DEBUG
284 andcc %g1, PSTATE_IE, %g0 ! If DEBUG, check that interrupts
285 bnz 0f ! aren't already disabled.
286 sethi %hi(tick_write_panic), %o1
287 save %sp, -SA(MINFRAME), %sp ! get a new window to preserve caller
288 call panic
289 or %i1, %lo(tick_write_panic), %o0 ! %o1 became %i1 across the save
290 #endif /* DEBUG */
291 0: wrpr %g1, PSTATE_IE, %pstate ! Disable interrupts
292 mov %o0, %o2
293 ba 0f ! Branch to cache line-aligned instr.
294 nop
295 .align 16
296 0: nop ! The next 3 instructions are now hot.
297 DELTA_NATIVE_TIME(%o2, %o3, %o4, %o5, %g2) ! read/inc/write %tick
298
299 retl ! Return
300 wrpr %g0, %g1, %pstate ! delay: Re-enable interrupts
	SET_SIZE(tick_write_delta) ! was missing: keep ELF symbol size correct
301
! int tickcmpr_disabled(void) -- nonzero iff INT_DIS is set in
! TICK_COMPARE.
302 ENTRY_NP(tickcmpr_disabled)
303 RD_TICKCMPR(%g1, %o0)
304 retl
305 srlx %g1, TICKINT_DIS_SHFT, %o0 ! delay: isolate INT_DIS bit
306 SET_SIZE(tickcmpr_disabled)
307
308 /*
309 * Get current tick
310 */
311
! u_longlong_t gettick(void) / randtick(void) -- raw native time in
! %o0; randtick is an alias entry point.
312 ENTRY(gettick)
313 ALTENTRY(randtick)
314 GET_NATIVE_TIME(%o0, %o2, %o3)
315 retl
316 nop
317 SET_SIZE(randtick)
318 SET_SIZE(gettick)
319
320
321 /*
322 * Return the counter portion of the tick register.
323 */
324
! uint64_t gettick_counter(void) -- %tick counter field, NPT bit
! stripped by the shift pair.
325 ENTRY_NP(gettick_counter)
326 rdpr %tick, %o0
327 sllx %o0, 1, %o0
328 retl
329 srlx %o0, 1, %o0 ! shake off npt bit
330 SET_SIZE(gettick_counter)
331
332 /*
333 * Provide a C callable interface to the trap that reads the hi-res timer.
334 * Returns 64-bit nanosecond timestamp in %o0 and %o1.
335 */
336
! hrtime_t gethrtime(void) -- scaled hires time via GET_HRTIME.
337 ENTRY_NP(gethrtime)
338 GET_HRTIME(%g1, %o0, %o1, %o2, %o3, %o4, %o5, %g2)
339 ! %g1 = hrtime
340 retl
341 mov %g1, %o0
342 SET_SIZE(gethrtime)
343
! hrtime_t gethrtime_unscaled(void) -- raw native time.
344 ENTRY_NP(gethrtime_unscaled)
345 GET_NATIVE_TIME(%g1, %o2, %o3) ! %g1 = native time
346 retl
347 mov %g1, %o0
348 SET_SIZE(gethrtime_unscaled)
349
! gethrtime_waitfree / dtrace_gethrtime -- lock-free nanosecond time
! (no hres_lock; safe in probe context).
350 ENTRY_NP(gethrtime_waitfree)
351 ALTENTRY(dtrace_gethrtime)
352 GET_NATIVE_TIME(%g1, %o2, %o3) ! %g1 = native time
353 NATIVE_TIME_TO_NSEC(%g1, %o2, %o3)
354 retl
355 mov %g1, %o0
356 SET_SIZE(dtrace_gethrtime)
608 inc %i1 ! release lock
609 st %i1, [%l4 + %lo(hres_lock)] ! clear hres_lock
610
611 ret
612 restore
613
614 9:
615 !
616 ! release hres_lock
617 !
618 ld [%l4 + %lo(hres_lock)], %i1
619 inc %i1
620 st %i1, [%l4 + %lo(hres_lock)]
621
622 sethi %hi(hrtime_base_panic), %o0
623 call panic
624 or %o0, %lo(hrtime_base_panic), %o0
625
626 SET_SIZE(hres_tick)
627
628 .seg ".text"
629 kstat_q_panic_msg:
630 .asciz "kstat_q_exit: qlen == 0"
631
! void kstat_q_panic(void) -- panic path for queue-length underflow
! in the KSTAT_Q_UPDATE macros.
632 ENTRY(kstat_q_panic)
633 save %sp, -SA(MINFRAME), %sp
634 sethi %hi(kstat_q_panic_msg), %o0
635 call panic
636 or %o0, %lo(kstat_q_panic_msg), %o0
637 /*NOTREACHED*/
638 SET_SIZE(kstat_q_panic)
639
640 #define BRZPN brz,pn
641 #define BRZPT brz,pt
642
643 #define KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE) \
644 ld [%o0 + QTYPE/**/CNT], %o1; /* %o1 = old qlen */ \
645 QOP %o1, 1, %o2; /* %o2 = new qlen */ \
646 QBR %o1, QZERO; /* done if qlen == 0 */ \
647 st %o2, [%o0 + QTYPE/**/CNT]; /* delay: save qlen */ \
717 GET_NATIVE_TIME(%g1, %g2, %g3)
718 #if defined(DEBUG)
719 KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_W)
720 #else
721 KSTAT_Q_UPDATE_ND(sub, 1:, KSTAT_IO_W)
722 #endif
723 KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
724 SET_SIZE(kstat_waitq_to_runq)
725
! void kstat_runq_back_to_waitq(kstat_io_t *) -- move accounting from
! run queue back to wait queue; %g1 = current native time.
726 .align 16
727 ENTRY(kstat_runq_back_to_waitq)
728 GET_NATIVE_TIME(%g1, %g2, %g3)
729 #if defined(DEBUG)
730 KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_R)
731 #else
732 KSTAT_Q_UPDATE_ND(sub, 1:, KSTAT_IO_R)
733 #endif
734 KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
735 SET_SIZE(kstat_runq_back_to_waitq)
736
737 /*
738 * -- WARNING --
739 *
740 * The following variables MUST be together on a 128-byte boundary.
741 * In addition to the primary performance motivation (having them all
742 * on the same cache line(s)), code here and in the GET*TIME() macros
743 * assumes that they all have the same high 22 address bits (so
744 * there's only one sethi).
745 */
746 .seg ".data"
747 .global timedelta, hres_last_tick, hrestime, hrestime_adj
748 .global hres_lock, nsec_scale, hrtime_base, traptrace_use_stick
749 .global nsec_shift, adj_shift
750
751 /* XXX - above comment claims 128-bytes is necessary */
752 .align 64
! Time-of-day / hires-time state consumed by the GET*TIME() macros;
! see the layout/alignment warning above.
753 timedelta:
754 .word 0, 0 /* int64_t */
755 hres_last_tick:
756 .word 0, 0 /* hrtime_t */
757 hrestime:
758 .nword 0, 0 /* 2 longs */
759 hrestime_adj:
760 .word 0, 0 /* int64_t */
761 hres_lock:
762 .word 0
763 nsec_scale:
764 .word 0
765 hrtime_base:
766 .word 0, 0
767 traptrace_use_stick:
768 .word 0
769 nsec_shift:
770 .word NSEC_SHIFT
771 adj_shift:
772 .word ADJ_SHIFT
773
774
775 /*
776 * drv_usecwait(clock_t n) [DDI/DKI - section 9F]
777 * usec_delay(int n) [compatibility - should go one day]
778 * Delay by spinning.
779 *
780 * delay for n microseconds. numbers <= 0 delay 1 usec
781 *
782 * With UltraSPARC-III the combination of supporting mixed-speed CPUs
783 * and variable clock rate for power management requires that we
784 * use %stick to implement this routine.
785 *
786 * For OPL platforms that support the "sleep" instruction, we
787 * conditionally (ifdef'ed) insert a "sleep" instruction in
788 * the loop. Note that theoretically we should have moved (duplicated)
789 * the code down to spitfire/us3/opl specific asm files - but this
790 * is a lot of code duplication just to add one "sleep" instruction.
791 * We chose less code duplication for this.
792 */
793
! void drv_usecwait(clock_t n) / usec_delay(int n) -- spin for at
! least n usec (n <= 0 treated as 1), converted to native ticks via
! sticks_per_usec.
794 ENTRY(drv_usecwait)
795 ALTENTRY(usec_delay)
796 brlez,a,pn %o0, 0f ! clamp n <= 0 ...
797 mov 1, %o0 ! ... to 1 usec (annulled unless taken)
798 0:
799 sethi %hi(sticks_per_usec), %o1
800 lduw [%o1 + %lo(sticks_per_usec)], %o1
801 mulx %o1, %o0, %o1 ! Scale usec to ticks
802 inc %o1 ! We don't start on a tick edge
803 GET_NATIVE_TIME(%o2, %o3, %o4)
804 add %o1, %o2, %o1 ! %o1 = absolute deadline
805
806 1:
807 #ifdef _OPL
808 .word 0x81b01060 ! insert "sleep" instruction
809 #endif /* _OPL */ ! use byte code for now
810 cmp %o1, %o2
811 GET_NATIVE_TIME(%o2, %o3, %o4)
812 bgeu,pt %xcc, 1b ! spin until the deadline passes
813 nop
814 retl
815 nop
816 SET_SIZE(usec_delay)
817 SET_SIZE(drv_usecwait)
818
819 /*
820 * Level-14 interrupt prologue.
821 */
! Level-14 (profiling) interrupt prologue -- record interrupted PIL
! and kernel/user PC (split on TSTATE_PRIV), then branch to the
! common PIL handler.
822 ENTRY_NP(pil14_interrupt)
823 CPU_ADDR(%g1, %g2) ! %g1 = this CPU's cpu_t
824 rdpr %pil, %g6 ! %g6 = interrupted PIL
825 stn %g6, [%g1 + CPU_PROFILE_PIL] ! record interrupted PIL
826 rdpr %tstate, %g6
827 rdpr %tpc, %g5
828 btst TSTATE_PRIV, %g6 ! trap from supervisor mode?
829 bnz,a,pt %xcc, 1f
830 stn %g5, [%g1 + CPU_PROFILE_PC] ! if so, record kernel PC
831 stn %g5, [%g1 + CPU_PROFILE_UPC] ! if not, record user PC
832 ba pil_interrupt_common ! must be large-disp branch
833 stn %g0, [%g1 + CPU_PROFILE_PC] ! zero kernel PC
834 1: ba pil_interrupt_common ! must be large-disp branch
835 stn %g0, [%g1 + CPU_PROFILE_UPC] ! zero user PC
836 SET_SIZE(pil14_interrupt)
837
838 ENTRY_NP(tick_rtt)
888 ! If we're here, then we have programmed TICK_COMPARE with a %tick
889 ! which is in the past; we'll now load an initial step size, and loop
890 ! until we've managed to program TICK_COMPARE to fire in the future.
891 !
892 mov 8, %o4 ! 8 = arbitrary initial step
893 1: add %o0, %o4, %o5 ! Add the step
894 WR_TICKCMPR(%o5,%g1,%g2,__LINE__) ! Write to TICK_CMPR
895 GET_NATIVE_TIME(%o0, %g1, %g2) ! %o0 = tick
896 sllx %o0, 1, %o0 ! Clear the DIS bit
897 srlx %o0, 1, %o0 ! (bit 63 shifted out, zero shifted back in)
898 cmp %o5, %o0 ! In the future?
899 bg,a,pt %xcc, 2f ! Yes, drive on.
900 wrpr %g0, %g5, %pstate ! delay: enable vec intr
901 ba 1b ! No, try again.
902 sllx %o4, 1, %o4 ! delay: double step size
903
904 2: ba current_thread_complete
905 nop
906 SET_SIZE(tick_rtt)
907
908 /*
909 * Level-15 interrupt prologue.
910 */
911 ENTRY_NP(pil15_interrupt)
! Like pil14_interrupt, but records into the CPC-profile PC fields
! and exits via pil15_epilogue; does not record the interrupted PIL.
912 CPU_ADDR(%g1, %g2) ! %g1 = this CPU's cpu_t pointer (%g2 scratch)
913 rdpr %tstate, %g6 ! %g6 = trap-time state (priv bit tested below)
914 rdpr %tpc, %g5 ! %g5 = interrupted PC
915 btst TSTATE_PRIV, %g6 ! trap from supervisor mode?
916 bnz,a,pt %xcc, 1f ! annulled: next store executes only if taken
917 stn %g5, [%g1 + CPU_CPCPROFILE_PC] ! if so, record kernel PC
918 stn %g5, [%g1 + CPU_CPCPROFILE_UPC] ! if not, record user PC
919 ba pil15_epilogue ! must be large-disp branch
920 stn %g0, [%g1 + CPU_CPCPROFILE_PC] ! zero kernel PC
921 1: ba pil15_epilogue ! must be large-disp branch
922 stn %g0, [%g1 + CPU_CPCPROFILE_UPC] ! zero user PC
923 SET_SIZE(pil15_interrupt)
924
925 #ifdef DEBUG
926 .seg ".text"
! DEBUG-only panic message used by find_cpufrequency below.
927 find_cpufreq_panic:
928 .asciz "find_cpufrequency: interrupts already disabled on entry"
929 #endif /* DEBUG */
930
931 ENTRY_NP(find_cpufrequency)
! Measure CPU frequency: count native ticks across one full second of
! an external clock.  %o0 presumably points to a TOD "seconds" register
! (it is re-read with ldub below) — confirm against the caller.
! Out: %o0 = native ticks elapsed during the measured second.
! NOTE(review): must be entered with interrupts enabled (DEBUG asserts this).
932 rdpr %pstate, %g1	! %g1 = saved %pstate, restored on exit
933
934 #ifdef DEBUG
935 andcc %g1, PSTATE_IE, %g0 ! If DEBUG, check that interrupts
936 bnz 0f ! are currently enabled
937 sethi %hi(find_cpufreq_panic), %o1
938 call panic	! panic: interrupts already disabled on entry
939 or %o1, %lo(find_cpufreq_panic), %o0	! delay: %o0 = message address
940 #endif /* DEBUG */
941
942 0:
943 wrpr %g1, PSTATE_IE, %pstate ! Disable interrupts
944 3:
952
953 brz,pn %o2, 3b ! if the minutes just rolled over,
954 ! the last second could have been
955 ! inaccurate; try again.
956 mov %o2, %o4 ! delay: store init. val. in %o2
957 2:
958 GET_NATIVE_TIME(%o5, %g4, %g5)	! %o5 = current native tick count
959 cmp %o2, %o4 ! did the seconds register roll over?
960 be,pt %icc, 2b ! branch back if unchanged
961 ldub [%o0], %o4 ! delay: load the new seconds val
962
963 brz,pn %o4, 0b ! if the minutes just rolled over,
964 ! the last second could have been
965 ! inaccurate; try again.
966 wrpr %g0, %g1, %pstate ! delay: re-enable interrupts
967
968 retl
969 sub %o5, %o3, %o0 ! return the difference in ticks
970 SET_SIZE(find_cpufrequency)
971
972 #if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
973 defined(SERRANO)
974 !
975 ! On US-III, the prefetch instruction queue is 8 entries deep.
976 ! Also, prefetches for write put data in the E$, which has
977 ! lines of 512 bytes for an 8MB cache. Each E$ line is further
978 ! subblocked into 64 byte chunks.
979 !
980 ! Since prefetch can only bring in 64 bytes at a time (See Sparc
981 ! v9 Architecture Manual pp.204) and a page_t is 128 bytes,
982 ! then 2 prefetches are required in order to bring an entire
983 ! page into the E$.
984 !
985 ! Since the prefetch queue is 8 entries deep, we currently can
986 ! only have 4 prefetches for page_t's outstanding. Thus, we
987 ! prefetch n+4 ahead of where we are now:
988 !
989 ! 4 * sizeof(page_t) -> 512
990 ! 4 * sizeof(page_t) +64 -> 576
991 !
1037 ! we'll need an additional prefetch to get an entire page
1038 ! into the E$, thus reducing the number of outstanding page
1039 ! prefetches to 2 (ie. 3 prefetches/page = 6 queue slots)
1040 ! etc.
1041 !
1042 ! Cheetah+
1043 ! ========
1044 ! On Cheetah+ we use "#n_write" prefetches as these avoid
1045 ! unnecessary RTS->RTO bus transaction state change, and
1046 ! just issues RTO transaction. (See pp.77 of Cheetah+ Delta
1047 ! PRM). On Cheetah, #n_write prefetches are reflected with
1048 ! RTS->RTO state transition regardless.
1049 !
1050 #define STRIDE1 512
1051 #define STRIDE2 576
1052
1053 #if STRIDE1 != (PAGE_SIZE * 4)
1054 #error "STRIDE1 != (PAGE_SIZE * 4)"
1055 #endif /* STRIDE1 != (PAGE_SIZE * 4) */
1056
1057 /*
1058 * Prefetch a page_t for write or read, this assumes a linear
1059 * scan of sequential page_t's.
1060 */
1061 ENTRY(prefetch_page_w)
! Prefetch-for-write the page_t 4 entries ahead of %o0 (see the block
! comment above: a 128-byte page_t needs two 64-byte prefetches).
1062 prefetch [%o0+STRIDE1], #n_writes	! first 64-byte half of that page_t
1063 retl
1064 prefetch [%o0+STRIDE2], #n_writes	! delay: second 64-byte half
1065 SET_SIZE(prefetch_page_w)
1066
1067 !
1068 ! Note on CHEETAH to prefetch for read, we really use #one_write.
1069 ! This fetches to E$ (general use) rather than P$ (floating point use).
1070 !
1071 ENTRY(prefetch_page_r)
! Read-ahead variant; uses #one_write so data lands in the E$ rather
! than the P$ (see the CHEETAH note above).
1072 prefetch [%o0+STRIDE1], #one_write	! first 64-byte half of the page_t
1073 retl
1074 prefetch [%o0+STRIDE2], #one_write	! delay: second 64-byte half
1075 SET_SIZE(prefetch_page_r)
1076
1077 #elif defined(SPITFIRE) || defined(HUMMINGBIRD)
1078
1079 !
1080 ! UltraSparcII can have up to 3 prefetches outstanding.
1112 #define STRIDE1 0x440
1113 #define STRIDE2 0x640
1114
1115 ENTRY(prefetch_page_w)
! US-II variant: prefetch-for-write two locations ahead of the linear
! page_t scan (strides 0x440 and 0x640 — rationale elided above).
1116 prefetch [%o0+STRIDE1], #n_writes
1117 retl
1118 prefetch [%o0+STRIDE2], #n_writes	! delay slot: second prefetch
1119 SET_SIZE(prefetch_page_w)
1120
1121 ENTRY(prefetch_page_r)
! US-II variant: prefetch-for-read ahead of the linear page_t scan.
! Use the read variant (#n_reads): #n_writes requests the line with
! write intent (exclusive ownership), which is only appropriate for
! prefetch_page_w above and needlessly invalidates other CPUs' copies.
! (The CHEETAH read variant deviates deliberately and documents why;
! no such justification applies here.)
1122 prefetch [%o0+STRIDE1], #n_reads
1123 retl
1124 prefetch [%o0+STRIDE2], #n_reads	! delay slot: second prefetch
1125 SET_SIZE(prefetch_page_r)
1126 #else /* OLYMPUS_C */
1127
1128 #error "You need to fix this for your new cpu type."
1129
1130 #endif /* OLYMPUS_C */
1131
1132 #if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
1133 defined(SERRANO)
1134
1135 #define PREFETCH_Q_LEN 8
1136
1137 #elif defined(SPITFIRE) || defined(HUMMINGBIRD)
1138
1139 #define PREFETCH_Q_LEN 3
1140
1141 #elif defined(OLYMPUS_C)
1142 !
1143 ! Use length of one for now.
1144 !
1145 #define PREFETCH_Q_LEN 1
1146
1147 #else /* OLYMPUS_C */
1148
1149 #error You need to fix this for your new cpu type.
1150
1151 #endif /* OLYMPUS_C */
1158 #define SMAP_STRIDE (((PREFETCH_Q_LEN * 64) / SMAP_SIZE) * 64)
1159
1160 #else /* SEGKPM_SUPPORT */
1161
1162 !
1163 ! The hardware will prefetch the 64 byte cache aligned block
1164 ! that contains the address specified in the prefetch instruction.
1165 ! Since the size of the smap struct is 48 bytes, issuing 1 prefetch
1166 ! per pass will suffice as long as we prefetch far enough ahead to
1167 ! make sure we don't stall for the cases where the smap object
1168 ! spans multiple hardware prefetch blocks. Let's prefetch as far
1169 ! ahead as the hardware will allow.
1170 !
1171 ! The smap array is processed with decreasing address pointers.
1172 !
1173 #define SMAP_SIZE 48
1174 #define SMAP_STRIDE (PREFETCH_Q_LEN * SMAP_SIZE)
1175
1176 #endif /* SEGKPM_SUPPORT */
1177
1178 /*
1179 * Prefetch struct smap for write.
1180 */
1181 ENTRY(prefetch_smap_w)
1182 retl
1183 prefetch [%o0-SMAP_STRIDE], #n_writes	! delay: negative stride — the smap array is walked with decreasing addresses (see comment above)
1184 SET_SIZE(prefetch_smap_w)
1185
1186 ENTRY_NP(getidsr)
! Return (64-bit) the interrupt dispatch status register, read via its ASI.
1187 retl
1188 ldxa [%g0]ASI_INTR_DISPATCH_STATUS, %o0	! delay: %o0 = IDSR contents
1189 SET_SIZE(getidsr)
1190
|