5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 #if !defined(lint)
26 #include "assym.h"
27 #endif /* !lint */
28
29 /*
30 * General assembly language routines.
31 * It is the intent of this file to contain routines that are
32 * specific to cpu architecture.
33 */
34
35 /*
36 * WARNING: If you add a fast trap handler which can be invoked by a
37 * non-privileged user, you may have to use the FAST_TRAP_DONE macro
38 * instead of "done" instruction to return back to the user mode. See
39 * comments for the "fast_trap_done" entry point for more information.
40 */
41 #define FAST_TRAP_DONE \
42 ba,a fast_trap_done
43
44 /*
45 * Override GET_NATIVE_TIME for the cpu module code. This is not
46 * guaranteed to be exactly one instruction, be careful of using
47 * the macro in delay slots.
196 * wrpr. The workaround is to immediately follow writes to TICK_COMPARE
197 * with a read, thus stalling the pipe and keeping following instructions
198 * from causing data corruption. Aligning to a quadword will ensure these
199 * two instructions are not split due to i$ misses.
200 */
201 #define WR_TICKCMPR(cmpr,scr1,scr2,label) \
202 ba,a .bb_errata_1.label ;\
203 .align 64 ;\
204 .bb_errata_1.label: ;\
205 wr cmpr, TICK_COMPARE ;\
206 rd TICK_COMPARE, %g0
207 #else /* BB_ERRATA_1 */
208 #define WR_TICKCMPR(in,scr1,scr2,label) \
209 wr in, TICK_COMPARE
210 #endif /* BB_ERRATA_1 */
211
212 #endif /* !CHEETAH && !HUMMINGBIRD */
213
214 #include <sys/clock.h>
215
216 #if defined(lint)
217 #include <sys/types.h>
218 #include <sys/scb.h>
219 #include <sys/systm.h>
220 #include <sys/regset.h>
221 #include <sys/sunddi.h>
222 #include <sys/lockstat.h>
223 #endif /* lint */
224
225
226 #include <sys/asm_linkage.h>
227 #include <sys/privregs.h>
228 #include <sys/machparam.h> /* To get SYSBASE and PAGESIZE */
229 #include <sys/machthread.h>
230 #include <sys/clock.h>
231 #include <sys/intreg.h>
232 #include <sys/psr_compat.h>
233 #include <sys/isa_defs.h>
234 #include <sys/dditypes.h>
235 #include <sys/intr.h>
236
237 #if !defined(lint)
238 #include "assym.h"
239 #endif /* !lint */
240
241 #if defined(lint)
242
243 uint_t
244 get_impl(void)
245 { return (0); }
246
247 #else /* lint */
248
/*
 * uint_t get_impl(void)
 * Return the CPU implementation code in %o0 (extracted by the
 * GET_CPU_IMPL macro, defined in a machine header elsewhere).
 * Leaf routine; clobbers only %o0.
 */
249 ENTRY(get_impl)
250 GET_CPU_IMPL(%o0)
251 retl
252 nop					! delay slot
253 SET_SIZE(get_impl)
254
255 #endif /* lint */
256
257 #if defined(lint)
258 /*
259 * Softint generated when counter field of tick reg matches value field
260 * of tick_cmpr reg
261 */
262 /*ARGSUSED*/
263 void
264 tickcmpr_set(uint64_t clock_cycles)
265 {}
266
267 #else /* lint */
268
/*
 * void tickcmpr_set(uint64_t clock_cycles)
 * Program TICK_COMPARE with %o0.  Because the write can race with the
 * advancing %tick, we verify that the value written is still in the
 * future; if not, we retry with current-tick + step, doubling the step
 * each lap (exponential backoff) until the write lands in the future.
 * Clobbers %o2-%o5 (and WR_TICKCMPR scratch).
 */
269 ENTRY_NP(tickcmpr_set)
270 ! get 64-bit clock_cycles interval
271 mov %o0, %o2			! %o2 = candidate TICK_COMPARE value
272 mov 8, %o3 ! A reasonable initial step size
273 1:
274 WR_TICKCMPR(%o2,%o4,%o5,__LINE__) ! Write to TICK_CMPR
275 
276 GET_NATIVE_TIME(%o0, %o4, %o5) ! Read %tick to confirm the
277 sllx %o0, 1, %o0 ! value we wrote was in the future.
278 srlx %o0, 1, %o0		! (shift pair clears the NPT bit 63)
279 
280 cmp %o2, %o0 ! If the value we wrote was in the
281 bg,pt %xcc, 2f ! future, then blow out of here.
282 sllx %o3, 1, %o3 ! If not, then double our step size,
283 ba,pt %xcc, 1b ! and take another lap.
284 add %o0, %o3, %o2 ! delay: next candidate = now + step
285 2:
286 retl
287 nop
288 SET_SIZE(tickcmpr_set)
289
290 #endif /* lint */
291
292 #if defined(lint)
293
294 void
295 tickcmpr_disable(void)
296 {}
297
298 #else /* lint */
299
/*
 * void tickcmpr_disable(void)
 * Disable the tick-compare interrupt by writing TICK_COMPARE with
 * only the interrupt-disable bit set (1 << TICKINT_DIS_SHFT).
 * Clobbers %g1, %o0 (and WR_TICKCMPR scratch %o4/%o5).
 */
300 ENTRY_NP(tickcmpr_disable)
301 mov 1, %g1
302 sllx %g1, TICKINT_DIS_SHFT, %o0	! %o0 = INT_DIS bit
303 WR_TICKCMPR(%o0,%o4,%o5,__LINE__) ! Write to TICK_CMPR
304 retl
305 nop
306 SET_SIZE(tickcmpr_disable)
307
308 #endif /* lint */
309
310 #if defined(lint)
311
312 /*
313 * tick_write_delta() increments %tick by the specified delta. This should
314 * only be called after a CPR event to assure that gethrtime() continues to
315 * increase monotonically. Obviously, writing %tick needs to de done very
316 * carefully to avoid introducing unnecessary %tick skew across CPUs. For
317 * this reason, we make sure we're i-cache hot before actually writing to
318 * %tick.
319 */
320 /*ARGSUSED*/
321 void
322 tick_write_delta(uint64_t delta)
323 {}
324
325 #else /* lint */
326
327 #ifdef DEBUG
328 .seg ".text"
329 tick_write_panic:
330 .asciz "tick_write_delta: interrupts already disabled on entry"
331 #endif /* DEBUG */
332
/*
 * void tick_write_delta(uint64_t delta)
 * Add 'delta' to %tick (via the DELTA_NATIVE_TIME macro).  Used after a
 * CPR event so gethrtime() remains monotonic.  Interrupts are disabled
 * around the update, and the read/inc/write sequence is branched to a
 * 16-byte-aligned, pre-warmed location to minimize %tick skew across CPUs.
 * In:     %o0 = delta
 * Clobb:  %g1, %g2, %o2-%o5
 * Fix: added the missing SET_SIZE() so the ELF symbol size is recorded,
 * consistent with every other routine in this file.
 */
333 ENTRY_NP(tick_write_delta)
334 rdpr %pstate, %g1		! %g1 = caller's pstate (restored on exit)
335 #ifdef DEBUG
336 andcc %g1, PSTATE_IE, %g0 ! If DEBUG, check that interrupts
337 bnz 0f ! aren't already disabled.
338 sethi %hi(tick_write_panic), %o1	! delay: msg addr (becomes %i1 after save)
339 save %sp, -SA(MINFRAME), %sp ! get a new window to preserve caller
340 call panic
341 or %i1, %lo(tick_write_panic), %o0	! delay: %o0 = panic message
342 #endif /* DEBUG */
343 0: wrpr %g1, PSTATE_IE, %pstate ! Disable interrupts (IE was set; XOR clears)
344 mov %o0, %o2
345 ba 0f ! Branch to cache line-aligned instr.
346 nop
347 .align 16
348 0: nop ! The next 3 instructions are now hot.
349 DELTA_NATIVE_TIME(%o2, %o3, %o4, %o5, %g2) ! read/inc/write %tick
350 
351 retl ! Return
352 wrpr %g0, %g1, %pstate ! delay: Re-enable interrupts
	SET_SIZE(tick_write_delta)
353 #endif /* lint */
354
355 #if defined(lint)
356 /*
357 * return 1 if disabled
358 */
359
360 int
361 tickcmpr_disabled(void)
362 { return (0); }
363
364 #else /* lint */
365
/*
 * int tickcmpr_disabled(void)
 * Return nonzero (1) if the tick-compare interrupt is disabled, i.e.
 * if the INT_DIS bit of TICK_COMPARE is set.  The srlx brings bit
 * TICKINT_DIS_SHFT down to bit 0 (it is the topmost bit in use, so
 * the result is 0 or 1 — per the TICKINT_DIS_SHFT definition elsewhere).
 */
366 ENTRY_NP(tickcmpr_disabled)
367 RD_TICKCMPR(%g1, %o0)		! %g1 = TICK_COMPARE
368 retl
369 srlx %g1, TICKINT_DIS_SHFT, %o0	! delay: isolate the disable bit
370 SET_SIZE(tickcmpr_disabled)
371
372 #endif /* lint */
373
374 /*
375 * Get current tick
376 */
377 #if defined(lint)
378
379 u_longlong_t
380 gettick(void)
381 { return (0); }
382
383 u_longlong_t
384 randtick(void)
385 { return (0); }
386
387 #else /* lint */
388
/*
 * u_longlong_t gettick(void) / u_longlong_t randtick(void)
 * Return the raw native time source (via GET_NATIVE_TIME) in %o0.
 * randtick is an alternate entry returning the same value
 * (presumably used as a cheap entropy source — same code path).
 * Clobbers %o2, %o3 as macro scratch.
 */
389 ENTRY(gettick)
390 ALTENTRY(randtick)
391 GET_NATIVE_TIME(%o0, %o2, %o3)
392 retl
393 nop
394 SET_SIZE(randtick)
395 SET_SIZE(gettick)
396
397 #endif /* lint */
398
399
400 /*
401 * Return the counter portion of the tick register.
402 */
403
404 #if defined(lint)
405
406 uint64_t
407 gettick_counter(void)
408 { return(0); }
409
410 #else /* lint */
411
/*
 * uint64_t gettick_counter(void)
 * Return the counter field of %tick: the shift-left/shift-right pair
 * clears the top (NPT) bit, leaving the 63-bit counter in %o0.
 */
412 ENTRY_NP(gettick_counter)
413 rdpr %tick, %o0
414 sllx %o0, 1, %o0
415 retl
416 srlx %o0, 1, %o0 ! shake off npt bit
417 SET_SIZE(gettick_counter)
418 #endif /* lint */
419
420 /*
421 * Provide a C callable interface to the trap that reads the hi-res timer.
422 * Returns 64-bit nanosecond timestamp in %o0 and %o1.
423 */
424
425 #if defined(lint)
426
427 hrtime_t
428 gethrtime(void)
429 {
430 return ((hrtime_t)0);
431 }
432
433 hrtime_t
434 gethrtime_unscaled(void)
435 {
436 return ((hrtime_t)0);
437 }
438
439 hrtime_t
440 gethrtime_max(void)
441 {
442 return ((hrtime_t)0);
443 }
444
445 void
446 scalehrtime(hrtime_t *hrt)
447 {
448 *hrt = 0;
449 }
450
451 void
452 gethrestime(timespec_t *tp)
453 {
454 tp->tv_sec = 0;
455 tp->tv_nsec = 0;
456 }
457
458 time_t
459 gethrestime_sec(void)
460 {
461 return (0);
462 }
463
464 void
465 gethrestime_lasttick(timespec_t *tp)
466 {
467 tp->tv_sec = 0;
468 tp->tv_nsec = 0;
469 }
470
471 /*ARGSUSED*/
472 void
473 hres_tick(void)
474 {
475 }
476
477 void
478 panic_hres_tick(void)
479 {
480 }
481
482 #else /* lint */
483
/*
 * hrtime_t gethrtime(void)
 * Return the scaled high-resolution timestamp (nanoseconds) computed
 * by the GET_HRTIME macro (defined elsewhere; uses the hrestime data
 * block's scale/base values).  Result moved from %g1 to %o0.
 */
484 ENTRY_NP(gethrtime)
485 GET_HRTIME(%g1, %o0, %o1, %o2, %o3, %o4, %o5, %g2)
486 ! %g1 = hrtime
487 retl
488 mov %g1, %o0
489 SET_SIZE(gethrtime)
490
/*
 * hrtime_t gethrtime_unscaled(void)
 * Return the raw (unscaled) native time in %o0 — no nsec conversion.
 */
491 ENTRY_NP(gethrtime_unscaled)
492 GET_NATIVE_TIME(%g1, %o2, %o3) ! %g1 = native time
493 retl
494 mov %g1, %o0
495 SET_SIZE(gethrtime_unscaled)
496
/*
 * hrtime_t gethrtime_waitfree(void) / hrtime_t dtrace_gethrtime(void)
 * Return native time converted to nanoseconds via NATIVE_TIME_TO_NSEC.
 * Unlike gethrtime(), this path takes no lock ("waitfree"), which is
 * presumably why DTrace probe context uses it — confirm against the
 * GET_HRTIME / NATIVE_TIME_TO_NSEC macro definitions.
 */
497 ENTRY_NP(gethrtime_waitfree)
498 ALTENTRY(dtrace_gethrtime)
499 GET_NATIVE_TIME(%g1, %o2, %o3) ! %g1 = native time
500 NATIVE_TIME_TO_NSEC(%g1, %o2, %o3)
501 retl
502 mov %g1, %o0
503 SET_SIZE(dtrace_gethrtime)
755 inc %i1 ! release lock
756 st %i1, [%l4 + %lo(hres_lock)] ! clear hres_lock
757
758 ret
759 restore
760
761 9:
762 !
763 ! release hres_lock
764 !
765 ld [%l4 + %lo(hres_lock)], %i1
766 inc %i1
767 st %i1, [%l4 + %lo(hres_lock)]
768
769 sethi %hi(hrtime_base_panic), %o0
770 call panic
771 or %o0, %lo(hrtime_base_panic), %o0
772
773 SET_SIZE(hres_tick)
774
775 #endif /* lint */
776
777 #if !defined(lint) && !defined(__lint)
778
779 .seg ".text"
780 kstat_q_panic_msg:
781 .asciz "kstat_q_exit: qlen == 0"
782
/*
 * kstat_q_panic: panic("kstat_q_exit: qlen == 0").  Jumped to by the
 * DEBUG KSTAT_Q_UPDATE path when a queue length underflows.  The save
 * gets a fresh window so panic sees the caller preserved; after save,
 * %o0 in the new window carries the message address.  Never returns.
 */
783 ENTRY(kstat_q_panic)
784 save %sp, -SA(MINFRAME), %sp
785 sethi %hi(kstat_q_panic_msg), %o0
786 call panic
787 or %o0, %lo(kstat_q_panic_msg), %o0	! delay: complete msg address
788 /*NOTREACHED*/
789 SET_SIZE(kstat_q_panic)
790
791 #define BRZPN brz,pn
792 #define BRZPT brz,pt
793
794 #define KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE) \
795 ld [%o0 + QTYPE/**/CNT], %o1; /* %o1 = old qlen */ \
796 QOP %o1, 1, %o2; /* %o2 = new qlen */ \
797 QBR %o1, QZERO; /* done if qlen == 0 */ \
798 st %o2, [%o0 + QTYPE/**/CNT]; /* delay: save qlen */ \
868 GET_NATIVE_TIME(%g1, %g2, %g3)
869 #if defined(DEBUG)
870 KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_W)
871 #else
872 KSTAT_Q_UPDATE_ND(sub, 1:, KSTAT_IO_W)
873 #endif
874 KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
875 SET_SIZE(kstat_waitq_to_runq)
876
! kstat_runq_back_to_waitq(kstat_io_t *):  move an I/O from the run
! queue back to the wait queue — decrement the run-queue count
! (panicking in DEBUG builds if it underflows, via kstat_q_panic) and
! increment the wait-queue count, timestamped with one native-time read.
877 .align 16
878 ENTRY(kstat_runq_back_to_waitq)
879 GET_NATIVE_TIME(%g1, %g2, %g3)	! %g1 = timestamp for both updates
880 #if defined(DEBUG)
881 KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_R)
882 #else
883 KSTAT_Q_UPDATE_ND(sub, 1:, KSTAT_IO_R)
884 #endif
885 KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
886 SET_SIZE(kstat_runq_back_to_waitq)
887
888 #endif /* !(lint || __lint) */
889
890 #ifdef lint
891
892 int64_t timedelta;
893 hrtime_t hres_last_tick;
894 volatile timestruc_t hrestime;
895 int64_t hrestime_adj;
896 volatile int hres_lock;
897 uint_t nsec_scale;
898 hrtime_t hrtime_base;
899 int traptrace_use_stick;
900
901 #else /* lint */
902 /*
903 * -- WARNING --
904 *
905 * The following variables MUST be together on a 128-byte boundary.
906 * In addition to the primary performance motivation (having them all
907 * on the same cache line(s)), code here and in the GET*TIME() macros
908 * assumes that they all have the same high 22 address bits (so
909 * there's only one sethi).
910 */
911 .seg ".data"
912 .global timedelta, hres_last_tick, hrestime, hrestime_adj
913 .global hres_lock, nsec_scale, hrtime_base, traptrace_use_stick
914 .global nsec_shift, adj_shift
915
916 /* XXX - above comment claims 128-bytes is necessary */
! Storage for the hires-time state.  Per the -- WARNING -- above, the
! order and grouping of these symbols is relied upon by the GET*TIME()
! macros (single sethi covers them all); do not reorder or separate.
917 .align 64
918 timedelta:
919 .word 0, 0 /* int64_t */
920 hres_last_tick:
921 .word 0, 0 /* hrtime_t */
922 hrestime:
923 .nword 0, 0 /* 2 longs */
924 hrestime_adj:
925 .word 0, 0 /* int64_t */
926 hres_lock:
927 .word 0
928 nsec_scale:
929 .word 0
930 hrtime_base:
931 .word 0, 0
932 traptrace_use_stick:
933 .word 0
934 nsec_shift:
935 .word NSEC_SHIFT
936 adj_shift:
937 .word ADJ_SHIFT
938
939 #endif /* lint */
940
941
942 /*
943 * drv_usecwait(clock_t n) [DDI/DKI - section 9F]
944 * usec_delay(int n) [compatibility - should go one day]
945 * Delay by spinning.
946 *
947 * delay for n microseconds. numbers <= 0 delay 1 usec
948 *
949 * With UltraSPARC-III the combination of supporting mixed-speed CPUs
950 * and variable clock rate for power management requires that we
951 * use %stick to implement this routine.
952 *
953 * For OPL platforms that support the "sleep" instruction, we
954 * conditionally (ifdef'ed) insert a "sleep" instruction in
955 * the loop. Note that theoritically we should have move (duplicated)
956 * the code down to spitfire/us3/opl specific asm files - but this
957 * is alot of code duplication just to add one "sleep" instruction.
958 * We chose less code duplication for this.
959 */
960
961 #if defined(lint)
962
963 /*ARGSUSED*/
964 void
965 drv_usecwait(clock_t n)
966 {}
967
968 /*ARGSUSED*/
969 void
970 usec_delay(int n)
971 {}
972
973 #else /* lint */
974
/*
 * void drv_usecwait(clock_t n) / void usec_delay(int n)
 * Spin for n microseconds (n <= 0 treated as 1).  Converts usec to
 * native ticks with sticks_per_usec, then busy-waits until the native
 * time source passes start + interval.  Clobbers %o1-%o4.
 */
975 ENTRY(drv_usecwait)
976 ALTENTRY(usec_delay)
977 brlez,a,pn %o0, 0f		! n <= 0: annulled delay sets n = 1
978 mov 1, %o0
979 0:
980 sethi %hi(sticks_per_usec), %o1
981 lduw [%o1 + %lo(sticks_per_usec)], %o1
982 mulx %o1, %o0, %o1 ! Scale usec to ticks
983 inc %o1 ! We don't start on a tick edge
984 GET_NATIVE_TIME(%o2, %o3, %o4)
985 add %o1, %o2, %o1		! %o1 = deadline (now + interval)
986 
987 1:
988 #ifdef _OPL
989 .word 0x81b01060 ! insert "sleep" instruction
990 #endif /* _OPL */ ! use byte code for now
991 cmp %o1, %o2			! deadline still ahead of last read?
992 GET_NATIVE_TIME(%o2, %o3, %o4)
993 bgeu,pt %xcc, 1b
994 nop
995 retl
996 nop
997 SET_SIZE(usec_delay)
998 SET_SIZE(drv_usecwait)
999 #endif /* lint */
1000
1001 #if defined(lint)
1002
1003 /* ARGSUSED */
1004 void
1005 pil14_interrupt(int level)
1006 {}
1007
1008 #else /* lint */
1009
1010 /*
1011 * Level-14 interrupt prologue.
1012 */
! Record the interrupted PIL and PC in the per-CPU profiling fields,
! distinguishing kernel (TSTATE_PRIV set) from user traps, then branch
! to the common PIL-interrupt handler.  Runs on trap globals (%g1-%g6).
1013 ENTRY_NP(pil14_interrupt)
1014 CPU_ADDR(%g1, %g2)		! %g1 = this CPU's cpu_t
1015 rdpr %pil, %g6 ! %g6 = interrupted PIL
1016 stn %g6, [%g1 + CPU_PROFILE_PIL] ! record interrupted PIL
1017 rdpr %tstate, %g6
1018 rdpr %tpc, %g5			! %g5 = interrupted PC
1019 btst TSTATE_PRIV, %g6 ! trap from supervisor mode?
1020 bnz,a,pt %xcc, 1f
1021 stn %g5, [%g1 + CPU_PROFILE_PC] ! if so, record kernel PC
1022 stn %g5, [%g1 + CPU_PROFILE_UPC] ! if not, record user PC
1023 ba pil_interrupt_common ! must be large-disp branch
1024 stn %g0, [%g1 + CPU_PROFILE_PC] ! zero kernel PC
1025 1: ba pil_interrupt_common ! must be large-disp branch
1026 stn %g0, [%g1 + CPU_PROFILE_UPC] ! zero user PC
1027 SET_SIZE(pil14_interrupt)
1028
1029 ENTRY_NP(tick_rtt)
1079 ! If we're here, then we have programmed TICK_COMPARE with a %tick
1080 ! which is in the past; we'll now load an initial step size, and loop
1081 ! until we've managed to program TICK_COMPARE to fire in the future.
1082 !
1083 mov 8, %o4 ! 8 = arbitrary inital step
1084 1: add %o0, %o4, %o5 ! Add the step
1085 WR_TICKCMPR(%o5,%g1,%g2,__LINE__) ! Write to TICK_CMPR
1086 GET_NATIVE_TIME(%o0, %g1, %g2) ! %o0 = tick
1087 sllx %o0, 1, %o0 ! Clear the DIS bit
1088 srlx %o0, 1, %o0
1089 cmp %o5, %o0 ! In the future?
1090 bg,a,pt %xcc, 2f ! Yes, drive on.
1091 wrpr %g0, %g5, %pstate ! delay: enable vec intr
1092 ba 1b ! No, try again.
1093 sllx %o4, 1, %o4 ! delay: double step size
1094
1095 2: ba current_thread_complete
1096 nop
1097 SET_SIZE(tick_rtt)
1098
1099 #endif /* lint */
1100
1101 #if defined(lint)
1102
1103 /* ARGSUSED */
1104 void
1105 pil15_interrupt(int level)
1106 {}
1107
1108 #else /* lint */
1109
1110 /*
1111 * Level-15 interrupt prologue.
1112 */
! Record the interrupted PC in the per-CPU CPC (hardware performance
! counter) profiling fields — kernel vs. user, mirroring
! pil14_interrupt — then branch to the level-15 epilogue.
1113 ENTRY_NP(pil15_interrupt)
1114 CPU_ADDR(%g1, %g2)		! %g1 = this CPU's cpu_t
1115 rdpr %tstate, %g6
1116 rdpr %tpc, %g5			! %g5 = interrupted PC
1117 btst TSTATE_PRIV, %g6 ! trap from supervisor mode?
1118 bnz,a,pt %xcc, 1f
1119 stn %g5, [%g1 + CPU_CPCPROFILE_PC] ! if so, record kernel PC
1120 stn %g5, [%g1 + CPU_CPCPROFILE_UPC] ! if not, record user PC
1121 ba pil15_epilogue ! must be large-disp branch
1122 stn %g0, [%g1 + CPU_CPCPROFILE_PC] ! zero kernel PC
1123 1: ba pil15_epilogue ! must be large-disp branch
1124 stn %g0, [%g1 + CPU_CPCPROFILE_UPC] ! zero user PC
1125 SET_SIZE(pil15_interrupt)
1126
1127 #endif /* lint */
1128
1129 #if defined(lint) || defined(__lint)
1130
1131 /* ARGSUSED */
1132 uint64_t
1133 find_cpufrequency(volatile uchar_t *clock_ptr)
1134 {
1135 return (0);
1136 }
1137
1138 #else /* lint */
1139
1140 #ifdef DEBUG
1141 .seg ".text"
1142 find_cpufreq_panic:
1143 .asciz "find_cpufrequency: interrupts already disabled on entry"
1144 #endif /* DEBUG */
1145
1146 ENTRY_NP(find_cpufrequency)
1147 rdpr %pstate, %g1
1148
1149 #ifdef DEBUG
1150 andcc %g1, PSTATE_IE, %g0 ! If DEBUG, check that interrupts
1151 bnz 0f ! are currently enabled
1152 sethi %hi(find_cpufreq_panic), %o1
1153 call panic
1154 or %o1, %lo(find_cpufreq_panic), %o0
1155 #endif /* DEBUG */
1156
1157 0:
1158 wrpr %g1, PSTATE_IE, %pstate ! Disable interrupts
1159 3:
1167
1168 brz,pn %o2, 3b ! if the minutes just rolled over,
1169 ! the last second could have been
1170 ! inaccurate; try again.
1171 mov %o2, %o4 ! delay: store init. val. in %o2
1172 2:
1173 GET_NATIVE_TIME(%o5, %g4, %g5)
1174 cmp %o2, %o4 ! did the seconds register roll over?
1175 be,pt %icc, 2b ! branch back if unchanged
1176 ldub [%o0], %o4 ! delay: load the new seconds val
1177
1178 brz,pn %o4, 0b ! if the minutes just rolled over,
1179 ! the last second could have been
1180 ! inaccurate; try again.
1181 wrpr %g0, %g1, %pstate ! delay: re-enable interrupts
1182
1183 retl
1184 sub %o5, %o3, %o0 ! return the difference in ticks
1185 SET_SIZE(find_cpufrequency)
1186
1187 #endif /* lint */
1188
1189 #if defined(lint)
1190 /*
1191 * Prefetch a page_t for write or read, this assumes a linear
1192 * scan of sequential page_t's.
1193 */
1194 /*ARGSUSED*/
1195 void
1196 prefetch_page_w(void *pp)
1197 {}
1198
1199 /*ARGSUSED*/
1200 void
1201 prefetch_page_r(void *pp)
1202 {}
1203 #else /* lint */
1204
1205 #if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
1206 defined(SERRANO)
1207 !
1208 ! On US-III, the prefetch instruction queue is 8 entries deep.
1209 ! Also, prefetches for write put data in the E$, which has
1210 ! lines of 512 bytes for an 8MB cache. Each E$ line is further
1211 ! subblocked into 64 byte chunks.
1212 !
1213 ! Since prefetch can only bring in 64 bytes at a time (See Sparc
1214 ! v9 Architecture Manual pp.204) and a page_t is 128 bytes,
1215 ! then 2 prefetches are required in order to bring an entire
1216 ! page into the E$.
1217 !
1218 ! Since the prefetch queue is 8 entries deep, we currently can
1219 ! only have 4 prefetches for page_t's outstanding. Thus, we
1220 ! prefetch n+4 ahead of where we are now:
1221 !
1222 ! 4 * sizeof(page_t) -> 512
1223 ! 4 * sizeof(page_t) +64 -> 576
1224 !
1341 #define STRIDE1 0x440
1342 #define STRIDE2 0x640
1343
/*
 * void prefetch_page_w(void *pp)
 * Prefetch-for-write the page_t STRIDE1/STRIDE2 bytes ahead of pp
 * (two 64-byte prefetches cover one 128-byte page_t; see the
 * stride derivation in the comment block above).
 */
1344 ENTRY(prefetch_page_w)
1345 prefetch [%o0+STRIDE1], #n_writes
1346 retl
1347 prefetch [%o0+STRIDE2], #n_writes
1348 SET_SIZE(prefetch_page_w)
1349
/*
 * void prefetch_page_r(void *pp)
 * Prefetch the page_t STRIDE1/STRIDE2 bytes ahead of pp.
 * NOTE(review): uses #n_writes even though this is the read variant —
 * presumably intentional (the page_t is typically modified shortly
 * after lookup), but confirm against the cpu module's intent.
 */
1350 ENTRY(prefetch_page_r)
1351 prefetch [%o0+STRIDE1], #n_writes
1352 retl
1353 prefetch [%o0+STRIDE2], #n_writes
1354 SET_SIZE(prefetch_page_r)
1355 #else /* OLYMPUS_C */
1356
1357 #error "You need to fix this for your new cpu type."
1358
1359 #endif /* OLYMPUS_C */
1360
1361 #endif /* lint */
1362
1363 #if defined(lint)
1364 /*
1365 * Prefetch struct smap for write.
1366 */
1367 /*ARGSUSED*/
1368 void
1369 prefetch_smap_w(void *smp)
1370 {}
1371 #else /* lint */
1372
1373 #if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
1374 defined(SERRANO)
1375
1376 #define PREFETCH_Q_LEN 8
1377
1378 #elif defined(SPITFIRE) || defined(HUMMINGBIRD)
1379
1380 #define PREFETCH_Q_LEN 3
1381
1382 #elif defined(OLYMPUS_C)
1383 !
1384 ! Use length of one for now.
1385 !
1386 #define PREFETCH_Q_LEN 1
1387
1388 #else /* OLYMPUS_C */
1389
1390 #error You need to fix this for your new cpu type.
1391
1392 #endif /* OLYMPUS_C */
1404 ! The hardware will prefetch the 64 byte cache aligned block
1405 ! that contains the address specified in the prefetch instruction.
1406 ! Since the size of the smap struct is 48 bytes, issuing 1 prefetch
1407 ! per pass will suffice as long as we prefetch far enough ahead to
1408 ! make sure we don't stall for the cases where the smap object
1409 ! spans multiple hardware prefetch blocks. Let's prefetch as far
1410 ! ahead as the hardware will allow.
1411 !
1412 ! The smap array is processed with decreasing address pointers.
1413 !
1414 #define SMAP_SIZE 48
1415 #define SMAP_STRIDE (PREFETCH_Q_LEN * SMAP_SIZE)
1416
1417 #endif /* SEGKPM_SUPPORT */
1418
/*
 * void prefetch_smap_w(void *smp)
 * Prefetch-for-write the smap struct SMAP_STRIDE bytes *behind* smp —
 * the smap array is processed with decreasing addresses (see the
 * comment above the SMAP_SIZE/SMAP_STRIDE definitions).
 */
1419 ENTRY(prefetch_smap_w)
1420 retl
1421 prefetch [%o0-SMAP_STRIDE], #n_writes	! delay slot
1422 SET_SIZE(prefetch_smap_w)
1423
1424 #endif /* lint */
1425
1426 #if defined(lint) || defined(__lint)
1427
1428 /* ARGSUSED */
1429 uint64_t
1430 getidsr(void)
1431 { return 0; }
1432
1433 #else /* lint */
1434
/*
 * uint64_t getidsr(void)
 * Return the interrupt dispatch status register, read via the
 * ASI_INTR_DISPATCH_STATUS alternate address space.
 */
1435 ENTRY_NP(getidsr)
1436 retl
1437 ldxa [%g0]ASI_INTR_DISPATCH_STATUS, %o0	! delay slot
1438 SET_SIZE(getidsr)
1439
1440 #endif /* lint */
|
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 #include "assym.h"
26
27 /*
28 * General assembly language routines.
29 * It is the intent of this file to contain routines that are
30 * specific to cpu architecture.
31 */
32
33 /*
34 * WARNING: If you add a fast trap handler which can be invoked by a
35 * non-privileged user, you may have to use the FAST_TRAP_DONE macro
36 * instead of "done" instruction to return back to the user mode. See
37 * comments for the "fast_trap_done" entry point for more information.
38 */
39 #define FAST_TRAP_DONE \
40 ba,a fast_trap_done
41
42 /*
43 * Override GET_NATIVE_TIME for the cpu module code. This is not
44 * guaranteed to be exactly one instruction, be careful of using
45 * the macro in delay slots.
194 * wrpr. The workaround is to immediately follow writes to TICK_COMPARE
195 * with a read, thus stalling the pipe and keeping following instructions
196 * from causing data corruption. Aligning to a quadword will ensure these
197 * two instructions are not split due to i$ misses.
198 */
199 #define WR_TICKCMPR(cmpr,scr1,scr2,label) \
200 ba,a .bb_errata_1.label ;\
201 .align 64 ;\
202 .bb_errata_1.label: ;\
203 wr cmpr, TICK_COMPARE ;\
204 rd TICK_COMPARE, %g0
205 #else /* BB_ERRATA_1 */
206 #define WR_TICKCMPR(in,scr1,scr2,label) \
207 wr in, TICK_COMPARE
208 #endif /* BB_ERRATA_1 */
209
210 #endif /* !CHEETAH && !HUMMINGBIRD */
211
212 #include <sys/clock.h>
213
214
215 #include <sys/asm_linkage.h>
216 #include <sys/privregs.h>
217 #include <sys/machparam.h> /* To get SYSBASE and PAGESIZE */
218 #include <sys/machthread.h>
219 #include <sys/clock.h>
220 #include <sys/intreg.h>
221 #include <sys/psr_compat.h>
222 #include <sys/isa_defs.h>
223 #include <sys/dditypes.h>
224 #include <sys/intr.h>
225
226 #include "assym.h"
227
/*
 * uint_t get_impl(void)
 * Return the CPU implementation code in %o0 (extracted by the
 * GET_CPU_IMPL macro, defined in a machine header elsewhere).
 * Leaf routine; clobbers only %o0.
 */
228 ENTRY(get_impl)
229 GET_CPU_IMPL(%o0)
230 retl
231 nop					! delay slot
232 SET_SIZE(get_impl)
233
/*
 * void tickcmpr_set(uint64_t clock_cycles)
 * Program TICK_COMPARE with %o0.  Because the write can race with the
 * advancing %tick, we verify that the value written is still in the
 * future; if not, we retry with current-tick + step, doubling the step
 * each lap (exponential backoff) until the write lands in the future.
 * Clobbers %o2-%o5 (and WR_TICKCMPR scratch).
 */
234 ENTRY_NP(tickcmpr_set)
235 ! get 64-bit clock_cycles interval
236 mov %o0, %o2			! %o2 = candidate TICK_COMPARE value
237 mov 8, %o3 ! A reasonable initial step size
238 1:
239 WR_TICKCMPR(%o2,%o4,%o5,__LINE__) ! Write to TICK_CMPR
240 
241 GET_NATIVE_TIME(%o0, %o4, %o5) ! Read %tick to confirm the
242 sllx %o0, 1, %o0 ! value we wrote was in the future.
243 srlx %o0, 1, %o0		! (shift pair clears the NPT bit 63)
244 
245 cmp %o2, %o0 ! If the value we wrote was in the
246 bg,pt %xcc, 2f ! future, then blow out of here.
247 sllx %o3, 1, %o3 ! If not, then double our step size,
248 ba,pt %xcc, 1b ! and take another lap.
249 add %o0, %o3, %o2 ! delay: next candidate = now + step
250 2:
251 retl
252 nop
253 SET_SIZE(tickcmpr_set)
254
/*
 * void tickcmpr_disable(void)
 * Disable the tick-compare interrupt by writing TICK_COMPARE with
 * only the interrupt-disable bit set (1 << TICKINT_DIS_SHFT).
 * Clobbers %g1, %o0 (and WR_TICKCMPR scratch %o4/%o5).
 */
255 ENTRY_NP(tickcmpr_disable)
256 mov 1, %g1
257 sllx %g1, TICKINT_DIS_SHFT, %o0	! %o0 = INT_DIS bit
258 WR_TICKCMPR(%o0,%o4,%o5,__LINE__) ! Write to TICK_CMPR
259 retl
260 nop
261 SET_SIZE(tickcmpr_disable)
262
263 #ifdef DEBUG
264 .seg ".text"
265 tick_write_panic:
266 .asciz "tick_write_delta: interrupts already disabled on entry"
267 #endif /* DEBUG */
268
/*
 * void tick_write_delta(uint64_t delta)
 * Add 'delta' to %tick (via the DELTA_NATIVE_TIME macro).  Used after a
 * CPR event so gethrtime() remains monotonic.  Interrupts are disabled
 * around the update, and the read/inc/write sequence is branched to a
 * 16-byte-aligned, pre-warmed location to minimize %tick skew across CPUs.
 * In:     %o0 = delta
 * Clobb:  %g1, %g2, %o2-%o5
 * Fix: added the missing SET_SIZE() so the ELF symbol size is recorded,
 * consistent with every other routine in this file.
 */
269 ENTRY_NP(tick_write_delta)
270 rdpr %pstate, %g1		! %g1 = caller's pstate (restored on exit)
271 #ifdef DEBUG
272 andcc %g1, PSTATE_IE, %g0 ! If DEBUG, check that interrupts
273 bnz 0f ! aren't already disabled.
274 sethi %hi(tick_write_panic), %o1	! delay: msg addr (becomes %i1 after save)
275 save %sp, -SA(MINFRAME), %sp ! get a new window to preserve caller
276 call panic
277 or %i1, %lo(tick_write_panic), %o0	! delay: %o0 = panic message
278 #endif /* DEBUG */
279 0: wrpr %g1, PSTATE_IE, %pstate ! Disable interrupts (IE was set; XOR clears)
280 mov %o0, %o2
281 ba 0f ! Branch to cache line-aligned instr.
282 nop
283 .align 16
284 0: nop ! The next 3 instructions are now hot.
285 DELTA_NATIVE_TIME(%o2, %o3, %o4, %o5, %g2) ! read/inc/write %tick
286 
287 retl ! Return
288 wrpr %g0, %g1, %pstate ! delay: Re-enable interrupts
	SET_SIZE(tick_write_delta)
289
/*
 * int tickcmpr_disabled(void)
 * Return nonzero (1) if the tick-compare interrupt is disabled, i.e.
 * if the INT_DIS bit of TICK_COMPARE is set.  The srlx brings bit
 * TICKINT_DIS_SHFT down to bit 0 (it is the topmost bit in use, so
 * the result is 0 or 1 — per the TICKINT_DIS_SHFT definition elsewhere).
 */
290 ENTRY_NP(tickcmpr_disabled)
291 RD_TICKCMPR(%g1, %o0)		! %g1 = TICK_COMPARE
292 retl
293 srlx %g1, TICKINT_DIS_SHFT, %o0	! delay: isolate the disable bit
294 SET_SIZE(tickcmpr_disabled)
295
296 /*
297 * Get current tick
298 */
299
/*
 * u_longlong_t gettick(void) / u_longlong_t randtick(void)
 * Return the raw native time source (via GET_NATIVE_TIME) in %o0.
 * randtick is an alternate entry returning the same value
 * (presumably used as a cheap entropy source — same code path).
 * Clobbers %o2, %o3 as macro scratch.
 */
300 ENTRY(gettick)
301 ALTENTRY(randtick)
302 GET_NATIVE_TIME(%o0, %o2, %o3)
303 retl
304 nop
305 SET_SIZE(randtick)
306 SET_SIZE(gettick)
307
308
309 /*
310 * Return the counter portion of the tick register.
311 */
312
/*
 * uint64_t gettick_counter(void)
 * Return the counter field of %tick: the shift-left/shift-right pair
 * clears the top (NPT) bit, leaving the 63-bit counter in %o0.
 */
313 ENTRY_NP(gettick_counter)
314 rdpr %tick, %o0
315 sllx %o0, 1, %o0
316 retl
317 srlx %o0, 1, %o0 ! shake off npt bit
318 SET_SIZE(gettick_counter)
319
320 /*
321 * Provide a C callable interface to the trap that reads the hi-res timer.
322 * Returns 64-bit nanosecond timestamp in %o0 and %o1.
323 */
324
/*
 * hrtime_t gethrtime(void)
 * Return the scaled high-resolution timestamp (nanoseconds) computed
 * by the GET_HRTIME macro (defined elsewhere; uses the hrestime data
 * block's scale/base values).  Result moved from %g1 to %o0.
 */
325 ENTRY_NP(gethrtime)
326 GET_HRTIME(%g1, %o0, %o1, %o2, %o3, %o4, %o5, %g2)
327 ! %g1 = hrtime
328 retl
329 mov %g1, %o0
330 SET_SIZE(gethrtime)
331
/*
 * hrtime_t gethrtime_unscaled(void)
 * Return the raw (unscaled) native time in %o0 — no nsec conversion.
 */
332 ENTRY_NP(gethrtime_unscaled)
333 GET_NATIVE_TIME(%g1, %o2, %o3) ! %g1 = native time
334 retl
335 mov %g1, %o0
336 SET_SIZE(gethrtime_unscaled)
337
/*
 * hrtime_t gethrtime_waitfree(void) / hrtime_t dtrace_gethrtime(void)
 * Return native time converted to nanoseconds via NATIVE_TIME_TO_NSEC.
 * Unlike gethrtime(), this path takes no lock ("waitfree"), which is
 * presumably why DTrace probe context uses it — confirm against the
 * GET_HRTIME / NATIVE_TIME_TO_NSEC macro definitions.
 */
338 ENTRY_NP(gethrtime_waitfree)
339 ALTENTRY(dtrace_gethrtime)
340 GET_NATIVE_TIME(%g1, %o2, %o3) ! %g1 = native time
341 NATIVE_TIME_TO_NSEC(%g1, %o2, %o3)
342 retl
343 mov %g1, %o0
344 SET_SIZE(dtrace_gethrtime)
596 inc %i1 ! release lock
597 st %i1, [%l4 + %lo(hres_lock)] ! clear hres_lock
598
599 ret
600 restore
601
602 9:
603 !
604 ! release hres_lock
605 !
606 ld [%l4 + %lo(hres_lock)], %i1
607 inc %i1
608 st %i1, [%l4 + %lo(hres_lock)]
609
610 sethi %hi(hrtime_base_panic), %o0
611 call panic
612 or %o0, %lo(hrtime_base_panic), %o0
613
614 SET_SIZE(hres_tick)
615
616 .seg ".text"
617 kstat_q_panic_msg:
618 .asciz "kstat_q_exit: qlen == 0"
619
/*
 * kstat_q_panic: panic("kstat_q_exit: qlen == 0").  Jumped to by the
 * DEBUG KSTAT_Q_UPDATE path when a queue length underflows.  The save
 * gets a fresh window so panic sees the caller preserved; after save,
 * %o0 in the new window carries the message address.  Never returns.
 */
620 ENTRY(kstat_q_panic)
621 save %sp, -SA(MINFRAME), %sp
622 sethi %hi(kstat_q_panic_msg), %o0
623 call panic
624 or %o0, %lo(kstat_q_panic_msg), %o0	! delay: complete msg address
625 /*NOTREACHED*/
626 SET_SIZE(kstat_q_panic)
627
628 #define BRZPN brz,pn
629 #define BRZPT brz,pt
630
631 #define KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE) \
632 ld [%o0 + QTYPE/**/CNT], %o1; /* %o1 = old qlen */ \
633 QOP %o1, 1, %o2; /* %o2 = new qlen */ \
634 QBR %o1, QZERO; /* done if qlen == 0 */ \
635 st %o2, [%o0 + QTYPE/**/CNT]; /* delay: save qlen */ \
705 GET_NATIVE_TIME(%g1, %g2, %g3)
706 #if defined(DEBUG)
707 KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_W)
708 #else
709 KSTAT_Q_UPDATE_ND(sub, 1:, KSTAT_IO_W)
710 #endif
711 KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
712 SET_SIZE(kstat_waitq_to_runq)
713
! kstat_runq_back_to_waitq(kstat_io_t *):  move an I/O from the run
! queue back to the wait queue — decrement the run-queue count
! (panicking in DEBUG builds if it underflows, via kstat_q_panic) and
! increment the wait-queue count, timestamped with one native-time read.
714 .align 16
715 ENTRY(kstat_runq_back_to_waitq)
716 GET_NATIVE_TIME(%g1, %g2, %g3)	! %g1 = timestamp for both updates
717 #if defined(DEBUG)
718 KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_R)
719 #else
720 KSTAT_Q_UPDATE_ND(sub, 1:, KSTAT_IO_R)
721 #endif
722 KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
723 SET_SIZE(kstat_runq_back_to_waitq)
724
725 /*
726 * -- WARNING --
727 *
728 * The following variables MUST be together on a 128-byte boundary.
729 * In addition to the primary performance motivation (having them all
730 * on the same cache line(s)), code here and in the GET*TIME() macros
731 * assumes that they all have the same high 22 address bits (so
732 * there's only one sethi).
733 */
734 .seg ".data"
735 .global timedelta, hres_last_tick, hrestime, hrestime_adj
736 .global hres_lock, nsec_scale, hrtime_base, traptrace_use_stick
737 .global nsec_shift, adj_shift
738
739 /* XXX - above comment claims 128-bytes is necessary */
! Storage for the hires-time state.  Per the -- WARNING -- above, the
! order and grouping of these symbols is relied upon by the GET*TIME()
! macros (single sethi covers them all); do not reorder or separate.
740 .align 64
741 timedelta:
742 .word 0, 0 /* int64_t */
743 hres_last_tick:
744 .word 0, 0 /* hrtime_t */
745 hrestime:
746 .nword 0, 0 /* 2 longs */
747 hrestime_adj:
748 .word 0, 0 /* int64_t */
749 hres_lock:
750 .word 0
751 nsec_scale:
752 .word 0
753 hrtime_base:
754 .word 0, 0
755 traptrace_use_stick:
756 .word 0
757 nsec_shift:
758 .word NSEC_SHIFT
759 adj_shift:
760 .word ADJ_SHIFT
761
762
763 /*
764 * drv_usecwait(clock_t n) [DDI/DKI - section 9F]
765 * usec_delay(int n) [compatibility - should go one day]
766 * Delay by spinning.
767 *
768 * delay for n microseconds. numbers <= 0 delay 1 usec
769 *
770 * With UltraSPARC-III the combination of supporting mixed-speed CPUs
771 * and variable clock rate for power management requires that we
772 * use %stick to implement this routine.
773 *
774 * For OPL platforms that support the "sleep" instruction, we
775 * conditionally (ifdef'ed) insert a "sleep" instruction in
776 * the loop. Note that theoritically we should have move (duplicated)
777 * the code down to spitfire/us3/opl specific asm files - but this
778 * is alot of code duplication just to add one "sleep" instruction.
779 * We chose less code duplication for this.
780 */
781
/*
 * void drv_usecwait(clock_t n) / void usec_delay(int n)
 * Spin for n microseconds (n <= 0 treated as 1).  Converts usec to
 * native ticks with sticks_per_usec, then busy-waits until the native
 * time source passes start + interval.  Clobbers %o1-%o4.
 */
782 ENTRY(drv_usecwait)
783 ALTENTRY(usec_delay)
784 brlez,a,pn %o0, 0f		! n <= 0: annulled delay sets n = 1
785 mov 1, %o0
786 0:
787 sethi %hi(sticks_per_usec), %o1
788 lduw [%o1 + %lo(sticks_per_usec)], %o1
789 mulx %o1, %o0, %o1 ! Scale usec to ticks
790 inc %o1 ! We don't start on a tick edge
791 GET_NATIVE_TIME(%o2, %o3, %o4)
792 add %o1, %o2, %o1		! %o1 = deadline (now + interval)
793 
794 1:
795 #ifdef _OPL
796 .word 0x81b01060 ! insert "sleep" instruction
797 #endif /* _OPL */ ! use byte code for now
798 cmp %o1, %o2			! deadline still ahead of last read?
799 GET_NATIVE_TIME(%o2, %o3, %o4)
800 bgeu,pt %xcc, 1b
801 nop
802 retl
803 nop
804 SET_SIZE(usec_delay)
805 SET_SIZE(drv_usecwait)
806
807 /*
808 * Level-14 interrupt prologue.
809 */
! Record the interrupted PIL and PC in the per-CPU profiling fields,
! distinguishing kernel (TSTATE_PRIV set) from user traps, then branch
! to the common PIL-interrupt handler.  Runs on trap globals (%g1-%g6).
810 ENTRY_NP(pil14_interrupt)
811 CPU_ADDR(%g1, %g2)		! %g1 = this CPU's cpu_t
812 rdpr %pil, %g6 ! %g6 = interrupted PIL
813 stn %g6, [%g1 + CPU_PROFILE_PIL] ! record interrupted PIL
814 rdpr %tstate, %g6
815 rdpr %tpc, %g5			! %g5 = interrupted PC
816 btst TSTATE_PRIV, %g6 ! trap from supervisor mode?
817 bnz,a,pt %xcc, 1f
818 stn %g5, [%g1 + CPU_PROFILE_PC] ! if so, record kernel PC
819 stn %g5, [%g1 + CPU_PROFILE_UPC] ! if not, record user PC
820 ba pil_interrupt_common ! must be large-disp branch
821 stn %g0, [%g1 + CPU_PROFILE_PC] ! zero kernel PC
822 1: ba pil_interrupt_common ! must be large-disp branch
823 stn %g0, [%g1 + CPU_PROFILE_UPC] ! zero user PC
824 SET_SIZE(pil14_interrupt)
825
826 ENTRY_NP(tick_rtt)
876 ! If we're here, then we have programmed TICK_COMPARE with a %tick
877 ! which is in the past; we'll now load an initial step size, and loop
878 ! until we've managed to program TICK_COMPARE to fire in the future.
879 !
880 mov 8, %o4 ! 8 = arbitrary inital step
881 1: add %o0, %o4, %o5 ! Add the step
882 WR_TICKCMPR(%o5,%g1,%g2,__LINE__) ! Write to TICK_CMPR
883 GET_NATIVE_TIME(%o0, %g1, %g2) ! %o0 = tick
884 sllx %o0, 1, %o0 ! Clear the DIS bit
885 srlx %o0, 1, %o0
886 cmp %o5, %o0 ! In the future?
887 bg,a,pt %xcc, 2f ! Yes, drive on.
888 wrpr %g0, %g5, %pstate ! delay: enable vec intr
889 ba 1b ! No, try again.
890 sllx %o4, 1, %o4 ! delay: double step size
891
892 2: ba current_thread_complete
893 nop
894 SET_SIZE(tick_rtt)
895
896 /*
897 * Level-15 interrupt prologue.
898 */
! Record the interrupted PC in the per-CPU CPC (hardware performance
! counter) profiling fields — kernel vs. user, mirroring
! pil14_interrupt — then branch to the level-15 epilogue.
899 ENTRY_NP(pil15_interrupt)
900 CPU_ADDR(%g1, %g2)		! %g1 = this CPU's cpu_t
901 rdpr %tstate, %g6
902 rdpr %tpc, %g5			! %g5 = interrupted PC
903 btst TSTATE_PRIV, %g6 ! trap from supervisor mode?
904 bnz,a,pt %xcc, 1f
905 stn %g5, [%g1 + CPU_CPCPROFILE_PC] ! if so, record kernel PC
906 stn %g5, [%g1 + CPU_CPCPROFILE_UPC] ! if not, record user PC
907 ba pil15_epilogue ! must be large-disp branch
908 stn %g0, [%g1 + CPU_CPCPROFILE_PC] ! zero kernel PC
909 1: ba pil15_epilogue ! must be large-disp branch
910 stn %g0, [%g1 + CPU_CPCPROFILE_UPC] ! zero user PC
911 SET_SIZE(pil15_interrupt)
912
913 #ifdef DEBUG
914 .seg ".text"
915 find_cpufreq_panic:
916 .asciz "find_cpufrequency: interrupts already disabled on entry"
917 #endif /* DEBUG */
918
919 ENTRY_NP(find_cpufrequency)
920 rdpr %pstate, %g1
921
922 #ifdef DEBUG
923 andcc %g1, PSTATE_IE, %g0 ! If DEBUG, check that interrupts
924 bnz 0f ! are currently enabled
925 sethi %hi(find_cpufreq_panic), %o1
926 call panic
927 or %o1, %lo(find_cpufreq_panic), %o0
928 #endif /* DEBUG */
929
930 0:
931 wrpr %g1, PSTATE_IE, %pstate ! Disable interrupts
932 3:
940
941 brz,pn %o2, 3b ! if the minutes just rolled over,
942 ! the last second could have been
943 ! inaccurate; try again.
944 mov %o2, %o4 ! delay: store init. val. in %o2
945 2:
946 GET_NATIVE_TIME(%o5, %g4, %g5)
947 cmp %o2, %o4 ! did the seconds register roll over?
948 be,pt %icc, 2b ! branch back if unchanged
949 ldub [%o0], %o4 ! delay: load the new seconds val
950
951 brz,pn %o4, 0b ! if the minutes just rolled over,
952 ! the last second could have been
953 ! inaccurate; try again.
954 wrpr %g0, %g1, %pstate ! delay: re-enable interrupts
955
956 retl
957 sub %o5, %o3, %o0 ! return the difference in ticks
958 SET_SIZE(find_cpufrequency)
959
960 #if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
961 defined(SERRANO)
962 !
963 ! On US-III, the prefetch instruction queue is 8 entries deep.
964 ! Also, prefetches for write put data in the E$, which has
965 ! lines of 512 bytes for an 8MB cache. Each E$ line is further
966 ! subblocked into 64 byte chunks.
967 !
968 ! Since prefetch can only bring in 64 bytes at a time (See Sparc
969 ! v9 Architecture Manual pp.204) and a page_t is 128 bytes,
970 ! then 2 prefetches are required in order to bring an entire
971 ! page into the E$.
972 !
973 ! Since the prefetch queue is 8 entries deep, we currently can
974 ! only have 4 prefetches for page_t's outstanding. Thus, we
975 ! prefetch n+4 ahead of where we are now:
976 !
977 ! 4 * sizeof(page_t) -> 512
978 ! 4 * sizeof(page_t) +64 -> 576
979 !
1096 #define STRIDE1 0x440
1097 #define STRIDE2 0x640
1098
/*
 * void prefetch_page_w(void *pp)
 * Prefetch-for-write the page_t STRIDE1/STRIDE2 bytes ahead of pp
 * (two 64-byte prefetches cover one 128-byte page_t; see the
 * stride derivation in the comment block above).
 */
1099 ENTRY(prefetch_page_w)
1100 prefetch [%o0+STRIDE1], #n_writes
1101 retl
1102 prefetch [%o0+STRIDE2], #n_writes
1103 SET_SIZE(prefetch_page_w)
1104
/*
 * void prefetch_page_r(void *pp)
 * Prefetch the page_t STRIDE1/STRIDE2 bytes ahead of pp.
 * NOTE(review): uses #n_writes even though this is the read variant —
 * presumably intentional (the page_t is typically modified shortly
 * after lookup), but confirm against the cpu module's intent.
 */
1105 ENTRY(prefetch_page_r)
1106 prefetch [%o0+STRIDE1], #n_writes
1107 retl
1108 prefetch [%o0+STRIDE2], #n_writes
1109 SET_SIZE(prefetch_page_r)
1110 #else /* OLYMPUS_C */
1111
1112 #error "You need to fix this for your new cpu type."
1113
1114 #endif /* OLYMPUS_C */
1115
1116 #if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
1117 defined(SERRANO)
1118
1119 #define PREFETCH_Q_LEN 8
1120
1121 #elif defined(SPITFIRE) || defined(HUMMINGBIRD)
1122
1123 #define PREFETCH_Q_LEN 3
1124
1125 #elif defined(OLYMPUS_C)
1126 !
1127 ! Use length of one for now.
1128 !
1129 #define PREFETCH_Q_LEN 1
1130
1131 #else /* OLYMPUS_C */
1132
1133 #error You need to fix this for your new cpu type.
1134
1135 #endif /* OLYMPUS_C */
1147 ! The hardware will prefetch the 64 byte cache aligned block
1148 ! that contains the address specified in the prefetch instruction.
1149 ! Since the size of the smap struct is 48 bytes, issuing 1 prefetch
1150 ! per pass will suffice as long as we prefetch far enough ahead to
1151 ! make sure we don't stall for the cases where the smap object
1152 ! spans multiple hardware prefetch blocks. Let's prefetch as far
1153 ! ahead as the hardware will allow.
1154 !
1155 ! The smap array is processed with decreasing address pointers.
1156 !
1157 #define SMAP_SIZE 48
1158 #define SMAP_STRIDE (PREFETCH_Q_LEN * SMAP_SIZE)
1159
1160 #endif /* SEGKPM_SUPPORT */
1161
/*
 * void prefetch_smap_w(void *smp)
 * Prefetch-for-write the smap struct SMAP_STRIDE bytes *behind* smp —
 * the smap array is processed with decreasing addresses (see the
 * comment above the SMAP_SIZE/SMAP_STRIDE definitions).
 */
1162 ENTRY(prefetch_smap_w)
1163 retl
1164 prefetch [%o0-SMAP_STRIDE], #n_writes	! delay slot
1165 SET_SIZE(prefetch_smap_w)
1166
/*
 * uint64_t getidsr(void)
 * Return the interrupt dispatch status register, read via the
 * ASI_INTR_DISPATCH_STATUS alternate address space.
 */
1167 ENTRY_NP(getidsr)
1168 retl
1169 ldxa [%g0]ASI_INTR_DISPATCH_STATUS, %o0	! delay slot
1170 SET_SIZE(getidsr)
1171
|