Print this page
restore sparc comments
de-linting of .s files
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/sun4u/cpu/common_asm.s
+++ new/usr/src/uts/sun4u/cpu/common_asm.s
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
↓ open down ↓ |
14 lines elided |
↑ open up ↑ |
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 */
24 24
25 -#if !defined(lint)
26 25 #include "assym.h"
27 -#endif /* !lint */
28 26
29 27 /*
30 28 * General assembly language routines.
31 29 * It is the intent of this file to contain routines that are
32 30 * specific to cpu architecture.
33 31 */
34 32
35 33 /*
36 34 * WARNING: If you add a fast trap handler which can be invoked by a
37 35 * non-privileged user, you may have to use the FAST_TRAP_DONE macro
38 36 * instead of "done" instruction to return back to the user mode. See
39 37 * comments for the "fast_trap_done" entry point for more information.
40 38 */
41 39 #define FAST_TRAP_DONE \
42 40 ba,a fast_trap_done
43 41
44 42 /*
45 43 * Override GET_NATIVE_TIME for the cpu module code. This is not
46 44 * guaranteed to be exactly one instruction, be careful of using
47 45 * the macro in delay slots.
48 46 *
49 47 * Do not use any instruction that modifies condition codes as the
50 48 * caller may depend on these to remain unchanged across the macro.
51 49 */
52 50 #if defined(CHEETAH) || defined(OLYMPUS_C)
53 51
54 52 #define GET_NATIVE_TIME(out, scr1, scr2) \
55 53 rd STICK, out
56 54 #define DELTA_NATIVE_TIME(delta, reg, scr1, scr2, scr3) \
57 55 rd STICK, reg; \
58 56 add reg, delta, reg; \
59 57 wr reg, STICK
60 58 #define RD_TICKCMPR(out, scr) \
61 59 rd STICK_COMPARE, out
62 60 #define WR_TICKCMPR(in, scr1, scr2, label) \
63 61 wr in, STICK_COMPARE
64 62
65 63 #elif defined(HUMMINGBIRD)
66 64 #include <sys/spitregs.h>
67 65
68 66 /*
69 67 * the current hummingbird version of %stick and %stick_cmp
70 68 * were both implemented as (2) 32-bit locations in ASI_IO space;
71 69 * the hdwr should support atomic r/w; meanwhile: ugly alert! ...
72 70 *
73 71 * 64-bit opcodes are required, but move only 32-bits:
74 72 *
75 73 * ldxa [phys]ASI_IO, %dst reads the low 32-bits from phys into %dst
76 74 * stxa %src, [phys]ASI_IO writes the low 32-bits from %src into phys
77 75 *
78 76 * reg equivalent [phys]ASI_IO
79 77 * ------------------ ---------------
80 78 * %stick_cmp low-32 0x1FE.0000.F060
81 79 * %stick_cmp high-32 0x1FE.0000.F068
82 80 * %stick low-32 0x1FE.0000.F070
83 81 * %stick high-32 0x1FE.0000.F078
84 82 */
85 83 #define HSTC_LOW 0x60 /* stick_cmp low 32-bits */
86 84 #define HSTC_HIGH 0x68 /* stick_cmp high 32-bits */
87 85 #define HST_LOW 0x70 /* stick low 32-bits */
88 86 #define HST_HIGH 0x78 /* stick high 32-bits */
89 87 #define HST_DIFF 0x08 /* low<-->high diff */
90 88
91 89 /*
92 90 * Any change in the number of instructions in SETL41()
93 91 * will affect SETL41_OFF
94 92 */
95 93 #define SETL41(reg, byte) \
96 94 sethi %hi(0x1FE00000), reg; /* 0000.0000.1FE0.0000 */ \
97 95 or reg, 0xF, reg; /* 0000.0000.1FE0.000F */ \
98 96 sllx reg, 12, reg; /* 0000.01FE.0000.F000 */ \
99 97 or reg, byte, reg; /* 0000.01FE.0000.F0xx */
100 98
101 99 /*
102 100 * SETL41_OFF is used to calculate the relative PC value when a
103 101 * branch instruction needs to go over SETL41() macro
104 102 */
105 103 #define SETL41_OFF 16
106 104
107 105 /*
108 106 * reading stick requires 2 loads, and there could be an intervening
109 107 * low-to-high 32-bit rollover resulting in a return value that is
110 108 * off by about (2 ^ 32); this rare case is prevented by re-reading
111 109 * the low-32 bits after the high-32 and verifying the "after" value
112 110 * is >= the "before" value; if not, increment the high-32 value.
113 111 *
114 112 * this method is limited to 1 rollover, and based on the fixed
115 113 * stick-frequency (5555555), requires the loads to complete within
116 114 * 773 seconds; incrementing the high-32 value will not overflow for
117 115 * about 52644 years.
118 116 *
119 117 * writing stick requires 2 stores; if the old/new low-32 value is
120 118 * near 0xffffffff, there could be another rollover (also rare).
121 119 * to prevent this, we first write a 0 to the low-32, then write
122 120 * new values to the high-32 then the low-32.
123 121 *
124 122 * When we detect a carry in the lower %stick register, we need to
125 123 * read HST_HIGH again. However at the point where we detect this,
126 124 * we need to rebuild the register address HST_HIGH. This involves more
127 125 * than one instruction, and a branch is unavoidable. However, most of
128 126 * the time, there is no carry. So we take the penalty of a branch
129 127 * instruction only when there is carry (less frequent).
130 128 *
131 129 * For GET_NATIVE_TIME(), we start afresh and branch to SETL41().
132 130 * For DELTA_NATIVE_TIME(), we branch to just after SETL41() since
133 131 * addr already points to HST_LOW.
134 132 *
135 133 * NOTE: this method requires disabling interrupts before using
136 134 * DELTA_NATIVE_TIME.
137 135 */
138 136 #define GET_NATIVE_TIME(out, scr, tmp) \
139 137 SETL41(scr, HST_LOW); \
140 138 ldxa [scr]ASI_IO, tmp; \
141 139 inc HST_DIFF, scr; \
142 140 ldxa [scr]ASI_IO, out; \
143 141 dec HST_DIFF, scr; \
144 142 ldxa [scr]ASI_IO, scr; \
145 143 sub scr, tmp, tmp; \
146 144 brlz,pn tmp, .-(SETL41_OFF+24); \
147 145 sllx out, 32, out; \
148 146 or out, scr, out
149 147 #define DELTA_NATIVE_TIME(delta, addr, high, low, tmp) \
150 148 SETL41(addr, HST_LOW); \
151 149 ldxa [addr]ASI_IO, tmp; \
152 150 inc HST_DIFF, addr; \
153 151 ldxa [addr]ASI_IO, high; \
154 152 dec HST_DIFF, addr; \
155 153 ldxa [addr]ASI_IO, low; \
156 154 sub low, tmp, tmp; \
157 155 brlz,pn tmp, .-24; \
158 156 sllx high, 32, high; \
159 157 or high, low, high; \
160 158 add high, delta, high; \
161 159 srl high, 0, low; \
162 160 srlx high, 32, high; \
163 161 stxa %g0, [addr]ASI_IO; \
164 162 inc HST_DIFF, addr; \
165 163 stxa high, [addr]ASI_IO; \
166 164 dec HST_DIFF, addr; \
167 165 stxa low, [addr]ASI_IO
168 166 #define RD_TICKCMPR(out, scr) \
169 167 SETL41(scr, HSTC_LOW); \
170 168 ldxa [scr]ASI_IO, out; \
171 169 inc HST_DIFF, scr; \
172 170 ldxa [scr]ASI_IO, scr; \
173 171 sllx scr, 32, scr; \
174 172 or scr, out, out
175 173 #define WR_TICKCMPR(in, scra, scrd, label) \
176 174 SETL41(scra, HSTC_HIGH); \
177 175 srlx in, 32, scrd; \
178 176 stxa scrd, [scra]ASI_IO; \
179 177 dec HST_DIFF, scra; \
180 178 stxa in, [scra]ASI_IO
181 179
182 180 #else /* !CHEETAH && !HUMMINGBIRD */
183 181
184 182 #define GET_NATIVE_TIME(out, scr1, scr2) \
185 183 rdpr %tick, out
186 184 #define DELTA_NATIVE_TIME(delta, reg, scr1, scr2, scr3) \
187 185 rdpr %tick, reg; \
188 186 add reg, delta, reg; \
189 187 wrpr reg, %tick
190 188 #define RD_TICKCMPR(out, scr) \
191 189 rd TICK_COMPARE, out
192 190 #ifdef BB_ERRATA_1 /* writes to TICK_COMPARE may fail */
193 191 /*
194 192 * Writes to the TICK_COMPARE register sometimes fail on blackbird modules.
195 193 * The failure occurs only when the following instruction decodes to wr or
196 194 * wrpr. The workaround is to immediately follow writes to TICK_COMPARE
197 195 * with a read, thus stalling the pipe and keeping following instructions
198 196 * from causing data corruption. Aligning to a quadword will ensure these
199 197 * two instructions are not split due to i$ misses.
200 198 */
201 199 #define WR_TICKCMPR(cmpr,scr1,scr2,label) \
202 200 ba,a .bb_errata_1.label ;\
203 201 .align 64 ;\
204 202 .bb_errata_1.label: ;\
205 203 wr cmpr, TICK_COMPARE ;\
↓ open down ↓ |
168 lines elided |
↑ open up ↑ |
206 204 rd TICK_COMPARE, %g0
207 205 #else /* BB_ERRATA_1 */
208 206 #define WR_TICKCMPR(in,scr1,scr2,label) \
209 207 wr in, TICK_COMPARE
210 208 #endif /* BB_ERRATA_1 */
211 209
212 210 #endif /* !CHEETAH && !HUMMINGBIRD */
213 211
214 212 #include <sys/clock.h>
215 213
216 -#if defined(lint)
217 -#include <sys/types.h>
218 -#include <sys/scb.h>
219 -#include <sys/systm.h>
220 -#include <sys/regset.h>
221 -#include <sys/sunddi.h>
222 -#include <sys/lockstat.h>
223 -#endif /* lint */
224 214
225 -
226 215 #include <sys/asm_linkage.h>
227 216 #include <sys/privregs.h>
228 217 #include <sys/machparam.h> /* To get SYSBASE and PAGESIZE */
229 218 #include <sys/machthread.h>
230 219 #include <sys/clock.h>
231 220 #include <sys/intreg.h>
232 221 #include <sys/psr_compat.h>
233 222 #include <sys/isa_defs.h>
234 223 #include <sys/dditypes.h>
235 224 #include <sys/intr.h>
236 225
237 -#if !defined(lint)
238 226 #include "assym.h"
239 -#endif /* !lint */
240 227
241 -#if defined(lint)
242 -
243 -uint_t
244 -get_impl(void)
245 -{ return (0); }
246 -
247 -#else /* lint */
248 -
249 228 ENTRY(get_impl)
250 229 GET_CPU_IMPL(%o0)
251 230 retl
252 231 nop
253 232 SET_SIZE(get_impl)
254 233
255 -#endif /* lint */
256 -
257 -#if defined(lint)
258 234 /*
259 - * Softint generated when counter field of tick reg matches value field
235 + * Softint generated when counter field of tick reg matches value field
260 236 * of tick_cmpr reg
261 237 */
262 -/*ARGSUSED*/
263 -void
264 -tickcmpr_set(uint64_t clock_cycles)
265 -{}
266 -
267 -#else /* lint */
268 -
269 238 ENTRY_NP(tickcmpr_set)
270 239 ! get 64-bit clock_cycles interval
271 240 mov %o0, %o2
272 241 mov 8, %o3 ! A reasonable initial step size
273 242 1:
274 243 WR_TICKCMPR(%o2,%o4,%o5,__LINE__) ! Write to TICK_CMPR
275 244
276 245 GET_NATIVE_TIME(%o0, %o4, %o5) ! Read %tick to confirm the
277 246 sllx %o0, 1, %o0 ! value we wrote was in the future.
278 247 srlx %o0, 1, %o0
279 248
↓ open down ↓ |
1 lines elided |
↑ open up ↑ |
280 249 cmp %o2, %o0 ! If the value we wrote was in the
281 250 bg,pt %xcc, 2f ! future, then blow out of here.
282 251 sllx %o3, 1, %o3 ! If not, then double our step size,
283 252 ba,pt %xcc, 1b ! and take another lap.
284 253 add %o0, %o3, %o2 !
285 254 2:
286 255 retl
287 256 nop
288 257 SET_SIZE(tickcmpr_set)
289 258
290 -#endif /* lint */
291 -
292 -#if defined(lint)
293 -
294 -void
295 -tickcmpr_disable(void)
296 -{}
297 -
298 -#else /* lint */
299 -
300 259 ENTRY_NP(tickcmpr_disable)
301 260 mov 1, %g1
302 261 sllx %g1, TICKINT_DIS_SHFT, %o0
303 262 WR_TICKCMPR(%o0,%o4,%o5,__LINE__) ! Write to TICK_CMPR
304 263 retl
305 264 nop
306 265 SET_SIZE(tickcmpr_disable)
307 266
308 -#endif /* lint */
267 +#ifdef DEBUG
268 + .seg ".text"
269 +tick_write_panic:
270 + .asciz "tick_write_delta: interrupts already disabled on entry"
271 +#endif /* DEBUG */
309 272
310 -#if defined(lint)
311 -
312 273 /*
313 274 * tick_write_delta() increments %tick by the specified delta. This should
314 275 * only be called after a CPR event to assure that gethrtime() continues to
315 276 * increase monotonically. Obviously, writing %tick needs to be done very
316 277 * carefully to avoid introducing unnecessary %tick skew across CPUs. For
317 278 * this reason, we make sure we're i-cache hot before actually writing to
318 279 * %tick.
319 280 */
320 -/*ARGSUSED*/
321 -void
322 -tick_write_delta(uint64_t delta)
323 -{}
324 -
325 -#else /* lint */
326 -
327 -#ifdef DEBUG
328 - .seg ".text"
329 -tick_write_panic:
330 - .asciz "tick_write_delta: interrupts already disabled on entry"
331 -#endif /* DEBUG */
332 -
333 281 ENTRY_NP(tick_write_delta)
334 282 rdpr %pstate, %g1
335 283 #ifdef DEBUG
336 284 andcc %g1, PSTATE_IE, %g0 ! If DEBUG, check that interrupts
337 285 bnz 0f ! aren't already disabled.
338 286 sethi %hi(tick_write_panic), %o1
339 287 save %sp, -SA(MINFRAME), %sp ! get a new window to preserve caller
340 288 call panic
341 289 or %i1, %lo(tick_write_panic), %o0
342 290 #endif /* DEBUG */
343 291 0: wrpr %g1, PSTATE_IE, %pstate ! Disable interrupts
344 292 mov %o0, %o2
345 293 ba 0f ! Branch to cache line-aligned instr.
346 294 nop
347 295 .align 16
348 296 0: nop ! The next 3 instructions are now hot.
349 297 DELTA_NATIVE_TIME(%o2, %o3, %o4, %o5, %g2) ! read/inc/write %tick
350 298
351 299 retl ! Return
352 300 wrpr %g0, %g1, %pstate ! delay: Re-enable interrupts
353 -#endif /* lint */
354 301
355 -#if defined(lint)
356 -/*
357 - * return 1 if disabled
358 - */
359 -
360 -int
361 -tickcmpr_disabled(void)
362 -{ return (0); }
363 -
364 -#else /* lint */
365 -
366 302 ENTRY_NP(tickcmpr_disabled)
367 303 RD_TICKCMPR(%g1, %o0)
368 304 retl
369 305 srlx %g1, TICKINT_DIS_SHFT, %o0
370 306 SET_SIZE(tickcmpr_disabled)
371 307
372 -#endif /* lint */
373 -
374 308 /*
375 309 * Get current tick
376 310 */
377 -#if defined(lint)
378 311
379 -u_longlong_t
380 -gettick(void)
381 -{ return (0); }
382 -
383 -u_longlong_t
384 -randtick(void)
385 -{ return (0); }
386 -
387 -#else /* lint */
388 -
389 312 ENTRY(gettick)
390 313 ALTENTRY(randtick)
391 314 GET_NATIVE_TIME(%o0, %o2, %o3)
392 315 retl
393 316 nop
394 317 SET_SIZE(randtick)
395 318 SET_SIZE(gettick)
396 319
397 -#endif /* lint */
398 320
399 -
400 321 /*
401 322 * Return the counter portion of the tick register.
402 323 */
403 324
404 -#if defined(lint)
405 -
406 -uint64_t
407 -gettick_counter(void)
408 -{ return(0); }
409 -
410 -#else /* lint */
411 -
412 325 ENTRY_NP(gettick_counter)
413 326 rdpr %tick, %o0
414 327 sllx %o0, 1, %o0
415 328 retl
416 329 srlx %o0, 1, %o0 ! shake off npt bit
417 330 SET_SIZE(gettick_counter)
418 -#endif /* lint */
419 331
420 332 /*
421 333 * Provide a C callable interface to the trap that reads the hi-res timer.
422 334 * Returns the 64-bit nanosecond timestamp in %o0.
423 335 */
424 336
425 -#if defined(lint)
426 -
427 -hrtime_t
428 -gethrtime(void)
429 -{
430 - return ((hrtime_t)0);
431 -}
432 -
433 -hrtime_t
434 -gethrtime_unscaled(void)
435 -{
436 - return ((hrtime_t)0);
437 -}
438 -
439 -hrtime_t
440 -gethrtime_max(void)
441 -{
442 - return ((hrtime_t)0);
443 -}
444 -
445 -void
446 -scalehrtime(hrtime_t *hrt)
447 -{
448 - *hrt = 0;
449 -}
450 -
451 -void
452 -gethrestime(timespec_t *tp)
453 -{
454 - tp->tv_sec = 0;
455 - tp->tv_nsec = 0;
456 -}
457 -
458 -time_t
459 -gethrestime_sec(void)
460 -{
461 - return (0);
462 -}
463 -
464 -void
465 -gethrestime_lasttick(timespec_t *tp)
466 -{
467 - tp->tv_sec = 0;
468 - tp->tv_nsec = 0;
469 -}
470 -
471 -/*ARGSUSED*/
472 -void
473 -hres_tick(void)
474 -{
475 -}
476 -
477 -void
478 -panic_hres_tick(void)
479 -{
480 -}
481 -
482 -#else /* lint */
483 -
484 337 ENTRY_NP(gethrtime)
485 338 GET_HRTIME(%g1, %o0, %o1, %o2, %o3, %o4, %o5, %g2)
486 339 ! %g1 = hrtime
487 340 retl
488 341 mov %g1, %o0
489 342 SET_SIZE(gethrtime)
490 343
491 344 ENTRY_NP(gethrtime_unscaled)
492 345 GET_NATIVE_TIME(%g1, %o2, %o3) ! %g1 = native time
493 346 retl
494 347 mov %g1, %o0
495 348 SET_SIZE(gethrtime_unscaled)
496 349
497 350 ENTRY_NP(gethrtime_waitfree)
498 351 ALTENTRY(dtrace_gethrtime)
499 352 GET_NATIVE_TIME(%g1, %o2, %o3) ! %g1 = native time
500 353 NATIVE_TIME_TO_NSEC(%g1, %o2, %o3)
501 354 retl
502 355 mov %g1, %o0
503 356 SET_SIZE(dtrace_gethrtime)
504 357 SET_SIZE(gethrtime_waitfree)
505 358
506 359 ENTRY(gethrtime_max)
507 360 NATIVE_TIME_MAX(%g1)
508 361 NATIVE_TIME_TO_NSEC(%g1, %o0, %o1)
509 362
510 363 ! hrtime_t's are signed, max hrtime_t must be positive
511 364 mov -1, %o2
512 365 brlz,a %g1, 1f
513 366 srlx %o2, 1, %g1
514 367 1:
515 368 retl
516 369 mov %g1, %o0
517 370 SET_SIZE(gethrtime_max)
518 371
519 372 ENTRY(scalehrtime)
520 373 ldx [%o0], %o1
521 374 NATIVE_TIME_TO_NSEC(%o1, %o2, %o3)
522 375 retl
523 376 stx %o1, [%o0]
524 377 SET_SIZE(scalehrtime)
525 378
526 379 /*
527 380 * Fast trap to return a timestamp, uses trap window, leaves traps
528 381 * disabled. Returns a 64-bit nanosecond timestamp in %o0 and %o1.
529 382 *
530 383 * This is the handler for the ST_GETHRTIME trap.
531 384 */
532 385
533 386 ENTRY_NP(get_timestamp)
534 387 GET_HRTIME(%g1, %g2, %g3, %g4, %g5, %o0, %o1, %o2) ! %g1 = hrtime
535 388 srlx %g1, 32, %o0 ! %o0 = hi32(%g1)
536 389 srl %g1, 0, %o1 ! %o1 = lo32(%g1)
537 390 FAST_TRAP_DONE
538 391 SET_SIZE(get_timestamp)
539 392
540 393 /*
541 394 * Macro to convert GET_HRESTIME() bits into a timestamp.
542 395 *
543 396 * We use two separate macros so that the platform-dependent GET_HRESTIME()
544 397 * can be as small as possible; CONV_HRESTIME() implements the generic part.
545 398 */
546 399 #define CONV_HRESTIME(hrestsec, hrestnsec, adj, nslt, nano) \
547 400 brz,pt adj, 3f; /* no adjustments, it's easy */ \
548 401 add hrestnsec, nslt, hrestnsec; /* hrest.tv_nsec += nslt */ \
549 402 brlz,pn adj, 2f; /* if hrestime_adj negative */ \
550 403 srlx nslt, ADJ_SHIFT, nslt; /* delay: nslt >>= 4 */ \
551 404 subcc adj, nslt, %g0; /* hrestime_adj - nslt/16 */ \
552 405 movg %xcc, nslt, adj; /* adj by min(adj, nslt/16) */ \
553 406 ba 3f; /* go convert to sec/nsec */ \
554 407 add hrestnsec, adj, hrestnsec; /* delay: apply adjustment */ \
555 408 2: addcc adj, nslt, %g0; /* hrestime_adj + nslt/16 */ \
556 409 bge,a,pt %xcc, 3f; /* is adj less negative? */ \
557 410 add hrestnsec, adj, hrestnsec; /* yes: hrest.nsec += adj */ \
558 411 sub hrestnsec, nslt, hrestnsec; /* no: hrest.nsec -= nslt/16 */ \
559 412 3: cmp hrestnsec, nano; /* more than a billion? */ \
560 413 bl,pt %xcc, 4f; /* if not, we're done */ \
561 414 nop; /* delay: do nothing :( */ \
562 415 add hrestsec, 1, hrestsec; /* hrest.tv_sec++; */ \
563 416 sub hrestnsec, nano, hrestnsec; /* hrest.tv_nsec -= NANOSEC; */ \
564 417 ba,a 3b; /* check >= billion again */ \
565 418 4:
566 419
567 420 ENTRY_NP(gethrestime)
568 421 GET_HRESTIME(%o1, %o2, %o3, %o4, %o5, %g1, %g2, %g3, %g4)
569 422 CONV_HRESTIME(%o1, %o2, %o3, %o4, %o5)
570 423 stn %o1, [%o0]
571 424 retl
572 425 stn %o2, [%o0 + CLONGSIZE]
573 426 SET_SIZE(gethrestime)
574 427
575 428 /*
576 429 * Similar to gethrestime(), but gethrestime_sec() returns current hrestime
577 430 * seconds.
578 431 */
579 432 ENTRY_NP(gethrestime_sec)
580 433 GET_HRESTIME(%o0, %o2, %o3, %o4, %o5, %g1, %g2, %g3, %g4)
581 434 CONV_HRESTIME(%o0, %o2, %o3, %o4, %o5)
582 435 retl ! %o0 current hrestime seconds
583 436 nop
584 437 SET_SIZE(gethrestime_sec)
585 438
586 439 /*
587 440 * Returns the hrestime on the last tick. This is simpler than gethrestime()
588 441 * and gethrestime_sec(): no conversion is required. gethrestime_lasttick()
589 442 * follows the same locking algorithm as GET_HRESTIME and GET_HRTIME,
590 443 * outlined in detail in clock.h. (Unlike GET_HRESTIME/GET_HRTIME, we don't
591 444 * rely on load dependencies to effect the membar #LoadLoad, instead declaring
592 445 * it explicitly.)
593 446 */
594 447 ENTRY_NP(gethrestime_lasttick)
595 448 sethi %hi(hres_lock), %o1
596 449 0:
597 450 lduw [%o1 + %lo(hres_lock)], %o2 ! Load lock value
598 451 membar #LoadLoad ! Load of lock must complete
599 452 andn %o2, 1, %o2 ! Mask off lowest bit
600 453 ldn [%o1 + %lo(hrestime)], %g1 ! Seconds.
601 454 add %o1, %lo(hrestime), %o4
602 455 ldn [%o4 + CLONGSIZE], %g2 ! Nanoseconds.
603 456 membar #LoadLoad ! All loads must complete
604 457 lduw [%o1 + %lo(hres_lock)], %o3 ! Reload lock value
605 458 cmp %o3, %o2 ! If lock is locked or has
606 459 bne 0b ! changed, retry.
607 460 stn %g1, [%o0] ! Delay: store seconds
608 461 retl
609 462 stn %g2, [%o0 + CLONGSIZE] ! Delay: store nanoseconds
610 463 SET_SIZE(gethrestime_lasttick)
611 464
612 465 /*
613 466 * Fast trap for gettimeofday(). Returns a timestruc_t in %o0 and %o1.
614 467 *
615 468 * This is the handler for the ST_GETHRESTIME trap.
616 469 */
617 470
618 471 ENTRY_NP(get_hrestime)
619 472 GET_HRESTIME(%o0, %o1, %g1, %g2, %g3, %g4, %g5, %o2, %o3)
620 473 CONV_HRESTIME(%o0, %o1, %g1, %g2, %g3)
621 474 FAST_TRAP_DONE
622 475 SET_SIZE(get_hrestime)
623 476
624 477 /*
625 478 * Fast trap to return lwp virtual time, uses trap window, leaves traps
626 479 * disabled. Returns a 64-bit number in %o0:%o1, which is the number
627 480 * of nanoseconds consumed.
628 481 *
629 482 * This is the handler for the ST_GETHRVTIME trap.
630 483 *
631 484 * Register usage:
632 485 * %o0, %o1 = return lwp virtual time
633 486 * %o2 = CPU/thread
634 487 * %o3 = lwp
635 488 * %g1 = scratch
636 489 * %g5 = scratch
637 490 */
638 491 ENTRY_NP(get_virtime)
639 492 GET_NATIVE_TIME(%g5, %g1, %g2) ! %g5 = native time in ticks
640 493 CPU_ADDR(%g2, %g3) ! CPU struct ptr to %g2
641 494 ldn [%g2 + CPU_THREAD], %g2 ! thread pointer to %g2
642 495 ldn [%g2 + T_LWP], %g3 ! lwp pointer to %g3
643 496
644 497 /*
645 498 * Subtract start time of current microstate from time
646 499 * of day to get increment for lwp virtual time.
647 500 */
648 501 ldx [%g3 + LWP_STATE_START], %g1 ! ms_state_start
649 502 sub %g5, %g1, %g5
650 503
651 504 /*
652 505 * Add current value of ms_acct[LMS_USER]
653 506 */
654 507 ldx [%g3 + LWP_ACCT_USER], %g1 ! ms_acct[LMS_USER]
655 508 add %g5, %g1, %g5
656 509 NATIVE_TIME_TO_NSEC(%g5, %g1, %o0)
657 510
658 511 srl %g5, 0, %o1 ! %o1 = lo32(%g5)
659 512 srlx %g5, 32, %o0 ! %o0 = hi32(%g5)
660 513
661 514 FAST_TRAP_DONE
662 515 SET_SIZE(get_virtime)
663 516
664 517
665 518
666 519 .seg ".text"
667 520 hrtime_base_panic:
668 521 .asciz "hrtime_base stepping back"
669 522
670 523
671 524 ENTRY_NP(hres_tick)
672 525 save %sp, -SA(MINFRAME), %sp ! get a new window
673 526
674 527 sethi %hi(hrestime), %l4
675 528 ldstub [%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5 ! try locking
676 529 7: tst %l5
677 530 bz,pt %xcc, 8f ! if we got it, drive on
678 531 ld [%l4 + %lo(nsec_scale)], %l5 ! delay: %l5 = scaling factor
679 532 ldub [%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
680 533 9: tst %l5
681 534 bz,a,pn %xcc, 7b
682 535 ldstub [%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
683 536 ba,pt %xcc, 9b
684 537 ldub [%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
685 538 8:
686 539 membar #StoreLoad|#StoreStore
687 540
688 541 !
689 542 ! update hres_last_tick. %l5 has the scaling factor (nsec_scale).
690 543 !
691 544 ldx [%l4 + %lo(hrtime_base)], %g1 ! load current hrtime_base
692 545 GET_NATIVE_TIME(%l0, %l3, %l6) ! current native time
693 546 stx %l0, [%l4 + %lo(hres_last_tick)]! prev = current
694 547 ! convert native time to nsecs
695 548 NATIVE_TIME_TO_NSEC_SCALE(%l0, %l5, %l2, NSEC_SHIFT)
696 549
697 550 sub %l0, %g1, %i1 ! get accurate nsec delta
698 551
699 552 ldx [%l4 + %lo(hrtime_base)], %l1
700 553 cmp %l1, %l0
701 554 bg,pn %xcc, 9f
702 555 nop
703 556
704 557 stx %l0, [%l4 + %lo(hrtime_base)] ! update hrtime_base
705 558
706 559 !
707 560 ! apply adjustment, if any
708 561 !
709 562 ldx [%l4 + %lo(hrestime_adj)], %l0 ! %l0 = hrestime_adj
710 563 brz %l0, 2f
711 564 ! hrestime_adj == 0 ?
712 565 ! yes, skip adjustments
713 566 clr %l5 ! delay: set adj to zero
714 567 tst %l0 ! is hrestime_adj >= 0 ?
715 568 bge,pt %xcc, 1f ! yes, go handle positive case
716 569 srl %i1, ADJ_SHIFT, %l5 ! delay: %l5 = adj
717 570
718 571 addcc %l0, %l5, %g0 ! hrestime_adj < -adj ?
719 572 bl,pt %xcc, 2f ! yes, use current adj
720 573 neg %l5 ! delay: %l5 = -adj
721 574 ba,pt %xcc, 2f
722 575 mov %l0, %l5 ! no, so set adj = hrestime_adj
723 576 1:
724 577 subcc %l0, %l5, %g0 ! hrestime_adj < adj ?
725 578 bl,a,pt %xcc, 2f ! yes, set adj = hrestime_adj
726 579 mov %l0, %l5 ! delay: adj = hrestime_adj
727 580 2:
728 581 ldx [%l4 + %lo(timedelta)], %l0 ! %l0 = timedelta
729 582 sub %l0, %l5, %l0 ! timedelta -= adj
730 583
731 584 stx %l0, [%l4 + %lo(timedelta)] ! store new timedelta
732 585 stx %l0, [%l4 + %lo(hrestime_adj)] ! hrestime_adj = timedelta
733 586
734 587 or %l4, %lo(hrestime), %l2
735 588 ldn [%l2], %i2 ! %i2:%i3 = hrestime sec:nsec
736 589 ldn [%l2 + CLONGSIZE], %i3
737 590 add %i3, %l5, %i3 ! hrestime.nsec += adj
738 591 add %i3, %i1, %i3 ! hrestime.nsec += nslt
739 592
740 593 set NANOSEC, %l5 ! %l5 = NANOSEC
741 594 cmp %i3, %l5
742 595 bl,pt %xcc, 5f ! if hrestime.tv_nsec < NANOSEC
743 596 sethi %hi(one_sec), %i1 ! delay
744 597 add %i2, 0x1, %i2 ! hrestime.tv_sec++
745 598 sub %i3, %l5, %i3 ! hrestime.tv_nsec - NANOSEC
746 599 mov 0x1, %l5
747 600 st %l5, [%i1 + %lo(one_sec)]
748 601 5:
749 602 stn %i2, [%l2]
750 603 stn %i3, [%l2 + CLONGSIZE] ! store the new hrestime
751 604
752 605 membar #StoreStore
753 606
754 607 ld [%l4 + %lo(hres_lock)], %i1
755 608 inc %i1 ! release lock
756 609 st %i1, [%l4 + %lo(hres_lock)] ! clear hres_lock
757 610
758 611 ret
759 612 restore
760 613
761 614 9:
762 615 !
763 616 ! release hres_lock
764 617 !
↓ open down ↓ |
271 lines elided |
↑ open up ↑ |
765 618 ld [%l4 + %lo(hres_lock)], %i1
766 619 inc %i1
767 620 st %i1, [%l4 + %lo(hres_lock)]
768 621
769 622 sethi %hi(hrtime_base_panic), %o0
770 623 call panic
771 624 or %o0, %lo(hrtime_base_panic), %o0
772 625
773 626 SET_SIZE(hres_tick)
774 627
775 -#endif /* lint */
776 -
777 -#if !defined(lint) && !defined(__lint)
778 -
779 628 .seg ".text"
780 629 kstat_q_panic_msg:
781 630 .asciz "kstat_q_exit: qlen == 0"
782 631
783 632 ENTRY(kstat_q_panic)
784 633 save %sp, -SA(MINFRAME), %sp
785 634 sethi %hi(kstat_q_panic_msg), %o0
786 635 call panic
787 636 or %o0, %lo(kstat_q_panic_msg), %o0
788 637 /*NOTREACHED*/
789 638 SET_SIZE(kstat_q_panic)
790 639
791 640 #define BRZPN brz,pn
792 641 #define BRZPT brz,pt
793 642
794 643 #define KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE) \
795 644 ld [%o0 + QTYPE/**/CNT], %o1; /* %o1 = old qlen */ \
796 645 QOP %o1, 1, %o2; /* %o2 = new qlen */ \
797 646 QBR %o1, QZERO; /* done if qlen == 0 */ \
798 647 st %o2, [%o0 + QTYPE/**/CNT]; /* delay: save qlen */ \
799 648 ldx [%o0 + QTYPE/**/LASTUPDATE], %o3; \
800 649 ldx [%o0 + QTYPE/**/TIME], %o4; /* %o4 = old time */ \
801 650 ldx [%o0 + QTYPE/**/LENTIME], %o5; /* %o5 = old lentime */ \
802 651 sub %g1, %o3, %o2; /* %o2 = time delta */ \
803 652 mulx %o1, %o2, %o3; /* %o3 = cur lentime */ \
804 653 add %o4, %o2, %o4; /* %o4 = new time */ \
805 654 add %o5, %o3, %o5; /* %o5 = new lentime */ \
806 655 stx %o4, [%o0 + QTYPE/**/TIME]; /* save time */ \
807 656 stx %o5, [%o0 + QTYPE/**/LENTIME]; /* save lentime */ \
808 657 QRETURN; \
809 658 stx %g1, [%o0 + QTYPE/**/LASTUPDATE]; /* lastupdate = now */
810 659
811 660 #if !defined(DEBUG)
812 661 /*
813 662 * same as KSTAT_Q_UPDATE but without:
814 663 * QBR %o1, QZERO;
815 664 * to be used only with non-debug build. mimics ASSERT() behaviour.
816 665 */
817 666 #define KSTAT_Q_UPDATE_ND(QOP, QRETURN, QTYPE) \
818 667 ld [%o0 + QTYPE/**/CNT], %o1; /* %o1 = old qlen */ \
819 668 QOP %o1, 1, %o2; /* %o2 = new qlen */ \
820 669 st %o2, [%o0 + QTYPE/**/CNT]; /* delay: save qlen */ \
821 670 ldx [%o0 + QTYPE/**/LASTUPDATE], %o3; \
822 671 ldx [%o0 + QTYPE/**/TIME], %o4; /* %o4 = old time */ \
823 672 ldx [%o0 + QTYPE/**/LENTIME], %o5; /* %o5 = old lentime */ \
824 673 sub %g1, %o3, %o2; /* %o2 = time delta */ \
825 674 mulx %o1, %o2, %o3; /* %o3 = cur lentime */ \
826 675 add %o4, %o2, %o4; /* %o4 = new time */ \
827 676 add %o5, %o3, %o5; /* %o5 = new lentime */ \
828 677 stx %o4, [%o0 + QTYPE/**/TIME]; /* save time */ \
829 678 stx %o5, [%o0 + QTYPE/**/LENTIME]; /* save lentime */ \
830 679 QRETURN; \
831 680 stx %g1, [%o0 + QTYPE/**/LASTUPDATE]; /* lastupdate = now */
832 681 #endif
833 682
834 683 .align 16
835 684 ENTRY(kstat_waitq_enter)
836 685 GET_NATIVE_TIME(%g1, %g2, %g3)
837 686 KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
838 687 SET_SIZE(kstat_waitq_enter)
839 688
840 689 .align 16
841 690 ENTRY(kstat_waitq_exit)
842 691 GET_NATIVE_TIME(%g1, %g2, %g3)
843 692 #if defined(DEBUG)
844 693 KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_W)
845 694 #else
846 695 KSTAT_Q_UPDATE_ND(sub, retl, KSTAT_IO_W)
847 696 #endif
848 697 SET_SIZE(kstat_waitq_exit)
849 698
850 699 .align 16
851 700 ENTRY(kstat_runq_enter)
852 701 GET_NATIVE_TIME(%g1, %g2, %g3)
853 702 KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
854 703 SET_SIZE(kstat_runq_enter)
855 704
856 705 .align 16
857 706 ENTRY(kstat_runq_exit)
858 707 GET_NATIVE_TIME(%g1, %g2, %g3)
859 708 #if defined(DEBUG)
860 709 KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_R)
861 710 #else
862 711 KSTAT_Q_UPDATE_ND(sub, retl, KSTAT_IO_R)
863 712 #endif
864 713 SET_SIZE(kstat_runq_exit)
865 714
866 715 .align 16
867 716 ENTRY(kstat_waitq_to_runq)
868 717 GET_NATIVE_TIME(%g1, %g2, %g3)
869 718 #if defined(DEBUG)
870 719 KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_W)
871 720 #else
872 721 KSTAT_Q_UPDATE_ND(sub, 1:, KSTAT_IO_W)
873 722 #endif
874 723 KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
875 724 SET_SIZE(kstat_waitq_to_runq)
876 725
877 726 .align 16
↓ open down ↓ |
89 lines elided |
↑ open up ↑ |
878 727 ENTRY(kstat_runq_back_to_waitq)
879 728 GET_NATIVE_TIME(%g1, %g2, %g3)
880 729 #if defined(DEBUG)
881 730 KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_R)
882 731 #else
883 732 KSTAT_Q_UPDATE_ND(sub, 1:, KSTAT_IO_R)
884 733 #endif
885 734 KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
886 735 SET_SIZE(kstat_runq_back_to_waitq)
887 736
888 -#endif /* !(lint || __lint) */
889 -
890 -#ifdef lint
891 -
892 -int64_t timedelta;
893 -hrtime_t hres_last_tick;
894 -volatile timestruc_t hrestime;
895 -int64_t hrestime_adj;
896 -volatile int hres_lock;
897 -uint_t nsec_scale;
898 -hrtime_t hrtime_base;
899 -int traptrace_use_stick;
900 -
901 -#else /* lint */
902 737 /*
903 738 * -- WARNING --
904 739 *
905 740 * The following variables MUST be together on a 128-byte boundary.
906 741 * In addition to the primary performance motivation (having them all
907 742 * on the same cache line(s)), code here and in the GET*TIME() macros
908 743 * assumes that they all have the same high 22 address bits (so
909 744 * there's only one sethi).
910 745 */
911 746 .seg ".data"
912 747 .global timedelta, hres_last_tick, hrestime, hrestime_adj
913 748 .global hres_lock, nsec_scale, hrtime_base, traptrace_use_stick
914 749 .global nsec_shift, adj_shift
915 750
916 751 /* XXX - above comment claims 128-bytes is necessary */
917 752 .align 64
918 753 timedelta:
919 754 .word 0, 0 /* int64_t */
920 755 hres_last_tick:
921 756 .word 0, 0 /* hrtime_t */
922 757 hrestime:
923 758 .nword 0, 0 /* 2 longs */
924 759 hrestime_adj:
925 760 .word 0, 0 /* int64_t */
926 761 hres_lock:
927 762 .word 0
928 763 nsec_scale:
↓ open down ↓ |
17 lines elided |
↑ open up ↑ |
929 764 .word 0
930 765 hrtime_base:
931 766 .word 0, 0
932 767 traptrace_use_stick:
933 768 .word 0
934 769 nsec_shift:
935 770 .word NSEC_SHIFT
936 771 adj_shift:
937 772 .word ADJ_SHIFT
938 773
939 -#endif /* lint */
940 774
941 -
942 775 /*
943 776 * drv_usecwait(clock_t n) [DDI/DKI - section 9F]
944 777 * usec_delay(int n) [compatibility - should go one day]
945 778 * Delay by spinning.
946 779 *
947 780 * delay for n microseconds. numbers <= 0 delay 1 usec
948 781 *
949 782 * With UltraSPARC-III the combination of supporting mixed-speed CPUs
950 783 * and variable clock rate for power management requires that we
951 784 * use %stick to implement this routine.
952 785 *
953 786 * For OPL platforms that support the "sleep" instruction, we
954 787 * conditionally (ifdef'ed) insert a "sleep" instruction in
955 788 * the loop. Note that theoretically we should have moved (duplicated)
956 789 * the code down to spitfire/us3/opl specific asm files - but this
957 790 * is a lot of code duplication just to add one "sleep" instruction.
958 791 * We chose less code duplication for this.
959 792 */
960 793
961 -#if defined(lint)
962 -
963 -/*ARGSUSED*/
964 -void
965 -drv_usecwait(clock_t n)
966 -{}
967 -
968 -/*ARGSUSED*/
969 -void
970 -usec_delay(int n)
971 -{}
972 -
973 -#else /* lint */
974 -
975 794 ENTRY(drv_usecwait)
976 795 ALTENTRY(usec_delay)
977 796 brlez,a,pn %o0, 0f
978 797 mov 1, %o0
979 798 0:
980 799 sethi %hi(sticks_per_usec), %o1
981 800 lduw [%o1 + %lo(sticks_per_usec)], %o1
982 801 mulx %o1, %o0, %o1 ! Scale usec to ticks
983 802 inc %o1 ! We don't start on a tick edge
984 803 GET_NATIVE_TIME(%o2, %o3, %o4)
985 804 add %o1, %o2, %o1
986 805
987 806 1:
988 807 #ifdef _OPL
↓ open down ↓ |
4 lines elided |
↑ open up ↑ |
989 808 .word 0x81b01060 ! insert "sleep" instruction
990 809 #endif /* _OPL */ ! use byte code for now
991 810 cmp %o1, %o2
992 811 GET_NATIVE_TIME(%o2, %o3, %o4)
993 812 bgeu,pt %xcc, 1b
994 813 nop
995 814 retl
996 815 nop
997 816 SET_SIZE(usec_delay)
998 817 SET_SIZE(drv_usecwait)
999 -#endif /* lint */
1000 818
1001 -#if defined(lint)
1002 -
1003 -/* ARGSUSED */
1004 -void
1005 -pil14_interrupt(int level)
1006 -{}
1007 -
1008 -#else /* lint */
1009 -
1010 819 /*
1011 820 * Level-14 interrupt prologue.
1012 821 */
1013 822 ENTRY_NP(pil14_interrupt)
1014 823 CPU_ADDR(%g1, %g2)
1015 824 rdpr %pil, %g6 ! %g6 = interrupted PIL
1016 825 stn %g6, [%g1 + CPU_PROFILE_PIL] ! record interrupted PIL
1017 826 rdpr %tstate, %g6
1018 827 rdpr %tpc, %g5
1019 828 btst TSTATE_PRIV, %g6 ! trap from supervisor mode?
1020 829 bnz,a,pt %xcc, 1f
1021 830 stn %g5, [%g1 + CPU_PROFILE_PC] ! if so, record kernel PC
1022 831 stn %g5, [%g1 + CPU_PROFILE_UPC] ! if not, record user PC
1023 832 ba pil_interrupt_common ! must be large-disp branch
1024 833 stn %g0, [%g1 + CPU_PROFILE_PC] ! zero kernel PC
1025 834 1: ba pil_interrupt_common ! must be large-disp branch
1026 835 stn %g0, [%g1 + CPU_PROFILE_UPC] ! zero user PC
1027 836 SET_SIZE(pil14_interrupt)
1028 837
1029 838 ENTRY_NP(tick_rtt)
1030 839 !
1031 840 ! Load TICK_COMPARE into %o5; if bit 63 is set, then TICK_COMPARE is
1032 841 ! disabled. If TICK_COMPARE is enabled, we know that we need to
1033 842 ! reenqueue the interrupt request structure. We'll then check TICKINT
1034 843 ! in SOFTINT; if it's set, then we know that we were in a TICK_COMPARE
1035 844 ! interrupt. In this case, TICK_COMPARE may have been rewritten
1036 845 ! recently; we'll compare %o5 to the current time to verify that it's
1037 846 ! in the future.
1038 847 !
1039 848 ! Note that %o5 is live until after 1f.
1040 849 ! XXX - there is a subroutine call while %o5 is live!
1041 850 !
1042 851 RD_TICKCMPR(%o5, %g1)
1043 852 srlx %o5, TICKINT_DIS_SHFT, %g1
1044 853 brnz,pt %g1, 2f
1045 854 nop
1046 855
1047 856 rdpr %pstate, %g5
1048 857 andn %g5, PSTATE_IE, %g1
1049 858 wrpr %g0, %g1, %pstate ! Disable vec interrupts
1050 859
1051 860 sethi %hi(cbe_level14_inum), %o1
1052 861 ldx [%o1 + %lo(cbe_level14_inum)], %o1
1053 862 call intr_enqueue_req ! preserves %o5 and %g5
1054 863 mov PIL_14, %o0
1055 864
1056 865 ! Check SOFTINT for TICKINT/STICKINT
1057 866 rd SOFTINT, %o4
1058 867 set (TICK_INT_MASK | STICK_INT_MASK), %o0
1059 868 andcc %o4, %o0, %g0
1060 869 bz,a,pn %icc, 2f
1061 870 wrpr %g0, %g5, %pstate ! Enable vec interrupts
1062 871
1063 872 ! clear TICKINT/STICKINT
1064 873 wr %o0, CLEAR_SOFTINT
1065 874
1066 875 !
1067 876 ! Now that we've cleared TICKINT, we can reread %tick and confirm
1068 877 ! that the value we programmed is still in the future. If it isn't,
1069 878 ! we need to reprogram TICK_COMPARE to fire as soon as possible.
1070 879 !
1071 880 GET_NATIVE_TIME(%o0, %g1, %g2) ! %o0 = tick
1072 881 sllx %o0, 1, %o0 ! Clear the DIS bit
1073 882 srlx %o0, 1, %o0
1074 883 cmp %o5, %o0 ! In the future?
1075 884 bg,a,pt %xcc, 2f ! Yes, drive on.
1076 885 wrpr %g0, %g5, %pstate ! delay: enable vec intr
1077 886
1078 887 !
1079 888 ! If we're here, then we have programmed TICK_COMPARE with a %tick
1080 889 ! which is in the past; we'll now load an initial step size, and loop
1081 890 ! until we've managed to program TICK_COMPARE to fire in the future.
1082 891 !
1083 892 mov 8, %o4 ! 8 = arbitrary initial step
1084 893 1: add %o0, %o4, %o5 ! Add the step
1085 894 WR_TICKCMPR(%o5,%g1,%g2,__LINE__) ! Write to TICK_CMPR
1086 895 GET_NATIVE_TIME(%o0, %g1, %g2) ! %o0 = tick
1087 896 sllx %o0, 1, %o0 ! Clear the DIS bit
1088 897 srlx %o0, 1, %o0
↓ open down ↓ |
69 lines elided |
↑ open up ↑ |
1089 898 cmp %o5, %o0 ! In the future?
1090 899 bg,a,pt %xcc, 2f ! Yes, drive on.
1091 900 wrpr %g0, %g5, %pstate ! delay: enable vec intr
1092 901 ba 1b ! No, try again.
1093 902 sllx %o4, 1, %o4 ! delay: double step size
1094 903
1095 904 2: ba current_thread_complete
1096 905 nop
1097 906 SET_SIZE(tick_rtt)
1098 907
1099 -#endif /* lint */
1100 -
1101 -#if defined(lint)
1102 -
1103 -/* ARGSUSED */
1104 -void
1105 -pil15_interrupt(int level)
1106 -{}
1107 -
1108 -#else /* lint */
1109 -
1110 908 /*
1111 909 * Level-15 interrupt prologue.
1112 910 */
1113 911 ENTRY_NP(pil15_interrupt)
1114 912 CPU_ADDR(%g1, %g2)
1115 913 rdpr %tstate, %g6
1116 914 rdpr %tpc, %g5
1117 915 btst TSTATE_PRIV, %g6 ! trap from supervisor mode?
1118 916 bnz,a,pt %xcc, 1f
1119 917 stn %g5, [%g1 + CPU_CPCPROFILE_PC] ! if so, record kernel PC
1120 918 stn %g5, [%g1 + CPU_CPCPROFILE_UPC] ! if not, record user PC
1121 919 ba pil15_epilogue ! must be large-disp branch
1122 920 stn %g0, [%g1 + CPU_CPCPROFILE_PC] ! zero kernel PC
1123 921 1: ba pil15_epilogue ! must be large-disp branch
1124 922 stn %g0, [%g1 + CPU_CPCPROFILE_UPC] ! zero user PC
1125 923 SET_SIZE(pil15_interrupt)
1126 924
1127 -#endif /* lint */
1128 -
1129 -#if defined(lint) || defined(__lint)
1130 -
1131 -/* ARGSUSED */
1132 -uint64_t
1133 -find_cpufrequency(volatile uchar_t *clock_ptr)
1134 -{
1135 - return (0);
1136 -}
1137 -
1138 -#else /* lint */
1139 -
1140 925 #ifdef DEBUG
1141 926 .seg ".text"
1142 927 find_cpufreq_panic:
1143 928 .asciz "find_cpufrequency: interrupts already disabled on entry"
1144 929 #endif /* DEBUG */
1145 930
1146 931 ENTRY_NP(find_cpufrequency)
1147 932 rdpr %pstate, %g1
1148 933
1149 934 #ifdef DEBUG
1150 935 andcc %g1, PSTATE_IE, %g0 ! If DEBUG, check that interrupts
1151 936 bnz 0f ! are currently enabled
1152 937 sethi %hi(find_cpufreq_panic), %o1
1153 938 call panic
1154 939 or %o1, %lo(find_cpufreq_panic), %o0
1155 940 #endif /* DEBUG */
1156 941
1157 942 0:
1158 943 wrpr %g1, PSTATE_IE, %pstate ! Disable interrupts
1159 944 3:
1160 945 ldub [%o0], %o1 ! Read the number of seconds
1161 946 mov %o1, %o2 ! remember initial value in %o2
1162 947 1:
1163 948 GET_NATIVE_TIME(%o3, %g4, %g5)
1164 949 cmp %o1, %o2 ! did the seconds register roll over?
1165 950 be,pt %icc, 1b ! branch back if unchanged
1166 951 ldub [%o0], %o2 ! delay: load the new seconds val
1167 952
1168 953 brz,pn %o2, 3b ! if the minutes just rolled over,
1169 954 ! the last second could have been
1170 955 ! inaccurate; try again.
1171 956 mov %o2, %o4 ! delay: store init. val. in %o4
1172 957 2:
1173 958 GET_NATIVE_TIME(%o5, %g4, %g5)
1174 959 cmp %o2, %o4 ! did the seconds register roll over?
1175 960 be,pt %icc, 2b ! branch back if unchanged
1176 961 ldub [%o0], %o4 ! delay: load the new seconds val
↓ open down ↓ |
27 lines elided |
↑ open up ↑ |
1177 962
1178 963 brz,pn %o4, 0b ! if the minutes just rolled over,
1179 964 ! the last second could have been
1180 965 ! inaccurate; try again.
1181 966 wrpr %g0, %g1, %pstate ! delay: re-enable interrupts
1182 967
1183 968 retl
1184 969 sub %o5, %o3, %o0 ! return the difference in ticks
1185 970 SET_SIZE(find_cpufrequency)
1186 971
1187 -#endif /* lint */
1188 -
1189 -#if defined(lint)
1190 -/*
1191 - * Prefetch a page_t for write or read, this assumes a linear
1192 - * scan of sequential page_t's.
1193 - */
1194 -/*ARGSUSED*/
1195 -void
1196 -prefetch_page_w(void *pp)
1197 -{}
1198 -
1199 -/*ARGSUSED*/
1200 -void
1201 -prefetch_page_r(void *pp)
1202 -{}
1203 -#else /* lint */
1204 -
1205 972 #if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
1206 973 defined(SERRANO)
1207 974 !
1208 975 ! On US-III, the prefetch instruction queue is 8 entries deep.
1209 976 ! Also, prefetches for write put data in the E$, which has
1210 977 ! lines of 512 bytes for an 8MB cache. Each E$ line is further
1211 978 ! subblocked into 64 byte chunks.
1212 979 !
1213 980 ! Since prefetch can only bring in 64 bytes at a time (See Sparc
1214 981 ! v9 Architecture Manual pp.204) and a page_t is 128 bytes,
1215 982 ! then 2 prefetches are required in order to bring an entire
1216 983 ! page into the E$.
1217 984 !
1218 985 ! Since the prefetch queue is 8 entries deep, we currently can
1219 986 ! only have 4 prefetches for page_t's outstanding. Thus, we
1220 987 ! prefetch n+4 ahead of where we are now:
1221 988 !
1222 989 ! 4 * sizeof(page_t) -> 512
1223 990 ! 4 * sizeof(page_t) +64 -> 576
1224 991 !
1225 992 ! Example
1226 993 ! =======
1227 994 ! contiguous page array in memory...
1228 995 !
1229 996 ! |AAA1|AAA2|BBB1|BBB2|CCC1|CCC2|DDD1|DDD2|XXX1|XXX2|YYY1|YYY2|...
1230 997 ! ^ ^ ^ ^ ^ ^
1231 998 ! pp | pp+4*sizeof(page)+64
1232 999 ! |
1233 1000 ! pp+4*sizeof(page)
1234 1001 !
1235 1002 ! Prefetch
1236 1003 ! Queue
1237 1004 ! +-------+<--- In this iteration, we're working with pp (AAA1),
1238 1005 ! |Preftch| but we enqueue prefetch for addr = XXX1
1239 1006 ! | XXX1 |
1240 1007 ! +-------+<--- this queue slot will be a prefetch instruction
1241 1008 ! |Preftch| for addr = pp + 4*sizeof(page_t) + 64 (or second
1242 1009 ! | XXX2 | half of page XXX)
1243 1010 ! +-------+
1244 1011 ! |Preftch|<-+- The next time around this function, we'll be
1245 1012 ! | YYY1 | | working with pp = BBB1, but will be enqueueing
1246 1013 ! +-------+ | prefetches for both halves of page YYY,
1247 1014 ! |Preftch| | while both halves of page XXX are in transit
1248 1015 ! | YYY2 |<-+ and make their way into the E$.
1249 1016 ! +-------+
1250 1017 ! |Preftch|
1251 1018 ! | ZZZ1 |
1252 1019 ! +-------+
1253 1020 ! . .
1254 1021 ! : :
1255 1022 !
1256 1023 ! E$
1257 1024 ! +============================================...
1258 1025 ! | XXX1 | XXX2 | YYY1 | YYY2 | ZZZ1 | ZZZ2 |
1259 1026 ! +============================================...
1260 1027 ! | | | | | | |
1261 1028 ! +============================================...
1262 1029 ! .
1263 1030 ! :
1264 1031 !
1265 1032 ! So we should expect the first four page accesses to stall
1266 1033 ! while we warm up the cache, after which most of the pages
1267 1034 ! will have their pp ready in the E$.
1268 1035 !
1269 1036 ! Also note that if sizeof(page_t) grows beyond 128, then
1270 1037 ! we'll need an additional prefetch to get an entire page
1271 1038 ! into the E$, thus reducing the number of outstanding page
1272 1039 ! prefetches to 2 (ie. 3 prefetches/page = 6 queue slots)
1273 1040 ! etc.
1274 1041 !
1275 1042 ! Cheetah+
1276 1043 ! ========
1277 1044 ! On Cheetah+ we use "#n_writes" prefetches as these avoid
1278 1045 ! unnecessary RTS->RTO bus transaction state change, and
1279 1046 ! just issue an RTO transaction. (See pp.77 of Cheetah+ Delta
↓ open down ↓ |
65 lines elided |
↑ open up ↑ |
1280 1047 ! PRM). On Cheetah, #n_writes prefetches are reflected with
1281 1048 ! RTS->RTO state transition regardless.
1282 1049 !
1283 1050 #define STRIDE1 512
1284 1051 #define STRIDE2 576
1285 1052
1286 1053 #if STRIDE1 != (PAGE_SIZE * 4)
1287 1054 #error "STRIDE1 != (PAGE_SIZE * 4)"
1288 1055 #endif /* STRIDE1 != (PAGE_SIZE * 4) */
1289 1056
1057 +/*
1058 + * Prefetch a page_t for write or read, this assumes a linear
1059 + * scan of sequential page_t's.
1060 + */
1290 1061 ENTRY(prefetch_page_w)
1291 1062 prefetch [%o0+STRIDE1], #n_writes
1292 1063 retl
1293 1064 prefetch [%o0+STRIDE2], #n_writes
1294 1065 SET_SIZE(prefetch_page_w)
1295 1066
1296 1067 !
1297 1068 ! Note on CHEETAH to prefetch for read, we really use #one_write.
1298 1069 ! This fetches to E$ (general use) rather than P$ (floating point use).
1299 1070 !
1300 1071 ENTRY(prefetch_page_r)
1301 1072 prefetch [%o0+STRIDE1], #one_write
1302 1073 retl
1303 1074 prefetch [%o0+STRIDE2], #one_write
1304 1075 SET_SIZE(prefetch_page_r)
1305 1076
1306 1077 #elif defined(SPITFIRE) || defined(HUMMINGBIRD)
1307 1078
1308 1079 !
1309 1080 ! UltraSparcII can have up to 3 prefetches outstanding.
1310 1081 ! A page_t is 128 bytes (2 prefetches of 64 bytes each)
1311 1082 ! So prefetch for pp + 1, which is
1312 1083 !
1313 1084 ! pp + sizeof(page_t)
1314 1085 ! and
1315 1086 ! pp + sizeof(page_t) + 64
1316 1087 !
1317 1088 #define STRIDE1 128
1318 1089 #define STRIDE2 192
1319 1090
1320 1091 #if STRIDE1 != PAGE_SIZE
1321 1092 #error "STRIDE1 != PAGE_SIZE"
1322 1093 #endif /* STRIDE1 != PAGE_SIZE */
1323 1094
1324 1095 ENTRY(prefetch_page_w)
1325 1096 prefetch [%o0+STRIDE1], #n_writes
1326 1097 retl
1327 1098 prefetch [%o0+STRIDE2], #n_writes
1328 1099 SET_SIZE(prefetch_page_w)
1329 1100
1330 1101 ENTRY(prefetch_page_r)
1331 1102 prefetch [%o0+STRIDE1], #n_reads
1332 1103 retl
1333 1104 prefetch [%o0+STRIDE2], #n_reads
1334 1105 SET_SIZE(prefetch_page_r)
1335 1106
1336 1107 #elif defined(OLYMPUS_C)
1337 1108 !
1338 1109 ! Prefetch strides for Olympus-C
1339 1110 !
1340 1111
1341 1112 #define STRIDE1 0x440
1342 1113 #define STRIDE2 0x640
1343 1114
1344 1115 ENTRY(prefetch_page_w)
1345 1116 prefetch [%o0+STRIDE1], #n_writes
1346 1117 retl
1347 1118 prefetch [%o0+STRIDE2], #n_writes
1348 1119 SET_SIZE(prefetch_page_w)
1349 1120
1350 1121 ENTRY(prefetch_page_r)
↓ open down ↓ |
51 lines elided |
↑ open up ↑ |
1351 1122 prefetch [%o0+STRIDE1], #n_writes
1352 1123 retl
1353 1124 prefetch [%o0+STRIDE2], #n_writes
1354 1125 SET_SIZE(prefetch_page_r)
1355 1126 #else /* OLYMPUS_C */
1356 1127
1357 1128 #error "You need to fix this for your new cpu type."
1358 1129
1359 1130 #endif /* OLYMPUS_C */
1360 1131
1361 -#endif /* lint */
1362 -
1363 -#if defined(lint)
1364 -/*
1365 - * Prefetch struct smap for write.
1366 - */
1367 -/*ARGSUSED*/
1368 -void
1369 -prefetch_smap_w(void *smp)
1370 -{}
1371 -#else /* lint */
1372 -
1373 1132 #if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
1374 1133 defined(SERRANO)
1375 1134
1376 1135 #define PREFETCH_Q_LEN 8
1377 1136
1378 1137 #elif defined(SPITFIRE) || defined(HUMMINGBIRD)
1379 1138
1380 1139 #define PREFETCH_Q_LEN 3
1381 1140
1382 1141 #elif defined(OLYMPUS_C)
1383 1142 !
1384 1143 ! Use length of one for now.
1385 1144 !
1386 1145 #define PREFETCH_Q_LEN 1
1387 1146
1388 1147 #else /* OLYMPUS_C */
1389 1148
1390 1149 #error You need to fix this for your new cpu type.
1391 1150
1392 1151 #endif /* OLYMPUS_C */
1393 1152
1394 1153 #include <vm/kpm.h>
1395 1154
1396 1155 #ifdef SEGKPM_SUPPORT
1397 1156
1398 1157 #define SMAP_SIZE 72
1399 1158 #define SMAP_STRIDE (((PREFETCH_Q_LEN * 64) / SMAP_SIZE) * 64)
1400 1159
1401 1160 #else /* SEGKPM_SUPPORT */
1402 1161
1403 1162 !
1404 1163 ! The hardware will prefetch the 64 byte cache aligned block
1405 1164 ! that contains the address specified in the prefetch instruction.
1406 1165 ! Since the size of the smap struct is 48 bytes, issuing 1 prefetch
1407 1166 ! per pass will suffice as long as we prefetch far enough ahead to
1408 1167 ! make sure we don't stall for the cases where the smap object
↓ open down ↓ |
26 lines elided |
↑ open up ↑ |
1409 1168 ! spans multiple hardware prefetch blocks. Let's prefetch as far
1410 1169 ! ahead as the hardware will allow.
1411 1170 !
1412 1171 ! The smap array is processed with decreasing address pointers.
1413 1172 !
1414 1173 #define SMAP_SIZE 48
1415 1174 #define SMAP_STRIDE (PREFETCH_Q_LEN * SMAP_SIZE)
1416 1175
1417 1176 #endif /* SEGKPM_SUPPORT */
1418 1177
1178 +/*
1179 + * Prefetch struct smap for write.
1180 + */
1419 1181 ENTRY(prefetch_smap_w)
1420 1182 retl
1421 1183 prefetch [%o0-SMAP_STRIDE], #n_writes
1422 1184 SET_SIZE(prefetch_smap_w)
1423 1185
1424 -#endif /* lint */
1425 -
1426 -#if defined(lint) || defined(__lint)
1427 -
1428 -/* ARGSUSED */
1429 -uint64_t
1430 -getidsr(void)
1431 -{ return 0; }
1432 -
1433 -#else /* lint */
1434 -
1435 1186 ENTRY_NP(getidsr)
1436 1187 retl
1437 1188 ldxa [%g0]ASI_INTR_DISPATCH_STATUS, %o0
1438 1189 SET_SIZE(getidsr)
1439 1190
1440 -#endif /* lint */
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX