/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 *
 * Assembly code support for Cheetah/Cheetah+ modules
 */

#include "assym.h"

#include <sys/asm_linkage.h>
#include <sys/mmu.h>
#include <vm/hat_sfmmu.h>
#include <sys/machparam.h>
#include <sys/machcpuvar.h>
#include <sys/machthread.h>
#include <sys/machtrap.h>
#include <sys/privregs.h>
#include <sys/trap.h>
#include <sys/cheetahregs.h>
#include <sys/us3_module.h>
#include <sys/xc_impl.h>
#include <sys/intreg.h>
#include <sys/async.h>
#include <sys/clock.h>
#include <sys/cheetahasm.h>
#include <sys/cmpregs.h>

#ifdef TRAPTRACE
#include <sys/traptrace.h>
#endif	/* TRAPTRACE */

/* BEGIN CSTYLED */

#define	DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3)			\
	ldxa	[%g0]ASI_DCU, tmp1					;\
	btst	DCU_DC, tmp1		/* is dcache enabled? */	;\
	bz,pn	%icc, 1f						;\
	ASM_LD(tmp1, dcache_linesize)					;\
	ASM_LD(tmp2, dflush_type)					;\
	cmp	tmp2, FLUSHPAGE_TYPE					;\
	be,pt	%icc, 2f						;\
	nop								;\
	sllx	arg1, CHEETAH_DC_VBIT_SHIFT, arg1 /* tag to compare */	;\
	ASM_LD(tmp3, dcache_size)					;\
	cmp	tmp2, FLUSHMATCH_TYPE					;\
	be,pt	%icc, 3f						;\
	nop								;\
	/*								\
	 * flushtype = FLUSHALL_TYPE, flush the whole thing		\
	 * tmp3 = cache size						\
	 * tmp1 = cache line size					\
	 */								\
	sub	tmp3, tmp1, tmp2					;\
4:									\
	stxa	%g0, [tmp2]ASI_DC_TAG					;\
	membar	#Sync							;\
	cmp	%g0, tmp2						;\
	bne,pt	%icc, 4b						;\
	sub	tmp2, tmp1, tmp2					;\
	ba,pt	%icc, 1f						;\
	nop								;\
	/*								\
	 * flushtype = FLUSHPAGE_TYPE					\
	 * arg1 = pfn							\
	 * arg2 = virtual color						\
	 * tmp1 = cache line size					\
	 * tmp2 = tag from cache					\
	 * tmp3 = counter						\
	 */								\
2:									\
	set	MMU_PAGESIZE, tmp3					;\
	sllx	arg1, MMU_PAGESHIFT, arg1  /* pfn to 43 bit PA */	;\
	sub	tmp3, tmp1, tmp3					;\
4:									\
	stxa	%g0, [arg1 + tmp3]ASI_DC_INVAL				;\
	membar	#Sync							;\
5:									\
	cmp	%g0, tmp3						;\
	bnz,pt	%icc, 4b		/* branch if not done */	;\
	sub	tmp3, tmp1, tmp3					;\
	ba,pt	%icc, 1f						;\
	nop								;\
	/*								\
	 * flushtype = FLUSHMATCH_TYPE					\
	 * arg1 = tag to compare against				\
	 * tmp1 = cache line size					\
	 * tmp3 = cache size						\
	 * arg2 = counter						\
	 * tmp2 = cache tag						\
	 */								\
3:									\
	sub	tmp3, tmp1, arg2					;\
4:									\
	ldxa	[arg2]ASI_DC_TAG, tmp2		/* read tag */		;\
	btst	CHEETAH_DC_VBIT_MASK, tmp2				;\
	bz,pn	%icc, 5f		/* br if no valid sub-blocks */	;\
	andn	tmp2, CHEETAH_DC_VBIT_MASK, tmp2 /* clear out v bits */ ;\
	cmp	tmp2, arg1						;\
	bne,pn	%icc, 5f		/* branch if tag miss */	;\
	nop								;\
	stxa	%g0, [arg2]ASI_DC_TAG					;\
	membar	#Sync							;\
5:									\
	cmp	%g0, arg2						;\
	bne,pt	%icc, 4b		/* branch if not done */	;\
	sub	arg2, tmp1, arg2					;\
1:
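
/*
 * For reference, an approximate C-level sketch of DCACHE_FLUSHPAGE above
 * (illustrative pseudocode only, not kernel code; dc_tag()/dc_inval()
 * stand in for the ASI_DC_TAG/ASI_DC_INVAL diagnostic accesses):
 *
 *	if (dcache_enabled) {
 *		if (dflush_type == FLUSHPAGE_TYPE) {
 *			pa = pfn << MMU_PAGESHIFT;
 *			for (off = MMU_PAGESIZE - linesize; ; off -= linesize) {
 *				dc_inval(pa + off);
 *				if (off == 0)
 *					break;
 *			}
 *		} else if (dflush_type == FLUSHMATCH_TYPE) {
 *			tag = pfn << CHEETAH_DC_VBIT_SHIFT;
 *			for (idx = dcache_size - linesize; ; idx -= linesize) {
 *				t = dc_tag(idx);
 *				if ((t & CHEETAH_DC_VBIT_MASK) != 0 &&
 *				    (t & ~CHEETAH_DC_VBIT_MASK) == tag)
 *					dc_tag(idx) = 0;
 *				if (idx == 0)
 *					break;
 *			}
 *		} else {				(FLUSHALL_TYPE)
 *			for (idx = dcache_size - linesize; ; idx -= linesize) {
 *				dc_tag(idx) = 0;
 *				if (idx == 0)
 *					break;
 *			}
 *		}
 *	}
 */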

/*
 * macro that flushes the entire dcache color
 * dcache size = 64K, one way 16K
 *
 * In:
 *	arg = virtual color register (not clobbered)
 *	way = way#, can either be a constant or a register (not clobbered)
 *	tmp1, tmp2, tmp3 = scratch registers
 *
 */
#define	DCACHE_FLUSHCOLOR(arg, way, tmp1, tmp2, tmp3)			\
	ldxa	[%g0]ASI_DCU, tmp1;					\
	btst	DCU_DC, tmp1;		/* is dcache enabled? */	\
	bz,pn	%icc, 1f;						\
	ASM_LD(tmp1, dcache_linesize)					\
	/*								\
	 * arg = virtual color						\
	 * tmp1 = cache line size					\
	 */								\
	sllx	arg, MMU_PAGESHIFT, tmp2; /* color to dcache page */	\
	mov	way, tmp3;						\
	sllx	tmp3, 14, tmp3;		/* One way 16K */		\
	or	tmp2, tmp3, tmp3;					\
	set	MMU_PAGESIZE, tmp2;					\
	/*								\
	 * tmp2 = page size						\
	 * tmp3 = cached page in dcache					\
	 */								\
	sub	tmp2, tmp1, tmp2;					\
2:									\
	stxa	%g0, [tmp3 + tmp2]ASI_DC_TAG;				\
	membar	#Sync;							\
	cmp	%g0, tmp2;						\
	bne,pt	%icc, 2b;						\
	sub	tmp2, tmp1, tmp2;					\
1:
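
/*
 * Worked example of the DCACHE_FLUSHCOLOR index math: with 8K base pages
 * (MMU_PAGESHIFT == 13), color 1 in way 2 starts at the dcache index
 * (2 << 14) | (1 << 13) = 32768 + 8192 = 40960, and the loop then
 * invalidates MMU_PAGESIZE bytes from that point, one dcache_linesize
 * line at a time, via stores to ASI_DC_TAG.
 */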

/* END CSTYLED */

/*
 * Cheetah MMU and Cache operations.
 */

	ENTRY_NP(vtag_flushpage)
	/*
	 * flush page from the tlb
	 *
	 * %o0 = vaddr
	 * %o1 = sfmmup
	 */
	rdpr	%pstate, %o5
#ifdef DEBUG
	PANIC_IF_INTR_DISABLED_PSTR(%o5, u3_di_label0, %g1)
#endif /* DEBUG */
	/*
	 * disable ints
	 */
	andn	%o5, PSTATE_IE, %o4
	wrpr	%o4, 0, %pstate

	/*
	 * Then, blow out the tlb
	 * Interrupts are disabled to prevent the primary ctx register
	 * from changing underneath us.
	 */
	sethi	%hi(ksfmmup), %o3
	ldx	[%o3 + %lo(ksfmmup)], %o3
	cmp	%o3, %o1
	bne,pt	%xcc, 1f			! if not kernel as, go to 1
	sethi	%hi(FLUSH_ADDR), %o3
	/*
	 * For Kernel demaps use primary. type = page implicitly
	 */
	stxa	%g0, [%o0]ASI_DTLB_DEMAP	/* dmmu flush for KCONTEXT */
	stxa	%g0, [%o0]ASI_ITLB_DEMAP	/* immu flush for KCONTEXT */
	flush	%o3
	retl
	wrpr	%g0, %o5, %pstate		/* enable interrupts */
1:
	/*
	 * User demap.  We need to set the primary context properly.
	 * Secondary context cannot be used for Cheetah IMMU.
	 * %o0 = vaddr
	 * %o1 = sfmmup
	 * %o3 = FLUSH_ADDR
	 */
	SFMMU_CPU_CNUM(%o1, %g1, %g2)		! %g1 = sfmmu cnum on this CPU

	ldub	[%o1 + SFMMU_CEXT], %o4		! %o4 = sfmmup->sfmmu_cext
	sll	%o4, CTXREG_EXT_SHIFT, %o4
	or	%g1, %o4, %g1			! %g1 = primary pgsz | cnum

	wrpr	%g0, 1, %tl
	set	MMU_PCONTEXT, %o4
	or	DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %o0, %o0
	ldxa	[%o4]ASI_DMMU, %o2		! %o2 = save old ctxnum
	srlx	%o2, CTXREG_NEXT_SHIFT, %o1	! need to preserve nucleus pgsz
	sllx	%o1, CTXREG_NEXT_SHIFT, %o1	! %o1 = nucleus pgsz
	or	%g1, %o1, %g1	! %g1 = nucleus pgsz | primary pgsz | cnum
	stxa	%g1, [%o4]ASI_DMMU		! wr new ctxnum

	stxa	%g0, [%o0]ASI_DTLB_DEMAP
	stxa	%g0, [%o0]ASI_ITLB_DEMAP
	stxa	%o2, [%o4]ASI_DMMU		/* restore old ctxnum */
	flush	%o3
	wrpr	%g0, 0, %tl

	retl
	wrpr	%g0, %o5, %pstate		/* enable interrupts */
	SET_SIZE(vtag_flushpage)
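
/*
 * C-level view of vtag_flushpage (a sketch; the prototype below is
 * assumed from the register usage above, see the sfmmu headers for
 * the authoritative declaration):
 *
 *	void vtag_flushpage(caddr_t vaddr, uint64_t sfmmup);
 *
 * Kernel mappings are demapped directly via the primary context
 * (KCONTEXT); user mappings temporarily program MMU_PCONTEXT with the
 * target context number (preserving the nucleus page size bits) around
 * the demap, with interrupts disabled throughout.
 */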

	ENTRY_NP2(vtag_flushall, demap_all)
	/*
	 * flush the tlb
	 */
	sethi	%hi(FLUSH_ADDR), %o3
	set	DEMAP_ALL_TYPE, %g1
	stxa	%g0, [%g1]ASI_DTLB_DEMAP
	stxa	%g0, [%g1]ASI_ITLB_DEMAP
	flush	%o3
	retl
	nop
	SET_SIZE(demap_all)
	SET_SIZE(vtag_flushall)


	ENTRY_NP(vtag_flushpage_tl1)
	/*
	 * x-trap to flush page from tlb and tsb
	 *
	 * %g1 = vaddr, zero-extended on 32-bit kernel
	 * %g2 = sfmmup
	 *
	 * assumes TSBE_TAG = 0
	 */
	srln	%g1, MMU_PAGESHIFT, %g1

	sethi	%hi(ksfmmup), %g3
	ldx	[%g3 + %lo(ksfmmup)], %g3
	cmp	%g3, %g2
	bne,pt	%xcc, 1f			! if not kernel as, go to 1
	slln	%g1, MMU_PAGESHIFT, %g1		/* g1 = vaddr */

	/* We need to demap in the kernel context */
	or	DEMAP_NUCLEUS | DEMAP_PAGE_TYPE, %g1, %g1
	stxa	%g0, [%g1]ASI_DTLB_DEMAP
	stxa	%g0, [%g1]ASI_ITLB_DEMAP
	retry
1:
	/* We need to demap in a user context */
	or	DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %g1, %g1

	SFMMU_CPU_CNUM(%g2, %g6, %g3)		! %g6 = sfmmu cnum on this CPU

	ldub	[%g2 + SFMMU_CEXT], %g4		! %g4 = sfmmup->cext
	sll	%g4, CTXREG_EXT_SHIFT, %g4
	or	%g6, %g4, %g6			! %g6 = pgsz | cnum

	set	MMU_PCONTEXT, %g4
	ldxa	[%g4]ASI_DMMU, %g5		/* rd old ctxnum */
	srlx	%g5, CTXREG_NEXT_SHIFT, %g2	/* %g2 = nucleus pgsz */
	sllx	%g2, CTXREG_NEXT_SHIFT, %g2	/* preserve nucleus pgsz */
	or	%g6, %g2, %g6	/* %g6 = nucleus pgsz | primary pgsz | cnum */
	stxa	%g6, [%g4]ASI_DMMU		/* wr new ctxnum */
	stxa	%g0, [%g1]ASI_DTLB_DEMAP
	stxa	%g0, [%g1]ASI_ITLB_DEMAP
	stxa	%g5, [%g4]ASI_DMMU		/* restore old ctxnum */
	retry
	SET_SIZE(vtag_flushpage_tl1)


	ENTRY_NP(vtag_flush_pgcnt_tl1)
	/*
	 * x-trap to flush pgcnt MMU_PAGESIZE pages from tlb
	 *
	 * %g1 = vaddr, zero-extended on 32-bit kernel
	 * %g2 = <sfmmup58|pgcnt6>, (pgcnt - 1) is passed in via pgcnt6 bits.
	 *
	 * NOTE: this handler relies on the fact that no
	 *	interrupts or traps can occur during the loop
	 *	issuing the TLB_DEMAP operations. It is assumed
	 *	that interrupts are disabled and this code is
	 *	fetching from the kernel locked text address.
	 *
	 * assumes TSBE_TAG = 0
	 */
	set	SFMMU_PGCNT_MASK, %g4
	and	%g4, %g2, %g3			/* g3 = pgcnt - 1 */
	add	%g3, 1, %g3			/* g3 = pgcnt */

	andn	%g2, SFMMU_PGCNT_MASK, %g2	/* g2 = sfmmup */
	srln	%g1, MMU_PAGESHIFT, %g1

	sethi	%hi(ksfmmup), %g4
	ldx	[%g4 + %lo(ksfmmup)], %g4
	cmp	%g4, %g2
	bne,pn	%xcc, 1f		/* if not kernel as, go to 1 */
	slln	%g1, MMU_PAGESHIFT, %g1	/* g1 = vaddr */

	/* We need to demap in the kernel context */
	or	DEMAP_NUCLEUS | DEMAP_PAGE_TYPE, %g1, %g1
	set	MMU_PAGESIZE, %g2	/* g2 = pgsize */
	sethi	%hi(FLUSH_ADDR), %g5
4:
	stxa	%g0, [%g1]ASI_DTLB_DEMAP
	stxa	%g0, [%g1]ASI_ITLB_DEMAP
	flush	%g5			! flush required by immu

	deccc	%g3			/* decr pgcnt */
	bnz,pt	%icc, 4b
	add	%g1, %g2, %g1		/* next page */
	retry
1:
	/*
	 * We need to demap in a user context
	 *
	 * g2 = sfmmup
	 * g3 = pgcnt
	 */
	SFMMU_CPU_CNUM(%g2, %g5, %g6)	! %g5 = sfmmu cnum on this CPU

	or	DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %g1, %g1

	ldub	[%g2 + SFMMU_CEXT], %g4		! %g4 = sfmmup->cext
	sll	%g4, CTXREG_EXT_SHIFT, %g4
	or	%g5, %g4, %g5

	set	MMU_PCONTEXT, %g4
	ldxa	[%g4]ASI_DMMU, %g6		/* rd old ctxnum */
	srlx	%g6, CTXREG_NEXT_SHIFT, %g2	/* %g2 = nucleus pgsz */
	sllx	%g2, CTXREG_NEXT_SHIFT, %g2	/* preserve nucleus pgsz */
	or	%g5, %g2, %g5	/* %g5 = nucleus pgsz | primary pgsz | cnum */
	stxa	%g5, [%g4]ASI_DMMU		/* wr new ctxnum */

	set	MMU_PAGESIZE, %g2	/* g2 = pgsize */
	sethi	%hi(FLUSH_ADDR), %g5
3:
	stxa	%g0, [%g1]ASI_DTLB_DEMAP
	stxa	%g0, [%g1]ASI_ITLB_DEMAP
	flush	%g5			! flush required by immu

	deccc	%g3			/* decr pgcnt */
	bnz,pt	%icc, 3b
	add	%g1, %g2, %g1		/* next page */

	stxa	%g6, [%g4]ASI_DMMU	/* restore old ctxnum */
	retry
	SET_SIZE(vtag_flush_pgcnt_tl1)
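
/*
 * Example of the packed %g2 argument above: to flush 4 pages belonging
 * to sfmmup, a caller passes %g2 = (uint64_t)sfmmup | (4 - 1). The low
 * six bits (SFMMU_PGCNT_MASK) carry pgcnt - 1, capping a single cross
 * trap at 64 pages, and the upper 58 bits carry the sfmmup pointer
 * (whose alignment is assumed to leave those low bits clear).
 */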

	ENTRY_NP(vtag_flushall_tl1)
	/*
	 * x-trap to flush tlb
	 */
	set	DEMAP_ALL_TYPE, %g4
	stxa	%g0, [%g4]ASI_DTLB_DEMAP
	stxa	%g0, [%g4]ASI_ITLB_DEMAP
	retry
	SET_SIZE(vtag_flushall_tl1)


/*
 * vac_flushpage(pfnum, color)
 *	Flush 1 8k page of the D-$ with physical page = pfnum
 * Algorithm:
 *	The Cheetah dcache is a 64K pseudo 4-way set-associative cache.
 *	It is a virtually indexed, physically tagged cache.
 */
	.seg	".data"
	.align	8
	.global	dflush_type
dflush_type:
	.word	FLUSHPAGE_TYPE

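/*
 * dflush_type selects the strategy used by DCACHE_FLUSHPAGE: it defaults
 * to FLUSHPAGE_TYPE (displacement flush of one page by physical address)
 * and may instead be set to FLUSHMATCH_TYPE (invalidate only lines whose
 * tag matches the pfn) or FLUSHALL_TYPE (invalidate the entire dcache),
 * matching the three branches of the macro above.
 */
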
	ENTRY(vac_flushpage)
	/*
	 * flush page from the d$
	 *
	 * %o0 = pfnum, %o1 = color
	 */
	DCACHE_FLUSHPAGE(%o0, %o1, %o2, %o3, %o4)
	retl
	nop
	SET_SIZE(vac_flushpage)


	ENTRY_NP(vac_flushpage_tl1)
	/*
	 * x-trap to flush page from the d$
	 *
	 * %g1 = pfnum, %g2 = color
	 */
	DCACHE_FLUSHPAGE(%g1, %g2, %g3, %g4, %g5)
	retry
	SET_SIZE(vac_flushpage_tl1)


	ENTRY(vac_flushcolor)
	/*
	 * %o0 = vcolor
	 */
	DCACHE_FLUSHCOLOR(%o0, 0, %o1, %o2, %o3)
	DCACHE_FLUSHCOLOR(%o0, 1, %o1, %o2, %o3)
	DCACHE_FLUSHCOLOR(%o0, 2, %o1, %o2, %o3)
	DCACHE_FLUSHCOLOR(%o0, 3, %o1, %o2, %o3)
	retl
	nop
	SET_SIZE(vac_flushcolor)


	ENTRY(vac_flushcolor_tl1)
	/*
	 * %g1 = vcolor
	 */
	DCACHE_FLUSHCOLOR(%g1, 0, %g2, %g3, %g4)
	DCACHE_FLUSHCOLOR(%g1, 1, %g2, %g3, %g4)
	DCACHE_FLUSHCOLOR(%g1, 2, %g2, %g3, %g4)
	DCACHE_FLUSHCOLOR(%g1, 3, %g2, %g3, %g4)
	retry
	SET_SIZE(vac_flushcolor_tl1)


/*
 * Determine whether or not the IDSR is busy.
 * Entry: no arguments
 * Returns: 1 if busy, 0 otherwise
 */
	ENTRY(idsr_busy)
	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %g1
	clr	%o0
	btst	IDSR_BUSY, %g1
	bnz,a,pt %xcc, 1f		! busy bit set: return 1
	mov	1, %o0
1:
	retl
	nop
	SET_SIZE(idsr_busy)
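
/*
 * Assumed C-level view of idsr_busy() (a sketch, not the authoritative
 * prototype):
 *
 *	int idsr_busy(void);	returns 1 if IDSR_BUSY is set, else 0
 *
 * A typical caller polls it until a previously dispatched mondo has been
 * accepted before reusing the interrupt dispatch data registers.
 */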

	.global _dispatch_status_busy
_dispatch_status_busy:
	.asciz	"ASI_INTR_DISPATCH_STATUS error: busy"
	.align	4

/*
 * Setup interrupt dispatch data registers
 * Entry:
 *	%o0 - function or inumber to call
 *	%o1, %o2 - arguments (2 uint64_t's)
 */
	.seg "text"

	ENTRY(init_mondo)
#ifdef DEBUG
	!
	! IDSR should not be busy at the moment
	!
	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %g1
	btst	IDSR_BUSY, %g1
	bz,pt	%xcc, 1f
	nop
	sethi	%hi(_dispatch_status_busy), %o0
	call	panic
	or	%o0, %lo(_dispatch_status_busy), %o0
#endif /* DEBUG */

	ALTENTRY(init_mondo_nocheck)
	!
	! interrupt vector dispatch data reg 0
	!
1:
	mov	IDDR_0, %g1
	mov	IDDR_1, %g2
	mov	IDDR_2, %g3
	stxa	%o0, [%g1]ASI_INTR_DISPATCH

	!
	! interrupt vector dispatch data reg 1
	!
	stxa	%o1, [%g2]ASI_INTR_DISPATCH

	!
	! interrupt vector dispatch data reg 2
	!
	stxa	%o2, [%g3]ASI_INTR_DISPATCH

	membar	#Sync
	retl
	nop
	SET_SIZE(init_mondo_nocheck)
	SET_SIZE(init_mondo)



#if !(defined(JALAPENO) || defined(SERRANO))

/*
 * Ship mondo to aid using busy/nack pair bn
 */
	ENTRY_NP(shipit)
	sll	%o0, IDCR_PID_SHIFT, %g1	! IDCR<18:14> = agent id
	sll	%o1, IDCR_BN_SHIFT, %g2		! IDCR<28:24> = b/n pair
	or	%g1, IDCR_OFFSET, %g1		! IDCR<13:0> = 0x70
	or	%g1, %g2, %g1
	stxa	%g0, [%g1]ASI_INTR_DISPATCH	! interrupt vector dispatch
	membar	#Sync
	retl
	nop
	SET_SIZE(shipit)

#endif	/* !(JALAPENO || SERRANO) */
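
/*
 * Worked example of the IDCR encoding used by shipit() above: for agent
 * id 5 and busy/nack pair 1, the dispatch address is
 * (5 << IDCR_PID_SHIFT) | (1 << IDCR_BN_SHIFT) | IDCR_OFFSET, i.e.
 * (5 << 14) | (1 << 24) | 0x70, and the stxa to ASI_INTR_DISPATCH with
 * that address triggers the mondo dispatch.
 */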


/*
 * flush_instr_mem:
 *	Flush 1 page of the I-$ starting at vaddr
 *	%o0 vaddr
 *	%o1 bytes to be flushed
 *
 * UltraSPARC-III maintains consistency of the on-chip Instruction Cache
 * with the stores from all processors, so a FLUSH instruction is only
 * needed to ensure that the pipeline is consistent. This means a single
 * flush is sufficient at the end of a sequence of stores that updates
 * the instruction stream to ensure correct operation.
 */

	ENTRY(flush_instr_mem)
	flush	%o0			! address irrelevant
	retl
	nop
	SET_SIZE(flush_instr_mem)
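
/*
 * Assumed C-level view (sketch): void flush_instr_mem(caddr_t addr,
 * size_t len). On UltraSPARC-III both arguments are effectively ignored,
 * since a single FLUSH synchronizes the pipeline as described above.
 */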


#if defined(CPU_IMP_ECACHE_ASSOC)

	ENTRY(get_ecache_ctrl)
	GET_CPU_IMPL(%o0)
	cmp	%o0, JAGUAR_IMPL
	!
	! Putting an ASI access in the delay slot may
	! cause it to be accessed, even when annulled.
	!
	bne	1f
	nop
	ldxa	[%g0]ASI_EC_CFG_TIMING, %o0	! read Jaguar shared E$ ctrl reg
	b	2f
	nop
1:
	ldxa	[%g0]ASI_EC_CTRL, %o0		! read Ch/Ch+ E$ control reg
2:
	retl
	nop
	SET_SIZE(get_ecache_ctrl)

#endif	/* CPU_IMP_ECACHE_ASSOC */


#if !(defined(JALAPENO) || defined(SERRANO))

/*
 * flush_ecache:
 *	%o0 - 64 bit physical address
 *	%o1 - ecache size
 *	%o2 - ecache linesize
 */

	ENTRY(flush_ecache)

	/*
	 * For certain CPU implementations, we have to flush the L2 cache
	 * before flushing the ecache.
	 */
	PN_L2_FLUSHALL(%g3, %g4, %g5)

	/*
	 * Flush the entire Ecache using displacement flush.
	 */
	ECACHE_FLUSHALL(%o1, %o2, %o0, %o4)

	retl
	nop
	SET_SIZE(flush_ecache)

#endif	/* !(JALAPENO || SERRANO) */


	ENTRY(flush_dcache)
	ASM_LD(%o0, dcache_size)
	ASM_LD(%o1, dcache_linesize)
	CH_DCACHE_FLUSHALL(%o0, %o1, %o2)
	retl
	nop
	SET_SIZE(flush_dcache)


	ENTRY(flush_icache)
	GET_CPU_PRIVATE_PTR(%g0, %o0, %o2, flush_icache_1);
	ld	[%o0 + CHPR_ICACHE_LINESIZE], %o1
	ba,pt	%icc, 2f
	ld	[%o0 + CHPR_ICACHE_SIZE], %o0
flush_icache_1:
	ASM_LD(%o0, icache_size)
	ASM_LD(%o1, icache_linesize)
2:
	CH_ICACHE_FLUSHALL(%o0, %o1, %o2, %o4)
	retl
	nop
	SET_SIZE(flush_icache)

	ENTRY(kdi_flush_idcache)
	CH_DCACHE_FLUSHALL(%o0, %o1, %g1)
	CH_ICACHE_FLUSHALL(%o2, %o3, %g1, %g2)
	membar	#Sync
	retl
	nop
	SET_SIZE(kdi_flush_idcache)

	ENTRY(flush_pcache)
	PCACHE_FLUSHALL(%o0, %o1, %o2)
	retl
	nop
	SET_SIZE(flush_pcache)


#if defined(CPU_IMP_L1_CACHE_PARITY)

/*
 * Get dcache data and tag. The data argument is a pointer to a
 * ch_dc_data_t structure (see cheetahregs.h).
 * The Dcache *should* be turned off when this code is executed.
 */
	.align	128
	ENTRY(get_dcache_dtag)
	rdpr	%pstate, %o5
	andn	%o5, PSTATE_IE | PSTATE_AM, %o3
	wrpr	%g0, %o3, %pstate
	b	1f
	stx	%o0, [%o1 + CH_DC_IDX]

	.align	128
1:
	ldxa	[%o0]ASI_DC_TAG, %o2
	stx	%o2, [%o1 + CH_DC_TAG]
	membar	#Sync
	ldxa	[%o0]ASI_DC_UTAG, %o2
	membar	#Sync
	stx	%o2, [%o1 + CH_DC_UTAG]
	ldxa	[%o0]ASI_DC_SNP_TAG, %o2
	stx	%o2, [%o1 + CH_DC_SNTAG]
	add	%o1, CH_DC_DATA, %o1
	clr	%o3
2:
	membar	#Sync				! required before ASI_DC_DATA
	ldxa	[%o0 + %o3]ASI_DC_DATA, %o2
	membar	#Sync				! required after ASI_DC_DATA
	stx	%o2, [%o1 + %o3]
	cmp	%o3, CH_DC_DATA_REG_SIZE - 8
	blt	2b
	add	%o3, 8, %o3

	/*
	 * Unlike other CPUs in the family, D$ data parity bits for Panther
	 * do not reside in the microtag. Instead, we have to read them
	 * using the DC_data_parity bit of ASI_DCACHE_DATA. Also, instead
	 * of just having 8 parity bits to protect all 32 bytes of data
	 * per line, we now have 32 bits of parity.
	 */
	GET_CPU_IMPL(%o3)
	cmp	%o3, PANTHER_IMPL
	bne	4f
	clr	%o3

	/*
	 * move our pointer to the next field where we store parity bits
	 * and add the offset of the last parity byte since we will be
	 * storing all 4 parity bytes within one 64 bit field like this:
	 *
	 * +------+------------+------------+------------+------------+
	 * |  -   | DC_parity  | DC_parity  | DC_parity  | DC_parity  |
	 * |  -   | for word 3 | for word 2 | for word 1 | for word 0 |
	 * +------+------------+------------+------------+------------+
	 *  63:32     31:24        23:16        15:8         7:0
	 */
	add	%o1, CH_DC_PN_DATA_PARITY - CH_DC_DATA + 7, %o1

	/* add the DC_data_parity bit into our working index */
	mov	1, %o2
	sll	%o2, PN_DC_DATA_PARITY_BIT_SHIFT, %o2
	or	%o0, %o2, %o0
3:
	membar	#Sync				! required before ASI_DC_DATA
	ldxa	[%o0 + %o3]ASI_DC_DATA, %o2
	membar	#Sync				! required after ASI_DC_DATA
	stb	%o2, [%o1]
	dec	%o1
	cmp	%o3, CH_DC_DATA_REG_SIZE - 8
	blt	3b
	add	%o3, 8, %o3
4:
	retl
	wrpr	%g0, %o5, %pstate
	SET_SIZE(get_dcache_dtag)
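
/*
 * The stores above imply the rough shape of ch_dc_data_t (a sketch; see
 * cheetahregs.h for the real definition): an index word (CH_DC_IDX),
 * tag/utag/snoop-tag words (CH_DC_TAG, CH_DC_UTAG, CH_DC_SNTAG), the
 * CH_DC_DATA_REG_SIZE bytes of line data (CH_DC_DATA) and, on Panther
 * only, a trailing word holding the four data parity bytes
 * (CH_DC_PN_DATA_PARITY) in the layout pictured above.
 */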


/*
 * Get icache data and tag. The data argument is a pointer to a
 * ch_ic_data_t structure (see cheetahregs.h).
 * The Icache *must* be turned off when this function is called.
 * This is because diagnostic accesses to the Icache interfere with cache
 * consistency.
 */
	.align	128
	ENTRY(get_icache_dtag)
	rdpr	%pstate, %o5
	andn	%o5, PSTATE_IE | PSTATE_AM, %o3
	wrpr	%g0, %o3, %pstate

	stx	%o0, [%o1 + CH_IC_IDX]
	ldxa	[%o0]ASI_IC_TAG, %o2
	stx	%o2, [%o1 + CH_IC_PATAG]
	add	%o0, CH_ICTAG_UTAG, %o0
	ldxa	[%o0]ASI_IC_TAG, %o2
	add	%o0, (CH_ICTAG_UPPER - CH_ICTAG_UTAG), %o0
	stx	%o2, [%o1 + CH_IC_UTAG]
	ldxa	[%o0]ASI_IC_TAG, %o2
	add	%o0, (CH_ICTAG_LOWER - CH_ICTAG_UPPER), %o0
	stx	%o2, [%o1 + CH_IC_UPPER]
	ldxa	[%o0]ASI_IC_TAG, %o2
	andn	%o0, CH_ICTAG_TMASK, %o0
	stx	%o2, [%o1 + CH_IC_LOWER]
	ldxa	[%o0]ASI_IC_SNP_TAG, %o2
	stx	%o2, [%o1 + CH_IC_SNTAG]
	add	%o1, CH_IC_DATA, %o1
	clr	%o3
2:
	ldxa	[%o0 + %o3]ASI_IC_DATA, %o2
	stx	%o2, [%o1 + %o3]
	cmp	%o3, PN_IC_DATA_REG_SIZE - 8
	blt	2b
	add	%o3, 8, %o3

	retl
	wrpr	%g0, %o5, %pstate
	SET_SIZE(get_icache_dtag)

/*
 * Get pcache data and tags.
 * inputs:
 *	pcache_idx	- fully constructed VA for accessing P$ diagnostic
 *			  registers. Contains PC_way and PC_addr shifted
 *			  into the correct bit positions. See the PRM for
 *			  more details.
 *	data		- pointer to a ch_pc_data_t structure (see
 *			  cheetahregs.h).
 */
	.align	128
	ENTRY(get_pcache_dtag)
	rdpr	%pstate, %o5
	andn	%o5, PSTATE_IE | PSTATE_AM, %o3
	wrpr	%g0, %o3, %pstate

	stx	%o0, [%o1 + CH_PC_IDX]
	ldxa	[%o0]ASI_PC_STATUS_DATA, %o2
	stx	%o2, [%o1 + CH_PC_STATUS]
	ldxa	[%o0]ASI_PC_TAG, %o2
	stx	%o2, [%o1 + CH_PC_TAG]
	ldxa	[%o0]ASI_PC_SNP_TAG, %o2
	stx	%o2, [%o1 + CH_PC_SNTAG]
	add	%o1, CH_PC_DATA, %o1
	clr	%o3
2:
	ldxa	[%o0 + %o3]ASI_PC_DATA, %o2
	stx	%o2, [%o1 + %o3]
	cmp	%o3, CH_PC_DATA_REG_SIZE - 8
	blt	2b
	add	%o3, 8, %o3

	retl
	wrpr	%g0, %o5, %pstate
	SET_SIZE(get_pcache_dtag)

#endif	/* CPU_IMP_L1_CACHE_PARITY */

/*
 * re-enable the i$, d$, w$, and p$ according to bootup cache state.
 * Turn on WE, HPE, SPE, PE, IC, and DC bits defined as DCU_CACHE.
 *	%o0 - 64 bit constant
 */
	ENTRY(set_dcu)
	stxa	%o0, [%g0]ASI_DCU	! Store to DCU
	flush	%g0	/* flush required after changing the IC bit */
	retl
	nop
	SET_SIZE(set_dcu)


/*
 * Return DCU register.
 */
	ENTRY(get_dcu)
	ldxa	[%g0]ASI_DCU, %o0	/* DCU control register */
	retl
	nop
	SET_SIZE(get_dcu)

/*
 * Cheetah/Cheetah+ level 15 interrupt handler trap table entry.
 *
 * This handler is used to check for softints generated by error trap
 * handlers to report errors. On Cheetah, this mechanism is used by the
 * Fast ECC at TL>0 error trap handler and, on Cheetah+, by both the Fast
 * ECC at TL>0 error and the I$/D$ parity error at TL>0 trap handlers.
 * NB: Must be 8 instructions or less to fit in trap table and code must
 *     be relocatable.
 */

	ENTRY_NP(ch_pil15_interrupt_instr)
	ASM_JMP(%g1, ch_pil15_interrupt)
	SET_SIZE(ch_pil15_interrupt_instr)


	ENTRY_NP(ch_pil15_interrupt)

	/*
	 * Since pil_interrupt is hacked to assume that every level 15
	 * interrupt is generated by the CPU to indicate a performance
	 * counter overflow, this gets ugly. Before calling pil_interrupt
	 * the Error at TL>0 pending status is inspected. If it is
	 * non-zero, then an error has occurred and it is handled.
	 * Otherwise control is transferred to pil_interrupt. Note that if
	 * an error is detected pil_interrupt will not be called and
	 * overflow interrupts may be lost, causing erroneous performance
	 * measurements. However, error-recovery will have a detrimental
	 * effect on performance anyway.
	 */
	CPU_INDEX(%g1, %g4)
	set	ch_err_tl1_pending, %g4
	ldub	[%g1 + %g4], %g2
	brz	%g2, 1f
	nop

	/*
	 * We have a pending TL>0 error, clear the TL>0 pending status.
	 */
	stb	%g0, [%g1 + %g4]

	/*
	 * Clear the softint.
	 */
	mov	1, %g5
	sll	%g5, PIL_15, %g5
	wr	%g5, CLEAR_SOFTINT

	/*
	 * For Cheetah*, call cpu_tl1_error via systrap at PIL 15
	 * to process the Fast ECC/Cache Parity at TL>0 error. Clear
	 * panic flag (%g2).
	 */
	set	cpu_tl1_error, %g1
	clr	%g2
	ba	sys_trap
	mov	PIL_15, %g4

1:
	/*
	 * The logout is invalid.
	 *
	 * Call the default interrupt handler.
	 */
	sethi	%hi(pil_interrupt), %g1
	jmp	%g1 + %lo(pil_interrupt)
	mov	PIL_15, %g4

	SET_SIZE(ch_pil15_interrupt)


/*
 * Error Handling
 *
 * Cheetah provides error checking for all memory access paths between
 * the CPU, External Cache, Cheetah Data Switch and system bus. Error
 * information is logged in the AFSR (also AFSR_EXT for Panther) and
 * AFAR, and one of the following traps is generated (provided that it
 * is enabled in the External Cache Error Enable Register) to handle
 * that error:
 * 1. trap 0x70: Precise trap
 *    tt0_fecc for errors at trap level (TL) >= 0
 * 2. trap 0x0A and 0x32: Deferred trap
 *    async_err for errors at TL >= 0
 * 3. trap 0x63: Disrupting trap
 *    ce_err for errors at TL = 0
 *    (Note that trap 0x63 cannot happen at trap level > 0)
 *
 * Trap level one handlers panic the system except for the fast ecc
 * error handler which tries to recover from certain errors.
 */

/*
 * FAST ECC TRAP STRATEGY:
 *
 * Software must handle single and multi bit errors which occur due to data
 * or instruction cache reads from the external cache. A single or multi bit
 * error occurring in one of these situations results in a precise trap.
 *
 * The basic flow of this trap handler is as follows:
 *
 * 1) Record the state and then turn off the Dcache and Icache. The Dcache
 *    is disabled because bad data could have been installed. The Icache is
 *    turned off because we want to capture the Icache line related to the
 *    AFAR.
 * 2) Disable trapping on CEEN/NCEEN errors during TL=0 processing.
 * 3) Park sibling core if caches are shared (to avoid race condition while
 *    accessing shared resources such as L3 data staging register during
 *    CPU logout).
 * 4) Read the AFAR and AFSR.
 * 5) If CPU logout structure is not being used, then:
 *	6) Clear all errors from the AFSR.
 *	7) Capture Ecache, Dcache and Icache lines in "CPU log out"
 *	   structure.
 *	8) Flush Ecache then Flush Dcache and Icache and restore to
 *	   previous state.
 *	9) Unpark sibling core if we parked it earlier.
 *	10) call cpu_fast_ecc_error via systrap at PIL 14 unless we're
 *	    already running at PIL 15.
 * 6) Otherwise, if CPU logout structure is being used:
 *	7) Increment the "logout busy count".
 *	8) Flush Ecache then Flush Dcache and Icache and restore to
 *	   previous state.
 *	9) Unpark sibling core if we parked it earlier.
 *	10) Issue a retry since the other CPU error logging code will end
 *	    up finding this error bit and logging information about it
 *	    later.
 * 7) Alternatively (to 5 and 6 above), if the cpu_private struct is not
 *    yet initialized such that we can't even check the logout struct, then
 *    we place the clo_flags data into %g2 (sys_trap->have_win arg #1) and
 *    call cpu_fast_ecc_error via systrap. The clo_flags parameter is used
 *    to determine information such as TL, TT, CEEN and NCEEN settings, etc
 *    in the high level trap handler since we don't have access to detailed
 *    logout information in cases where the cpu_private struct is not yet
 *    initialized.
 *
 * We flush the E$ and D$ here on TL=1 code to prevent getting nested
 * Fast ECC traps in the TL=0 code. If we get a Fast ECC event here in
 * the TL=1 code, we will go to the Fast ECC at TL>0 handler which,
 * since it uses different code/data from this handler, has a better
 * chance of fixing things up than simply recursing through this code
 * again (this would probably cause an eventual kernel stack overflow).
 * If the Fast ECC at TL>0 handler encounters a Fast ECC error before it
 * can flush the E$ (or the error is a stuck-at bit), we will recurse in
 * the Fast ECC at TL>0 handler and eventually Red Mode.
 *
 * Note that for Cheetah (and only Cheetah), we use alias addresses for
 * flushing rather than ASI accesses (which don't exist on Cheetah).
 * Should we encounter a Fast ECC error within this handler on Cheetah,
 * there's a good chance it's within the ecache_flushaddr buffer (since
 * it's the largest piece of memory we touch in the handler and it is
 * usually kernel text/data). For that reason the Fast ECC at TL>0
 * handler for Cheetah uses an alternate buffer: ecache_tl1_flushaddr.
 */

/*
 * Cheetah ecc-protected E$ trap (Trap 70) at TL=0
 * tt0_fecc is replaced by fecc_err_instr in cpu_init_trap of the various
 * architecture-specific files.
 * NB: Must be 8 instructions or less to fit in trap table and code must
 *     be relocatable.
 */

	ENTRY_NP(fecc_err_instr)
	membar	#Sync			! Cheetah requires membar #Sync

	/*
	 * Save current DCU state. Turn off the Dcache and Icache.
	 */
	ldxa	[%g0]ASI_DCU, %g1	! save DCU in %g1
	andn	%g1, DCU_DC + DCU_IC, %g4
	stxa	%g4, [%g0]ASI_DCU
	flush	%g0	/* flush required after changing the IC bit */

	ASM_JMP(%g4, fast_ecc_err)
	SET_SIZE(fecc_err_instr)


#if !(defined(JALAPENO) || defined(SERRANO))

	.section ".text"
	.align	64
	ENTRY_NP(fast_ecc_err)

	/*
	 * Turn off CEEN and NCEEN.
	 */
	ldxa	[%g0]ASI_ESTATE_ERR, %g3
	andn	%g3, EN_REG_NCEEN + EN_REG_CEEN, %g4
	stxa	%g4, [%g0]ASI_ESTATE_ERR
	membar	#Sync			! membar sync required

	/*
	 * Check to see whether we need to park our sibling core
	 * before recording diagnostic information from caches
	 * which may be shared by both cores.
	 * We use %g1 to store information about whether or not
	 * we had to park the core (%g1 holds our DCUCR value and
	 * we only use bits from that register which are "reserved"
	 * to keep track of core parking) so that we know whether
	 * or not to unpark later. %g5 and %g4 are scratch registers.
	 */
	PARK_SIBLING_CORE(%g1, %g5, %g4)

	/*
	 * Do the CPU log out capture.
	 *   %g3 = "failed?" return value.
	 *   %g2 = Input = AFAR. Output the clo_flags info which is passed
	 *         into this macro via %g4. Output only valid if cpu_private
	 *         struct has not been initialized.
	 *   CHPR_FECCTL0_LOGOUT = cpu logout structure offset input
	 *   %g4 = Trap information stored in the cpu logout flags field
	 *   %g5 = scr1
	 *   %g6 = scr2
	 *   %g3 = scr3
	 *   %g4 = scr4
	 */
	/* store the CEEN and NCEEN values, TL=0 */
	and	%g3, EN_REG_CEEN + EN_REG_NCEEN, %g4
	set	CHPR_FECCTL0_LOGOUT, %g6
	DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)

	/*
	 * Flush the Ecache (and L2 cache for Panther) to get the error out
	 * of the Ecache. If the UCC or UCU is on a dirty line, then the
	 * following flush will turn that into a WDC or WDU, respectively.
	 */
	PN_L2_FLUSHALL(%g4, %g5, %g6)

	CPU_INDEX(%g4, %g5)
	mulx	%g4, CPU_NODE_SIZE, %g4
	set	cpunodes, %g5
	add	%g4, %g5, %g4
	ld	[%g4 + ECACHE_LINESIZE], %g5
	ld	[%g4 + ECACHE_SIZE], %g4

	ASM_LDX(%g6, ecache_flushaddr)
	ECACHE_FLUSHALL(%g4, %g5, %g6, %g7)

	/*
	 * Flush the Dcache. Since bad data could have been installed in
	 * the Dcache we must flush it before re-enabling it.
	 */
	ASM_LD(%g5, dcache_size)
	ASM_LD(%g6, dcache_linesize)
	CH_DCACHE_FLUSHALL(%g5, %g6, %g7)

	/*
	 * Flush the Icache. Since we turned off the Icache to capture the
	 * Icache line it is now stale or corrupted and we must flush it
	 * before re-enabling it.
	 */
	GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, fast_ecc_err_5);
	ld	[%g5 + CHPR_ICACHE_LINESIZE], %g6
	ba,pt	%icc, 6f
	ld	[%g5 + CHPR_ICACHE_SIZE], %g5
fast_ecc_err_5:
	ASM_LD(%g5, icache_size)
	ASM_LD(%g6, icache_linesize)
6:
	CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)

	/*
	 * check to see whether we parked our sibling core at the start
	 * of this handler. If so, we need to unpark it here.
	 * We use DCUCR reserved bits (stored in %g1) to keep track of
	 * whether or not we need to unpark. %g5 and %g4 are scratch registers.
	 */
	UNPARK_SIBLING_CORE(%g1, %g5, %g4)

	/*
	 * Restore the Dcache and Icache to the previous state.
	 */
	stxa	%g1, [%g0]ASI_DCU
	flush	%g0	/* flush required after changing the IC bit */

	/*
	 * Make sure our CPU logout operation was successful.
	 */
	cmp	%g3, %g0
	be	8f
	nop

	/*
	 * If the logout structure had been busy, how many times have
	 * we tried to use it and failed (nesting count)? If we have
	 * already recursed a substantial number of times, then we can
	 * assume things are not going to get better by themselves and
	 * so it would be best to panic.
	 */
	cmp	%g3, CLO_NESTING_MAX
	blt	7f
	nop

	call	ptl1_panic
	mov	PTL1_BAD_ECC, %g1

7:
	/*
	 * Otherwise, if the logout structure was busy but we have not
	 * nested more times than our maximum value, then we simply
	 * issue a retry. Our TL=0 trap handler code will check and
	 * clear the AFSR after it is done logging what is currently
	 * in the logout struct and handle this event at that time.
	 */
	retry
8:
	/*
	 * Call cpu_fast_ecc_error via systrap at PIL 14 unless we're
	 * already at PIL 15.
	 */
	set	cpu_fast_ecc_error, %g1
	rdpr	%pil, %g4
	cmp	%g4, PIL_14
	ba	sys_trap
	movl	%icc, PIL_14, %g4

	SET_SIZE(fast_ecc_err)

#endif	/* !(JALAPENO || SERRANO) */
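
/*
 * Note the PIL selection idiom used at the end of fast_ecc_err above
 * (and again in ce_err below):
 *
 *	rdpr	%pil, %g4
 *	cmp	%g4, PIL_14
 *	ba	sys_trap
 *	movl	%icc, PIL_14, %g4
 *
 * If the interrupted PIL is below 14, the conditional move raises %g4
 * to PIL_14; otherwise %g4 keeps the current (higher) PIL, so code
 * already running at PIL 15 is not lowered by the systrap.
 */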


/*
 * Cheetah/Cheetah+ Fast ECC at TL>0 trap strategy:
 *
 * The basic flow of this trap handler is as follows:
 *
 * 1) In the "trap 70" trap table code (fecc_err_tl1_instr), generate a
 *    software trap 0 ("ta 0") to buy an extra set of %tpc, etc. which we
 *    will use to save %g1 and %g2.
 * 2) At the software trap 0 at TL>0 trap table code (fecc_err_tl1_cont_instr),
 *    we save %g1+%g2 using %tpc, %tnpc + %tstate and jump to the fast ecc
 *    handler (using the just saved %g1).
 * 3) Turn off the Dcache if it was on and save the state of the Dcache
 *    (whether on or off) in Bit2 (CH_ERR_TSTATE_DC_ON) of %tstate.
 *    NB: we don't turn off the Icache because bad data is not installed nor
 *    will we be doing any diagnostic accesses.
 * 4) Compute the physical address of the per-cpu/per-tl save area using
 *    %g1+%g2.
 * 5) Save %g1-%g7 into the per-cpu/per-tl save area (%g1 + %g2 from the
 *    %tpc, %tnpc, %tstate values previously saved).
 * 6) Set %tl to %tl - 1.
 * 7) Save the appropriate flags and TPC in the ch_err_tl1_data structure.
 * 8) Save the value of CH_ERR_TSTATE_DC_ON in the ch_err_tl1_tmp field.
 * 9) For Cheetah and Jalapeno, read the AFAR and AFSR and clear. For
 *    Cheetah+ (and later), read the shadow AFAR and AFSR but don't clear.
 *    Save the values in ch_err_tl1_data. For Panther, read the shadow
 *    AFSR_EXT and save the value in ch_err_tl1_data.
 * 10) Disable CEEN/NCEEN to prevent any disrupting/deferred errors from
 *    being queued. We'll report them via the AFSR/AFAR capture in step 13.
 * 11) Flush the Ecache.
 *    NB: the Ecache is flushed assuming the largest possible size with
 *    the smallest possible line size since access to the cpu_nodes may
 *    cause an unrecoverable DTLB miss.
 * 12) Reenable CEEN/NCEEN with the value saved from step 10.
 * 13) For Cheetah and Jalapeno, read the AFAR and AFSR and clear again.
 *    For Cheetah+ (and later), read the primary AFAR and AFSR and now clear.
 *    Save the read AFSR/AFAR values in ch_err_tl1_data. For Panther,
 *    read and clear the primary AFSR_EXT and save it in ch_err_tl1_data.
 * 14) Flush and re-enable the Dcache if it was on at step 3.
 * 15) Do TRAPTRACE if enabled.
 * 16) Check if a UCU->WDU (or L3_UCU->WDU for Panther) happened, panic if so.
 * 17) Set the event pending flag in ch_err_tl1_pending[CPU]
 * 18) Cause a softint 15. The pil15_interrupt handler will inspect the
 *    event pending flag and call cpu_tl1_error via systrap if set.
 * 19) Restore the registers from step 5 and issue retry.
 */

/*
 * Cheetah ecc-protected E$ trap (Trap 70) at TL>0
 * tt1_fecc is replaced by fecc_err_tl1_instr in cpu_init_trap of the various
 * architecture-specific files. This generates a "Software Trap 0" at TL>0,
 * which goes to fecc_err_tl1_cont_instr, and we continue the handling there.
 * NB: Must be 8 instructions or less to fit in trap table and code must
 *     be relocatable.
 */

	ENTRY_NP(fecc_err_tl1_instr)
	CH_ERR_TL1_TRAPENTRY(SWTRAP_0);
	SET_SIZE(fecc_err_tl1_instr)

/*
 * Software trap 0 at TL>0.
 * tt1_swtrap0 is replaced by fecc_err_tl1_cont_instr in cpu_init_trap of
 * the various architecture-specific files. This is used as a continuation
 * of the fast ecc handling where we've bought an extra TL level, so we can
 * use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
 * and %g2. Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
 * there's a reserved hole from 3-7. We only use bits 0-1 and 8-9 (the low
 * order two bits from %g1 and %g2 respectively).
 * NB: Must be 8 instructions or less to fit in trap table and code must
 *     be relocatable.
 */

	ENTRY_NP(fecc_err_tl1_cont_instr)
	CH_ERR_TL1_SWTRAPENTRY(fast_ecc_tl1_err)
	SET_SIZE(fecc_err_tl1_cont_instr)


/*
 * The ce_err function handles disrupting trap type 0x63 at TL=0.
 *
 * AFSR errors bits which cause this trap are:
 *	CE, EMC, EDU:ST, EDC, WDU, WDC, CPU, CPC, IVU, IVC
 *
 * NCEEN Bit of Cheetah External Cache Error Enable Register enables
 * the following AFSR disrupting traps: EDU:ST, WDU, CPU, IVU
 *
 * CEEN Bit of Cheetah External Cache Error Enable Register enables
 * the following AFSR disrupting traps: CE, EMC, EDC, WDC, CPC, IVC
 *
 * Cheetah+ also handles (No additional processing required):
 *    DUE, DTO, DBERR	(NCEEN controlled)
 *    THCE		(CEEN and ET_ECC_en controlled)
 *    TUE		(ET_ECC_en controlled)
 *
 * Panther further adds:
 *    IMU, L3_EDU, L3_WDU, L3_CPU		(NCEEN controlled)
 *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE	(CEEN controlled)
 *    TUE_SH, TUE		(NCEEN and L2_tag_ECC_en controlled)
 *    L3_TUE, L3_TUE_SH		(NCEEN and ET_ECC_en controlled)
 *    THCE			(CEEN and L2_tag_ECC_en controlled)
 *    L3_THCE			(CEEN and ET_ECC_en controlled)
 *
 * Steps:
 *	1. Disable hardware corrected disrupting errors only (CEEN)
 *	2. Park sibling core if caches are shared (to avoid race
 *	   condition while accessing shared resources such as L3
 *	   data staging register during CPU logout).
 *	3. If the CPU logout structure is not currently being used:
 *		4. Clear AFSR error bits
 *		5. Capture Ecache, Dcache and Icache lines associated
 *		   with AFAR.
 *		6. Unpark sibling core if we parked it earlier.
 *		7. call cpu_disrupting_error via sys_trap at PIL 14
 *		   unless we're already running at PIL 15.
 *	4. Otherwise, if the CPU logout structure is busy:
 *		5. Increment "logout busy count" and place into %g3
 *		6. Unpark sibling core if we parked it earlier.
 *		7. Issue a retry since the other CPU error logging
 *		   code will end up finding this error bit and logging
 *		   information about it later.
 *	5. Alternatively (to 3 and 4 above), if the cpu_private struct is
 *	   not yet initialized such that we can't even check the logout
 *	   struct, then we place the clo_flags data into %g2
 *	   (sys_trap->have_win arg #1) and call cpu_disrupting_error via
 *	   systrap. The clo_flags parameter is used to determine information
 *	   such as TL, TT, CEEN settings, etc in the high level trap
 *	   handler since we don't have access to detailed logout information
 *	   in cases where the cpu_private struct is not yet initialized.
 *
 * %g3: [ logout busy count ] - arg #2
 * %g2: [ clo_flags if cpu_private unavailable ] - sys_trap->have_win: arg #1
 */

	.align	128
	ENTRY_NP(ce_err)
	membar	#Sync			! Cheetah requires membar #Sync

	/*
	 * Disable trap on hardware corrected errors (CEEN) while at TL=0
	 * to prevent recursion.
	 */
	ldxa	[%g0]ASI_ESTATE_ERR, %g1
	bclr	EN_REG_CEEN, %g1
	stxa	%g1, [%g0]ASI_ESTATE_ERR
	membar	#Sync			! membar sync required

	/*
	 * Save current DCU state. Turn off Icache to allow capture of
	 * Icache data by DO_CPU_LOGOUT.
	 */
	ldxa	[%g0]ASI_DCU, %g1	! save DCU in %g1
	andn	%g1, DCU_IC, %g4
	stxa	%g4, [%g0]ASI_DCU
	flush	%g0	/* flush required after changing the IC bit */

	/*
	 * Check to see whether we need to park our sibling core
	 * before recording diagnostic information from caches
	 * which may be shared by both cores.
	 * We use %g1 to store information about whether or not
	 * we had to park the core (%g1 holds our DCUCR value and
	 * we only use bits from that register which are "reserved"
	 * to keep track of core parking) so that we know whether
	 * or not to unpark later. %g5 and %g4 are scratch registers.
	 */
	PARK_SIBLING_CORE(%g1, %g5, %g4)

	/*
	 * Do the CPU log out capture.
	 *   %g3 = "failed?" return value.
	 *   %g2 = Input = AFAR. Output the clo_flags info which is passed
	 *         into this macro via %g4. Output only valid if cpu_private
	 *         struct has not been initialized.
	 *   CHPR_CECC_LOGOUT = cpu logout structure offset input
	 *   %g4 = Trap information stored in the cpu logout flags field
	 *   %g5 = scr1
	 *   %g6 = scr2
	 *   %g3 = scr3
	 *   %g4 = scr4
	 */
	clr	%g4			! TL=0 bit in afsr
	set	CHPR_CECC_LOGOUT, %g6
	DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)

	/*
	 * Flush the Icache. Since we turned off the Icache to capture the
	 * Icache line it is now stale or corrupted and we must flush it
	 * before re-enabling it.
	 */
	GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, ce_err_1);
	ld	[%g5 + CHPR_ICACHE_LINESIZE], %g6
	ba,pt	%icc, 2f
	ld	[%g5 + CHPR_ICACHE_SIZE], %g5
ce_err_1:
	ASM_LD(%g5, icache_size)
	ASM_LD(%g6, icache_linesize)
2:
	CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)

	/*
	 * check to see whether we parked our sibling core at the start
	 * of this handler. If so, we need to unpark it here.
	 * We use DCUCR reserved bits (stored in %g1) to keep track of
	 * whether or not we need to unpark. %g5 and %g4 are scratch registers.
	 */
	UNPARK_SIBLING_CORE(%g1, %g5, %g4)

	/*
	 * Restore Icache to previous state.
	 */
	stxa	%g1, [%g0]ASI_DCU
	flush	%g0	/* flush required after changing the IC bit */

	/*
	 * Make sure our CPU logout operation was successful.
	 */
	cmp	%g3, %g0
	be	4f
	nop

	/*
	 * If the logout structure had been busy, how many times have
	 * we tried to use it and failed (nesting count)? If we have
	 * already recursed a substantial number of times, then we can
	 * assume things are not going to get better by themselves and
	 * so it would be best to panic.
	 */
	cmp	%g3, CLO_NESTING_MAX
	blt	3f
	nop

	call	ptl1_panic
	mov	PTL1_BAD_ECC, %g1

3:
	/*
	 * Otherwise, if the logout structure was busy but we have not
	 * nested more times than our maximum value, then we simply
	 * issue a retry. Our TL=0 trap handler code will check and
	 * clear the AFSR after it is done logging what is currently
	 * in the logout struct and handle this event at that time.
	 */
	retry
4:
	/*
	 * Call cpu_disrupting_error via systrap at PIL 14 unless we're
	 * already at PIL 15.
	 */
	set	cpu_disrupting_error, %g1
	rdpr	%pil, %g4
	cmp	%g4, PIL_14
	ba	sys_trap
	movl	%icc, PIL_14, %g4
	SET_SIZE(ce_err)


/*
 * This trap cannot happen at TL>0, which means this routine will never
 * actually be called and so we treat this like a BAD TRAP panic.
 */
	.align	64
	ENTRY_NP(ce_err_tl1)

	call	ptl1_panic
	mov	PTL1_BAD_TRAP, %g1

	SET_SIZE(ce_err_tl1)


/*
 * The async_err function handles deferred trap types 0xA
 * (instruction_access_error) and 0x32 (data_access_error) at TL>=0.
 *
 * AFSR errors bits which cause this trap are:
 *	UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
 * On some platforms, EMU may cause Cheetah to pull the error pin,
 * never giving Solaris a chance to take a trap.
 *
 * NCEEN Bit of Cheetah External Cache Error Enable Register enables
 * the following AFSR deferred traps: UE, EMU, EDU:BLD, TO, BERR
 *
 * Steps:
 *	1. Disable CEEN and NCEEN errors to prevent recursive errors.
 *	2. Turn D$ off per Cheetah PRM P.5 Note 6, turn I$ off to capture
 *	   I$ line in DO_CPU_LOGOUT.
 *	3. Park sibling core if caches are shared (to avoid race
 *	   condition while accessing shared resources such as L3
 *	   data staging register during CPU logout).
 *	4. If the CPU logout structure is not currently being used:
 *		5. Clear AFSR error bits
 *		6. Capture Ecache, Dcache and Icache lines associated
 *		   with AFAR.
 *		7. Unpark sibling core if we parked it earlier.
 *		8. call cpu_deferred_error via sys_trap.
 *	5. Otherwise, if the CPU logout structure is busy:
 *		6. Increment "logout busy count"
 *		7. Unpark sibling core if we parked it earlier.
 *		8. Issue a retry since the other CPU error logging
 *		   code will end up finding this error bit and logging
 *		   information about it later.
 *	6. Alternatively (to 4 and 5 above), if the cpu_private struct is
 *	   not yet initialized such that we can't even check the logout
 *	   struct, then we place the clo_flags data into %g2
 *	   (sys_trap->have_win arg #1) and call cpu_deferred_error via
 *	   systrap. The clo_flags parameter is used to determine information
 *	   such as TL, TT, CEEN settings, etc in the high level trap handler
 *	   since we don't have access to detailed logout information in cases
 *	   where the cpu_private struct is not yet initialized.
 *
 * %g2: [ clo_flags if cpu_private unavailable ] - sys_trap->have_win: arg #1
 * %g3: [ logout busy count ] - arg #2
 */

	ENTRY_NP(async_err)
	membar	#Sync			! Cheetah requires membar #Sync

	/*
	 * Disable CEEN and NCEEN.
	 */
	ldxa	[%g0]ASI_ESTATE_ERR, %g3
	andn	%g3, EN_REG_NCEEN + EN_REG_CEEN, %g4
	stxa	%g4, [%g0]ASI_ESTATE_ERR
	membar	#Sync			! membar sync required

	/*
	 * Save current DCU state.
	 * Disable Icache to allow capture of Icache data by DO_CPU_LOGOUT.
	 * Do this regardless of whether this is a Data Access Error or
	 * Instruction Access Error Trap.
	 * Disable Dcache for both Data Access Error and Instruction Access
	 * Error per Cheetah PRM P.5 Note 6.
	 */
	ldxa	[%g0]ASI_DCU, %g1	! save DCU in %g1
	andn	%g1, DCU_IC + DCU_DC, %g4
	stxa	%g4, [%g0]ASI_DCU
	flush	%g0	/* flush required after changing the IC bit */

	/*
	 * Check to see whether we need to park our sibling core
	 * before recording diagnostic information from caches
	 * which may be shared by both cores.
	 * We use %g1 to store information about whether or not
	 * we had to park the core (%g1 holds our DCUCR value and
	 * we only use bits from that register which are "reserved"
	 * to keep track of core parking) so that we know whether
	 * or not to unpark later. %g6 and %g4 are scratch registers.
	 */
	PARK_SIBLING_CORE(%g1, %g6, %g4)

	/*
	 * Do the CPU logout capture.
	 *
	 *   %g3 = "failed?" return value.
	 *   %g2 = Input = AFAR. Output the clo_flags info which is passed
	 *         into this macro via %g4. Output only valid if cpu_private
	 *         struct has not been initialized.
	 *   CHPR_ASYNC_LOGOUT = cpu logout structure offset input
	 *   %g4 = Trap information stored in the cpu logout flags field
	 *   %g5 = scr1
	 *   %g6 = scr2
	 *   %g3 = scr3
	 *   %g4 = scr4
	 */
	andcc	%g5, T_TL1, %g0
	clr	%g6
	movnz	%xcc, 1, %g6		! set %g6 if T_TL1 set
	sllx	%g6, CLO_FLAGS_TL_SHIFT, %g6
	sllx	%g5, CLO_FLAGS_TT_SHIFT, %g4
	set	CLO_FLAGS_TT_MASK, %g2
	and	%g4, %g2, %g4		! ttype
	or	%g6, %g4, %g4		! TT and TL
	and	%g3, EN_REG_CEEN, %g3	! CEEN value
	or	%g3, %g4, %g4		! TT and TL and CEEN
	set	CHPR_ASYNC_LOGOUT, %g6
	DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)

	/*
	 * If the logout struct was busy, we may need to pass the
	 * TT, TL, and CEEN information to the TL=0 handler via
	 * systrap parameter so save it off here.
	 */
	cmp	%g3, %g0
	be	1f
	nop
	sllx	%g4, 32, %g4
	or	%g4, %g3, %g3
1:
	/*
	 * Flush the Icache. Since we turned off the Icache to capture the
	 * Icache line it is now stale or corrupted and we must flush it
	 * before re-enabling it.
	 */
	GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, async_err_1);
	ld	[%g5 + CHPR_ICACHE_LINESIZE], %g6
	ba,pt	%icc, 2f
	ld	[%g5 + CHPR_ICACHE_SIZE], %g5
async_err_1:
	ASM_LD(%g5, icache_size)
	ASM_LD(%g6, icache_linesize)
2:
	CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)

	/*
	 * Flush the Dcache before turning it back on since it may now
	 * contain stale or corrupt data.
	 */
	ASM_LD(%g5, dcache_size)
	ASM_LD(%g6, dcache_linesize)
	CH_DCACHE_FLUSHALL(%g5, %g6, %g7)

	/*
	 * check to see whether we parked our sibling core at the start
	 * of this handler. If so, we need to unpark it here.
	 * We use DCUCR reserved bits (stored in %g1) to keep track of
	 * whether or not we need to unpark. %g5 and %g7 are scratch registers.
	 */
	UNPARK_SIBLING_CORE(%g1, %g5, %g7)

	/*
	 * Restore Icache and Dcache to previous state.
	 */
	stxa	%g1, [%g0]ASI_DCU
	flush	%g0	/* flush required after changing the IC bit */

	/*
	 * Make sure our CPU logout operation was successful.
	 */
	cmp	%g3, %g0
	be	4f
	nop

	/*
	 * If the logout structure had been busy, how many times have
	 * we tried to use it and failed (nesting count)? If we have
	 * already recursed a substantial number of times, then we can
	 * assume things are not going to get better by themselves and
	 * so it would be best to panic.
	 */
	cmp	%g3, CLO_NESTING_MAX
	blt	3f
	nop

	call	ptl1_panic
	mov	PTL1_BAD_ECC, %g1

3:
	/*
	 * Otherwise, if the logout structure was busy but we have not
	 * nested more times than our maximum value, then we simply
	 * issue a retry. Our TL=0 trap handler code will check and
	 * clear the AFSR after it is done logging what is currently
	 * in the logout struct and handle this event at that time.
	 */
	retry
4:
	RESET_USER_RTT_REGS(%g4, %g5, async_err_resetskip)
async_err_resetskip:
	set	cpu_deferred_error, %g1
	ba	sys_trap
	mov	PIL_15, %g4		! run at pil 15
	SET_SIZE(async_err)

#if defined(CPU_IMP_L1_CACHE_PARITY)

/*
 * D$ parity error trap (trap 71) at TL=0.
 * tt0_dperr is replaced by dcache_parity_instr in cpu_init_trap of
 * the various architecture-specific files. This merely sets up the
 * arguments for cpu_parity_error and calls it via sys_trap.
 * NB: Must be 8 instructions or less to fit in trap table and code must
 *     be relocatable.
 */
	ENTRY_NP(dcache_parity_instr)
	membar	#Sync			! Cheetah+ requires membar #Sync
	set	cpu_parity_error, %g1
	or	%g0, CH_ERR_DPE, %g2
	rdpr	%tpc, %g3
	sethi	%hi(sys_trap), %g7
	jmp	%g7 + %lo(sys_trap)
	mov	PIL_15, %g4		! run at pil 15
	SET_SIZE(dcache_parity_instr)
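
/*
 * For reference, the sys_trap convention relied on above (as assumed
 * from the surrounding code and the arg comments in ce_err/async_err):
 * %g1 holds the C handler to invoke (here cpu_parity_error, with the
 * CH_ERR_DPE flags in %g2 and the faulting %tpc in %g3 as its two
 * arguments), and %g4 is the PIL at which to run it.
 */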


/*
 * D$ parity error trap (trap 71) at TL>0.
 * tt1_dperr is replaced by dcache_parity_tl1_instr in cpu_init_trap of
 * the various architecture-specific files. This generates a "Software
 * Trap 1" at TL>0, which goes to dcache_parity_tl1_cont_instr, and we
 * continue the handling there.
 * NB: Must be 8 instructions or less to fit in trap table and code must
 *     be relocatable.
 */
	ENTRY_NP(dcache_parity_tl1_instr)
	CH_ERR_TL1_TRAPENTRY(SWTRAP_1);
	SET_SIZE(dcache_parity_tl1_instr)


/*
 * Software trap 1 at TL>0.
 * tt1_swtrap1 is replaced by dcache_parity_tl1_cont_instr in cpu_init_trap
 * of the various architecture-specific files. This is used as a continuation
 * of the dcache parity handling where we've bought an extra TL level, so we
 * can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
 * and %g2. Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
 * there's a reserved hole from 3-7. We only use bits 0-1 and 8-9 (the low
 * order two bits from %g1 and %g2 respectively).
 * NB: Must be 8 instructions or less to fit in trap table and code must
 *     be relocatable.
 */
	ENTRY_NP(dcache_parity_tl1_cont_instr)
	CH_ERR_TL1_SWTRAPENTRY(dcache_parity_tl1_err);
	SET_SIZE(dcache_parity_tl1_cont_instr)

/*
 * D$ parity error at TL>0 handler
 * We get here via trap 71 at TL>0->Software trap 1 at TL>0. We enter
 * this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
 */

	ENTRY_NP(dcache_parity_tl1_err)

	/*
	 * This macro saves all the %g registers in the ch_err_tl1_data
	 * structure, updates the ch_err_tl1_flags and saves the %tpc in
	 * ch_err_tl1_tpc. At the end of this macro, %g1 will point to
	 * the ch_err_tl1_data structure and %g2 will have the original
	 * flags in the ch_err_tl1_data structure. All %g registers
	 * except for %g1 and %g2 will be available.
	 */
	CH_ERR_TL1_ENTER(CH_ERR_DPE);

#ifdef TRAPTRACE
	/*
	 * Get current trap trace entry physical pointer.
	 */
	CPU_INDEX(%g6, %g5)
	sll	%g6, TRAPTR_SIZE_SHIFT, %g6
	set	trap_trace_ctl, %g5
	add	%g6, %g5, %g6
	ld	[%g6 + TRAPTR_LIMIT], %g5
	tst	%g5
	be	%icc, dpe_tl1_skip_tt
	nop
	ldx	[%g6 + TRAPTR_PBASE], %g5
	ld	[%g6 + TRAPTR_OFFSET], %g4
	add	%g5, %g4, %g5

	/*
	 * Create trap trace entry.
	 */
	rd	%asi, %g7
	wr	%g0, TRAPTR_ASI, %asi
	rd	STICK, %g4
	stxa	%g4, [%g5 + TRAP_ENT_TICK]%asi
	rdpr	%tl, %g4
	stha	%g4, [%g5 + TRAP_ENT_TL]%asi
	rdpr	%tt, %g4
	stha	%g4, [%g5 + TRAP_ENT_TT]%asi
	rdpr	%tpc, %g4
	stna	%g4, [%g5 + TRAP_ENT_TPC]%asi
	rdpr	%tstate, %g4
	stxa	%g4, [%g5 + TRAP_ENT_TSTATE]%asi
	stna	%sp, [%g5 + TRAP_ENT_SP]%asi
	stna	%g0, [%g5 + TRAP_ENT_TR]%asi
	stna	%g0, [%g5 + TRAP_ENT_F1]%asi
	stna	%g0, [%g5 + TRAP_ENT_F2]%asi
	stna	%g0, [%g5 + TRAP_ENT_F3]%asi
	stna	%g0, [%g5 + TRAP_ENT_F4]%asi
	wr	%g0, %g7, %asi

	/*
	 * Advance trap trace pointer.
	 */
	ld	[%g6 + TRAPTR_OFFSET], %g5
	ld	[%g6 + TRAPTR_LIMIT], %g4
	st	%g5, [%g6 + TRAPTR_LAST_OFFSET]
	add	%g5, TRAP_ENT_SIZE, %g5
	sub	%g4, TRAP_ENT_SIZE, %g4
	cmp	%g5, %g4
	movge	%icc, 0, %g5
	st	%g5, [%g6 + TRAPTR_OFFSET]
dpe_tl1_skip_tt:
#endif	/* TRAPTRACE */

	/*
	 * I$ and D$ are automatically turned off by HW when the CPU hits
	 * a dcache or icache parity error so we will just leave those two
	 * off for now to avoid repeating this trap.
	 * For Panther, however, since we trap on P$ data parity errors
	 * and HW does not automatically disable P$, we need to disable it
	 * here so that we don't encounter any recursive traps when we
	 * issue the retry.
	 */
	ldxa	[%g0]ASI_DCU, %g3
	mov	1, %g4
	sllx	%g4, DCU_PE_SHIFT, %g4
	andn	%g3, %g4, %g3
	stxa	%g3, [%g0]ASI_DCU
	membar	#Sync

	/*
	 * We fall into this macro if we've successfully logged the error in
	 * the ch_err_tl1_data structure and want the PIL15 softint to pick
	 * it up and log it. %g1 must point to the ch_err_tl1_data structure.
	 * Restores the %g registers and issues retry.
	 */
	CH_ERR_TL1_EXIT;
	SET_SIZE(dcache_parity_tl1_err)

/*
 * I$ parity error trap (trap 72) at TL=0.
 * tt0_iperr is replaced by icache_parity_instr in cpu_init_trap of
 * the various architecture-specific files.  This merely sets up the
 * arguments for cpu_parity_error and calls it via sys_trap.
 * NB: Must be 8 instructions or less to fit in trap table and code must
 * be relocatable.
 */

	ENTRY_NP(icache_parity_instr)
	membar	#Sync			! Cheetah+ requires membar #Sync
	set	cpu_parity_error, %g1
	or	%g0, CH_ERR_IPE, %g2
	rdpr	%tpc, %g3
	sethi	%hi(sys_trap), %g7
	jmp	%g7 + %lo(sys_trap)
	mov	PIL_15, %g4		! run at pil 15
	SET_SIZE(icache_parity_instr)

/*
 * I$ parity error trap (trap 72) at TL>0.
 * tt1_iperr is replaced by icache_parity_tl1_instr in cpu_init_trap of
 * the various architecture-specific files.  This generates a "Software
 * Trap 2" at TL>0, which goes to icache_parity_tl1_cont_instr, and we
 * continue the handling there.
 * NB: Must be 8 instructions or less to fit in trap table and code must
 * be relocatable.
 */
	ENTRY_NP(icache_parity_tl1_instr)
	CH_ERR_TL1_TRAPENTRY(SWTRAP_2);
	SET_SIZE(icache_parity_tl1_instr)

/*
 * Software trap 2 at TL>0.
 * tt1_swtrap2 is replaced by icache_parity_tl1_cont_instr in cpu_init_trap
 * of the various architecture-specific files.  This is used as a continuation
 * of the icache parity handling where we've bought an extra TL level, so we
 * can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
 * and %g2.  Since %tpc and %tnpc force their two low-order bits to zero, those
 * bits have to be parked in %tstate instead.  Note that %tstate has bits 0-2
 * and then bits 8-19 as r/w, with a reserved hole from 3-7.  We only use
 * bits 0-1 and 8-9 (the low order two bits from %g1 and %g2 respectively).
 * NB: Must be 8 instructions or less to fit in trap table and code must
 * be relocatable.
 */
	ENTRY_NP(icache_parity_tl1_cont_instr)
	CH_ERR_TL1_SWTRAPENTRY(icache_parity_tl1_err);
	SET_SIZE(icache_parity_tl1_cont_instr)


/*
 * I$ parity error at TL>0 handler
 * We get here via trap 72 at TL>0 -> Software trap 2 at TL>0.  We enter
 * this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
 */

	ENTRY_NP(icache_parity_tl1_err)

	/*
	 * This macro saves all the %g registers in the ch_err_tl1_data
	 * structure, updates the ch_err_tl1_flags and saves the %tpc in
	 * ch_err_tl1_tpc.  At the end of this macro, %g1 will point to
	 * the ch_err_tl1_data structure and %g2 will have the original
	 * flags in the ch_err_tl1_data structure.  All %g registers
	 * except for %g1 and %g2 will be available.
	 */
	CH_ERR_TL1_ENTER(CH_ERR_IPE);

#ifdef TRAPTRACE
	/*
	 * Get current trap trace entry physical pointer.
	 */
	CPU_INDEX(%g6, %g5)
	sll	%g6, TRAPTR_SIZE_SHIFT, %g6
	set	trap_trace_ctl, %g5
	add	%g6, %g5, %g6
	ld	[%g6 + TRAPTR_LIMIT], %g5
	tst	%g5
	be	%icc, ipe_tl1_skip_tt
	nop
	ldx	[%g6 + TRAPTR_PBASE], %g5
	ld	[%g6 + TRAPTR_OFFSET], %g4
	add	%g5, %g4, %g5

	/*
	 * Create trap trace entry.
	 */
	rd	%asi, %g7
	wr	%g0, TRAPTR_ASI, %asi
	rd	STICK, %g4
	stxa	%g4, [%g5 + TRAP_ENT_TICK]%asi
	rdpr	%tl, %g4
	stha	%g4, [%g5 + TRAP_ENT_TL]%asi
	rdpr	%tt, %g4
	stha	%g4, [%g5 + TRAP_ENT_TT]%asi
	rdpr	%tpc, %g4
	stna	%g4, [%g5 + TRAP_ENT_TPC]%asi
	rdpr	%tstate, %g4
	stxa	%g4, [%g5 + TRAP_ENT_TSTATE]%asi
	stna	%sp, [%g5 + TRAP_ENT_SP]%asi
	stna	%g0, [%g5 + TRAP_ENT_TR]%asi
	stna	%g0, [%g5 + TRAP_ENT_F1]%asi
	stna	%g0, [%g5 + TRAP_ENT_F2]%asi
	stna	%g0, [%g5 + TRAP_ENT_F3]%asi
	stna	%g0, [%g5 + TRAP_ENT_F4]%asi
	wr	%g0, %g7, %asi

	/*
	 * Advance trap trace pointer.
	 */
	ld	[%g6 + TRAPTR_OFFSET], %g5
	ld	[%g6 + TRAPTR_LIMIT], %g4
	st	%g5, [%g6 + TRAPTR_LAST_OFFSET]
	add	%g5, TRAP_ENT_SIZE, %g5
	sub	%g4, TRAP_ENT_SIZE, %g4
	cmp	%g5, %g4
	movge	%icc, 0, %g5
	st	%g5, [%g6 + TRAPTR_OFFSET]
ipe_tl1_skip_tt:
#endif	/* TRAPTRACE */

	/*
	 * We fall into this macro if we've successfully logged the error in
	 * the ch_err_tl1_data structure and want the PIL15 softint to pick
	 * it up and log it.  %g1 must point to the ch_err_tl1_data structure.
	 * Restores the %g registers and issues retry.
	 */
	CH_ERR_TL1_EXIT;

	SET_SIZE(icache_parity_tl1_err)

#endif	/* CPU_IMP_L1_CACHE_PARITY */

/*
 * The itlb_rd_entry and dtlb_rd_entry functions return the tte data and
 * the tag (the virtual address, with the ctxnum bits masked off) of the
 * specified tlb entry.  They should only be used in places where you have
 * no choice but to look at the tlb itself.
 *
 * Note: These two routines are required by the Estar "cpr" loadable module.
 */
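/*
 * C-level view (a sketch; the real declarations live in the machine
 * headers and are assumed to match the register usage below):
 *
 *	void itlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag);
 *	void dtlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag);
 *
 * %o0 is the entry index (scaled by 8 to form the diagnostic ASI VA),
 * the tte data is returned through %o1 and the ctx-masked tag via %o2.
 */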

	ENTRY_NP(itlb_rd_entry)
	sllx	%o0, 3, %o0
	ldxa	[%o0]ASI_ITLB_ACCESS, %g1
	stx	%g1, [%o1]
	ldxa	[%o0]ASI_ITLB_TAGREAD, %g2
	set	TAGREAD_CTX_MASK, %o4
	andn	%g2, %o4, %o5
	retl
	stx	%o5, [%o2]
	SET_SIZE(itlb_rd_entry)


	ENTRY_NP(dtlb_rd_entry)
	sllx	%o0, 3, %o0
	ldxa	[%o0]ASI_DTLB_ACCESS, %g1
	stx	%g1, [%o1]
	ldxa	[%o0]ASI_DTLB_TAGREAD, %g2
	set	TAGREAD_CTX_MASK, %o4
	andn	%g2, %o4, %o5
	retl
	stx	%o5, [%o2]
	SET_SIZE(dtlb_rd_entry)


#if !(defined(JALAPENO) || defined(SERRANO))

	ENTRY(get_safari_config)
	ldxa	[%g0]ASI_SAFARI_CONFIG, %o0
	retl
	nop
	SET_SIZE(get_safari_config)


	ENTRY(set_safari_config)
	stxa	%o0, [%g0]ASI_SAFARI_CONFIG
	membar	#Sync
	retl
	nop
	SET_SIZE(set_safari_config)

#endif	/* !(JALAPENO || SERRANO) */

/*
 * Clear the NPT (non-privileged trap) bit in the %tick/%stick
 * registers.  In an effort to make the change in the
 * tick/stick counter as consistent as possible, we disable
 * all interrupts while we're changing the registers.  We also
 * ensure that the read and write instructions are in the same
 * line in the instruction cache.
 */
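/*
 * Note: per SPARC V9, wr/wrpr with two register operands writes
 * rs1 XOR rs2 to the destination, so the "wrpr %g3, %g2, %tick" and
 * "wr %g3, %g2, STICK" below XOR the NPT mask (bit 63, known to be
 * set at that point) into the current value, clearing just that bit.
 */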
	ENTRY_NP(cpu_clearticknpt)
	rdpr	%pstate, %g1		/* save processor state */
	andn	%g1, PSTATE_IE, %g3	/* turn off */
	wrpr	%g0, %g3, %pstate	/*   interrupts */
	rdpr	%tick, %g2		/* get tick register */
	brgez,pn %g2, 1f		/* if NPT bit off, we're done */
	mov	1, %g3			/* create mask */
	sllx	%g3, 63, %g3		/*   for NPT bit */
	ba,a,pt	%xcc, 2f
	.align	8			/* Ensure rd/wr in same i$ line */
2:
	rdpr	%tick, %g2		/* get tick register */
	wrpr	%g3, %g2, %tick		/* write tick register, */
					/*   clearing NPT bit */
1:
	rd	STICK, %g2		/* get stick register */
	brgez,pn %g2, 3f		/* if NPT bit off, we're done */
	mov	1, %g3			/* create mask */
	sllx	%g3, 63, %g3		/*   for NPT bit */
	ba,a,pt	%xcc, 4f
	.align	8			/* Ensure rd/wr in same i$ line */
4:
	rd	STICK, %g2		/* get stick register */
	wr	%g3, %g2, STICK		/* write stick register, */
					/*   clearing NPT bit */
3:
	jmp	%g4 + 4
	wrpr	%g0, %g1, %pstate	/* restore processor state */

	SET_SIZE(cpu_clearticknpt)


#if defined(CPU_IMP_L1_CACHE_PARITY)

/*
 * correct_dcache_parity(size_t size, size_t linesize)
 *
 * Correct D$ data parity by zeroing the data and initializing microtag
 * for all indexes and all ways of the D$.
 */
	ENTRY(correct_dcache_parity)
	/*
	 * Register Usage:
	 *
	 * %o0 = input D$ size
	 * %o1 = input D$ line size
	 * %o2 = scratch
	 * %o3 = scratch
	 * %o4 = scratch
	 */
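	/*
	 * A rough C sketch of the loops below (illustrative only; the
	 * utag and data "arrays" are really diagnostic ASI accesses):
	 *
	 *	for (line = size - linesize; line >= 0; line -= linesize) {
	 *		utag[line] = line >> 14;	// way number
	 *		for (off = linesize - 8; off >= 0; off -= 8)
	 *			data[line + off] = 0;	// data and parity
	 *	}
	 */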

	sub	%o0, %o1, %o0		! init cache line address

	/*
	 * For Panther CPUs, we also need to clear the data parity bits
	 * using DC_data_parity bit of the ASI_DCACHE_DATA register.
	 */
	GET_CPU_IMPL(%o3)
	cmp	%o3, PANTHER_IMPL
	bne	1f
	clr	%o3			! zero for non-Panther
	mov	1, %o3
	sll	%o3, PN_DC_DATA_PARITY_BIT_SHIFT, %o3

1:
	/*
	 * Set utag = way since it must be unique within an index.
	 */
	srl	%o0, 14, %o2		! get cache way (DC_way)
	membar	#Sync			! required before ASI_DC_UTAG
	stxa	%o2, [%o0]ASI_DC_UTAG	! set D$ utag = cache way
	membar	#Sync			! required after ASI_DC_UTAG

	/*
	 * Zero line of D$ data (and data parity bits for Panther)
	 */
	sub	%o1, 8, %o2
	or	%o0, %o3, %o4		! same address + DC_data_parity
2:
	membar	#Sync			! required before ASI_DC_DATA
	stxa	%g0, [%o0 + %o2]ASI_DC_DATA	! zero 8 bytes of D$ data
	membar	#Sync			! required after ASI_DC_DATA
	/*
	 * We also clear the parity bits if this is a Panther.  For
	 * non-Panther CPUs, we simply end up clearing the D$ data twice.
	 */
	stxa	%g0, [%o4 + %o2]ASI_DC_DATA
	membar	#Sync

	subcc	%o2, 8, %o2
	bge	2b
	nop

	subcc	%o0, %o1, %o0
	bge	1b
	nop

	retl
	nop
	SET_SIZE(correct_dcache_parity)

#endif	/* CPU_IMP_L1_CACHE_PARITY */


	ENTRY_NP(stick_timestamp)
	rd	STICK, %g1		! read stick reg
	sllx	%g1, 1, %g1
	srlx	%g1, 1, %g1		! clear npt bit

	retl
	stx	%g1, [%o0]		! store the timestamp
	SET_SIZE(stick_timestamp)


	ENTRY_NP(stick_adj)
	rdpr	%pstate, %g1		! save processor state
	andn	%g1, PSTATE_IE, %g3
	ba	1f			! cache align stick adj
	wrpr	%g0, %g3, %pstate	! turn off interrupts

	.align	16
1:	nop

	rd	STICK, %g4		! read stick reg
	add	%g4, %o0, %o1		! adjust stick with skew
	wr	%o1, %g0, STICK		! write stick reg

	retl
	wrpr	%g1, %pstate		! restore processor state
	SET_SIZE(stick_adj)

	ENTRY_NP(kdi_get_stick)
	rd	STICK, %g1
	stx	%g1, [%o0]
	retl
	mov	%g0, %o0
	SET_SIZE(kdi_get_stick)

/*
 * Invalidate the specified line from the D$.
 *
 * Register usage:
 *	%o0 - index for the invalidation, specifies DC_way and DC_addr
 *
 * ASI_DC_TAG, 0x47, is used in the following manner.  A 64-bit value is
 * stored to a particular DC_way and DC_addr in ASI_DC_TAG.
 *
 * The format of the stored 64-bit value is:
 *
 *	+----------+--------+----------+
 *	| Reserved | DC_tag | DC_valid |
 *	+----------+--------+----------+
 *	 63      31 30     1 0
 *
 * DC_tag is the 30-bit physical tag of the associated line.
 * DC_valid is the 1-bit valid field for both the physical and snoop tags.
 *
 * The format of the 64-bit DC_way and DC_addr into ASI_DC_TAG is:
 *
 *	+----------+--------+---------+----------+
 *	| Reserved | DC_way | DC_addr | Reserved |
 *	+----------+--------+---------+----------+
 *	 63      16 15    14 13      5 4        0
 *
 * DC_way is a 2-bit index that selects one of the 4 ways.
 * DC_addr is a 9-bit index that selects one of 512 tag/valid fields.
 *
 * Setting the DC_valid bit to zero for the specified DC_way and
 * DC_addr index into the D$ results in an invalidation of a D$ line.
 */
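/*
 * For example (illustrative): to invalidate DC_addr 0x10 of DC_way 2,
 * a caller would pass index = (2 << 9) | 0x10; the shift below then
 * places it in bits <15:5> as the ASI expects.
 */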
	ENTRY(dcache_inval_line)
	sll	%o0, 5, %o0		! shift index into DC_way and DC_addr
	stxa	%g0, [%o0]ASI_DC_TAG	! zero the DC_valid and DC_tag bits
	membar	#Sync
	retl
	nop
	SET_SIZE(dcache_inval_line)

/*
 * Invalidate the entire I$.
 *
 * Register usage:
 *	%o0 - specifies IC_way, IC_addr, IC_tag
 *	%o1 - scratch
 *	%o2 - used to save and restore DCU value
 *	%o3 - scratch
 *	%o5 - used to save and restore PSTATE
 *
 * Due to the behavior of the I$ control logic when accessing ASI_IC_TAG,
 * the I$ should be turned off.  Accesses to ASI_IC_TAG may collide and
 * block out snoops and invalidates to the I$, causing I$ consistency
 * to be broken.  Before turning on the I$, all I$ lines must be invalidated.
 *
 * ASI_IC_TAG, 0x67, is used in the following manner.  A 64-bit value is
 * stored to a particular IC_way, IC_addr, IC_tag in ASI_IC_TAG.  The
 * info below describes store (write) use of ASI_IC_TAG.  Note that read
 * use of ASI_IC_TAG behaves differently.
 *
 * The format of the stored 64-bit value is:
 *
 *	+----------+-------+---------------+-----------+
 *	| Reserved | Valid | IC_vpred<7:0> | Undefined |
 *	+----------+-------+---------------+-----------+
 *	 63      55    54   53           46 45        0
 *
 * Valid is the 1-bit valid field for both the physical and snoop tags.
 * IC_vpred is the set of 8 LPB bits for the 8 instructions starting at
 * the 32-byte boundary aligned address specified by IC_addr.
 *
 * The format of the 64-bit IC_way, IC_addr, IC_tag into ASI_IC_TAG is:
 *
 *	+----------+--------+---------+--------+----------+
 *	| Reserved | IC_way | IC_addr | IC_tag | Reserved |
 *	+----------+--------+---------+--------+----------+
 *	 63      16 15    14 13      5 4      3 2        0
 *
 * IC_way is a 2-bit index that selects one of the 4 ways.
 * IC_addr[13:6] is an 8-bit index that selects one of 256 valid fields.
 * IC_addr[5] is a "don't care" for a store.
 * IC_tag set to 2 specifies that the stored value is to be interpreted
 * as containing Valid and IC_vpred as described above.
 *
 * Setting the Valid bit to zero for the specified IC_way and
 * IC_addr index into the I$ results in an invalidation of an I$ line.
 */
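/*
 * A minimal sketch of what the flush-all amounts to, given the layout
 * above (the real work is done by the CH_ICACHE_FLUSHALL macro from
 * cheetahasm.h; this pseudocode is an assumption, not the macro's text):
 *
 *	for (way = 0; way < 4; way++)
 *		for (addr = 0; addr < way_size; addr += ic_linesize)
 *			ASI_IC_TAG[(way << 14) | addr | (2 << 3)] = 0;
 */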
	ENTRY(icache_inval_all)
	rdpr	%pstate, %o5
	andn	%o5, PSTATE_IE, %o3
	wrpr	%g0, %o3, %pstate	! clear IE bit

	GET_CPU_PRIVATE_PTR(%g0, %o0, %o2, icache_inval_all_1);
	ld	[%o0 + CHPR_ICACHE_LINESIZE], %o1
	ba,pt	%icc, 2f
	ld	[%o0 + CHPR_ICACHE_SIZE], %o0
icache_inval_all_1:
	ASM_LD(%o0, icache_size)
	ASM_LD(%o1, icache_linesize)
2:
	CH_ICACHE_FLUSHALL(%o0, %o1, %o2, %o4)

	retl
	wrpr	%g0, %o5, %pstate	! restore earlier pstate
	SET_SIZE(icache_inval_all)


/*
 * cache_scrubreq_tl1 is the crosstrap handler called on offlined cpus.
 * It atomically increments the outstanding request counter and, if
 * there was not already an outstanding request, branches to
 * setsoftint_tl1 to enqueue an intr_vec for the given inum.
 */

	! Register usage:
	!
	! Arguments:
	! %g1 - inum
	! %g2 - index into chsm_outstanding array
	!
	! Internal:
	! %g2, %g3, %g5 - scratch
	! %g4 - ptr. to scrub_misc chsm_outstanding[index]
	! %g6 - setsoftint_tl1 address

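	! Roughly, in illustrative C:
	!
	!	if (chsm_outstanding[index]++ == 0)
	!		setsoftint_tl1(inum);	! enqueue the intr_vec
	!	retry;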
	ENTRY_NP(cache_scrubreq_tl1)
	mulx	%g2, CHSM_OUTSTANDING_INCR, %g2
	set	CHPR_SCRUB_MISC + CHSM_OUTSTANDING, %g3
	add	%g2, %g3, %g2
	GET_CPU_PRIVATE_PTR(%g2, %g4, %g5, 1f);
	ld	[%g4], %g2		! cpu's chsm_outstanding[index]
	!
	! no need to use atomic instructions for the following
	! increment - we're at tl1
	!
	add	%g2, 0x1, %g3
	brnz,pn	%g2, 1f			! no need to enqueue more intr_vec
	st	%g3, [%g4]		! delay - store incremented counter
	ASM_JMP(%g6, setsoftint_tl1)
	! not reached
1:
	retry
	SET_SIZE(cache_scrubreq_tl1)


/*
 * Get the error state for the processor.
 * Note that this must not be used at TL>0.
 */
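/*
 * Assumed C prototype (a sketch based on the stores below):
 *
 *	void get_cpu_error_state(ch_cpu_errors_t *cpu_error_regs);
 *
 * Fills in the primary AFSR/AFAR and, where the hardware has them, the
 * shadow and extended variants; fields a CPU lacks are stored as zero.
 */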
	ENTRY(get_cpu_error_state)
#if defined(CHEETAH_PLUS)
	set	ASI_SHADOW_REG_VA, %o2
	ldxa	[%o2]ASI_AFSR, %o1	! shadow afsr reg
	stx	%o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR]
	ldxa	[%o2]ASI_AFAR, %o1	! shadow afar reg
	stx	%o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFAR]
	GET_CPU_IMPL(%o3)		! Only Panther has AFSR_EXT registers
	cmp	%o3, PANTHER_IMPL
	bne,a	1f
	stx	%g0, [%o0 + CH_CPU_ERRORS_AFSR_EXT]	! zero for non-PN
	set	ASI_AFSR_EXT_VA, %o2
	ldxa	[%o2]ASI_AFSR, %o1	! afsr_ext reg
	stx	%o1, [%o0 + CH_CPU_ERRORS_AFSR_EXT]
	set	ASI_SHADOW_AFSR_EXT_VA, %o2
	ldxa	[%o2]ASI_AFSR, %o1	! shadow afsr_ext reg
	stx	%o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT]
	b	2f
	nop
1:
	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT]	! zero for non-PN
2:
#else	/* CHEETAH_PLUS */
	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR]
	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFAR]
	stx	%g0, [%o0 + CH_CPU_ERRORS_AFSR_EXT]
	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT]
#endif	/* CHEETAH_PLUS */
#if defined(SERRANO)
	/*
	 * Serrano has an afar2 which captures the address on FRC/FRU errors.
	 * We save this in the afar2 of the register save area.
	 */
	set	ASI_MCU_AFAR2_VA, %o2
	ldxa	[%o2]ASI_MCU_CTRL, %o1
	stx	%o1, [%o0 + CH_CPU_ERRORS_AFAR2]
#endif	/* SERRANO */
	ldxa	[%g0]ASI_AFSR, %o1	! primary afsr reg
	stx	%o1, [%o0 + CH_CPU_ERRORS_AFSR]
	ldxa	[%g0]ASI_AFAR, %o1	! primary afar reg
	retl
	stx	%o1, [%o0 + CH_CPU_ERRORS_AFAR]
	SET_SIZE(get_cpu_error_state)

/*
 * Check a page of memory for errors.
 *
 * Load each 64 byte block from physical memory.
 * Check AFSR after each load to see if an error
 * was caused.  If so, log/scrub that error.
 *
 * Used to determine if a page contains
 * CEs when CEEN is disabled.
 */
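/*
 * A rough C equivalent of the loop below (a sketch; block_load() and
 * read_afsr() stand in for the ASI accesses and are not real functions):
 *
 *	void cpu_check_block(caddr_t va, uint_t psz) {
 *		ch_cpu_errors_t regs;
 *		for (uint_t n = psz / 64; n != 0; n--, va += 64) {
 *			block_load(va);
 *			if (read_afsr() != 0) {
 *				get_cpu_error_state(&regs);
 *				cpu_ce_detected(&regs, CE_CEEN_TIMEOUT);
 *			}
 *		}
 *	}
 */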
	ENTRY(cpu_check_block)
	!
	! get a new window with room for the error regs
	!
	save	%sp, -SA(MINFRAME + CH_CPU_ERROR_SIZE), %sp
	srl	%i1, 6, %l4		! clear top bits of psz
					! and divide by 64
	rd	%fprs, %l2		! store FP
	wr	%g0, FPRS_FEF, %fprs	! enable FP
1:
	ldda	[%i0]ASI_BLK_P, %d0	! load a block
	membar	#Sync
	ldxa	[%g0]ASI_AFSR, %l3	! read afsr reg
	brz,a,pt %l3, 2f		! check for error
	nop

	!
	! if error, read the error regs and log it
	!
	call	get_cpu_error_state
	add	%fp, STACK_BIAS - CH_CPU_ERROR_SIZE, %o0

	!
	! cpu_ce_detected(ch_cpu_errors_t *, flag)
	!
	call	cpu_ce_detected		! log the error
	mov	CE_CEEN_TIMEOUT, %o1
2:
	dec	%l4			! next 64-byte block
	brnz,a,pt %l4, 1b
	add	%i0, 64, %i0		! increment block addr

	wr	%l2, %g0, %fprs		! restore FP
	ret
	restore

	SET_SIZE(cpu_check_block)

/*
 * Perform a cpu logout called from C.  This is used where we did not trap
 * for the error but still want to gather "what we can".  Caller must make
 * sure the cpu private area exists, that the indicated logout area is free
 * for use, and that we cannot migrate to another cpu.
 */
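/*
 * Assumed interface (a sketch): cpu_delayed_logout(uint64_t afar,
 * ch_cpu_logout_t *clop), with %o0 carrying the AFAR used to gather
 * the E$ tags and %o1 the logout area that gets filled in.
 */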
	ENTRY(cpu_delayed_logout)
	rdpr	%pstate, %o2
	andn	%o2, PSTATE_IE, %o2
	wrpr	%g0, %o2, %pstate		! disable interrupts
	PARK_SIBLING_CORE(%o2, %o3, %o4)	! %o2 has DCU value
	add	%o1, CH_CLO_DATA + CH_CHD_EC_DATA, %o1
	rd	%asi, %g1
	wr	%g0, ASI_P, %asi
	GET_ECACHE_DTAGS(%o0, %o1, %o3, %o4, %o5)
	wr	%g1, %asi
	UNPARK_SIBLING_CORE(%o2, %o3, %o4)	! can use %o2 again
	rdpr	%pstate, %o2
	or	%o2, PSTATE_IE, %o2
	wrpr	%g0, %o2, %pstate
	retl
	nop
	SET_SIZE(cpu_delayed_logout)

	ENTRY(dtrace_blksuword32)
	save	%sp, -SA(MINFRAME + 4), %sp

	rdpr	%pstate, %l1
	andn	%l1, PSTATE_IE, %l2		! disable interrupts to
	wrpr	%g0, %l2, %pstate		! protect our FPU diddling

	rd	%fprs, %l0
	andcc	%l0, FPRS_FEF, %g0
	bz,a,pt	%xcc, 1f			! if the fpu is disabled
	wr	%g0, FPRS_FEF, %fprs		! ... enable the fpu

	st	%f0, [%fp + STACK_BIAS - 4]	! save %f0 to the stack
1:
	set	0f, %l5
	/*
	 * We're about to write a block full of either total garbage
	 * (not kernel data, don't worry) or user floating-point data
	 * (so it only _looks_ like garbage).
	 */
	ld	[%i1], %f0			! modify the block
	membar	#Sync
	stn	%l5, [THREAD_REG + T_LOFAULT]	! set up the lofault handler
	stda	%d0, [%i0]ASI_BLK_COMMIT_S	! store the modified block
	membar	#Sync
	stn	%g0, [THREAD_REG + T_LOFAULT]	! remove the lofault handler

	bz,a,pt	%xcc, 1f
	wr	%g0, %l0, %fprs			! restore %fprs

	ld	[%fp + STACK_BIAS - 4], %f0	! restore %f0
1:

	wrpr	%g0, %l1, %pstate		! restore interrupts

	ret
	restore	%g0, %g0, %o0

0:
	membar	#Sync
	stn	%g0, [THREAD_REG + T_LOFAULT]	! remove the lofault handler

	bz,a,pt	%xcc, 1f
	wr	%g0, %l0, %fprs			! restore %fprs

	ld	[%fp + STACK_BIAS - 4], %f0	! restore %f0
1:

	wrpr	%g0, %l1, %pstate		! restore interrupts

	/*
	 * If tryagain is set (%i2) we tail-call dtrace_blksuword32_err()
	 * which deals with watchpoints.  Otherwise, just return -1.
	 */
	brnz,pt	%i2, 1f
	nop
	ret
	restore	%g0, -1, %o0
1:
	call	dtrace_blksuword32_err
	restore

	SET_SIZE(dtrace_blksuword32)

#ifdef	CHEETAHPLUS_ERRATUM_25

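/*
 * claimlines: walk the physical range [%o0, %o0 + %o1) backwards in
 * %o2-byte strides, doing a casxa of %g0 against %g0 on each line.
 * The compare-and-swap leaves memory unchanged but, being a store-class
 * access, is assumed to claim each cache line exclusively, which is
 * what the Cheetah+ erratum 25 workaround requires.
 */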
	ENTRY(claimlines)
1:
	subcc	%o1, %o2, %o1
	add	%o0, %o1, %o3
	bgeu,a,pt %xcc, 1b
	casxa	[%o3]ASI_MEM, %g0, %g0
	membar	#Sync
	retl
	nop
	SET_SIZE(claimlines)

	ENTRY(cpu_feature_init)
	save	%sp, -SA(MINFRAME), %sp
	sethi	%hi(cheetah_bpe_off), %o0
	ld	[%o0 + %lo(cheetah_bpe_off)], %o0
	brz	%o0, 1f
	nop
	rd	ASR_DISPATCH_CONTROL, %o0
	andn	%o0, ASR_DISPATCH_CONTROL_BPE, %o0
	wr	%o0, 0, ASR_DISPATCH_CONTROL
1:
	!
	! get the device_id and store it in the appropriate cpunodes
	! structure, given the cpu's index
	!
	CPU_INDEX(%o0, %o1)
	mulx	%o0, CPU_NODE_SIZE, %o0
	set	cpunodes + DEVICE_ID, %o1
	ldxa	[%g0] ASI_DEVICE_SERIAL_ID, %o2
	stx	%o2, [%o0 + %o1]
#ifdef	CHEETAHPLUS_ERRATUM_34
	!
	! apply Cheetah+ erratum 34 workaround
	!
	call	itlb_erratum34_fixup
	nop
	call	dtlb_erratum34_fixup
	nop
#endif	/* CHEETAHPLUS_ERRATUM_34 */
	ret
	restore
	SET_SIZE(cpu_feature_init)

/*
 * Copy a tsb entry atomically, from src to dest.
 * src must be 128-bit aligned.
 */
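/*
 * ASI_NQUAD_LD makes the ldda below a single atomic 128-bit load into
 * an even/odd register pair, so the tag and data are read consistently.
 */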
	ENTRY(copy_tsb_entry)
	ldda	[%o0]ASI_NQUAD_LD, %o2	! %o2 = tag, %o3 = data
	stx	%o2, [%o1]
	stx	%o3, [%o1 + 8]
	retl
	nop
	SET_SIZE(copy_tsb_entry)

#endif	/* CHEETAHPLUS_ERRATUM_25 */

#ifdef	CHEETAHPLUS_ERRATUM_34

	!
	! In Cheetah+ erratum 34, under certain conditions an ITLB locked
	! index 0 TTE will erroneously be displaced when a new TTE is
	! loaded via ASI_ITLB_IN.  To avoid Cheetah+ erratum 34, locked
	! index 0 TTEs must be relocated.
	!
	! NOTE: Care must be taken to avoid an ITLB miss in this routine.
	!
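	! Sketch of the relocation performed below (illustrative):
	!
	!	if (entry[0] is valid && locked) {
	!		demap entry[0];
	!		for (i = 1; entry[i] is valid && locked; i++)
	!			;
	!		entry[i] = saved entry[0] tag/data;
	!	}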
	ENTRY_NP(itlb_erratum34_fixup)
	rdpr	%pstate, %o3
#ifdef DEBUG
	PANIC_IF_INTR_DISABLED_PSTR(%o3, u3_di_label1, %g1)
#endif	/* DEBUG */
	wrpr	%o3, PSTATE_IE, %pstate		! Disable interrupts
	ldxa	[%g0]ASI_ITLB_ACCESS, %o1	! %o1 = entry 0 data
	ldxa	[%g0]ASI_ITLB_TAGREAD, %o2	! %o2 = entry 0 tag

	cmp	%o1, %g0			! Is this entry valid?
	bge	%xcc, 1f
	andcc	%o1, TTE_LCK_INT, %g0		! Is this entry locked?
	bnz	%icc, 2f
	nop
1:
	retl					! Nope, outta here...
	wrpr	%g0, %o3, %pstate		! Enable interrupts
2:
	sethi	%hi(FLUSH_ADDR), %o4
	stxa	%g0, [%o2]ASI_ITLB_DEMAP	! Flush this mapping
	flush	%o4				! Flush required for I-MMU
	!
	! Start search from index 1 up.  This is because the kernel force
	! loads its text page at index 15 in sfmmu_kernel_remap() and we
	! don't want our relocated entry evicted later.
	!
	! NOTE: We assume that we'll be successful in finding an unlocked
	! or invalid entry.  If that isn't the case there are bound to be
	! bigger problems.
	!
	set	(1 << 3), %g3
3:
	ldxa	[%g3]ASI_ITLB_ACCESS, %o4	! Load TTE from t16
	!
	! If this entry isn't valid, we'll choose to displace it (regardless
	! of the lock bit).
	!
	cmp	%o4, %g0			! TTE is >= 0 iff not valid
	bge	%xcc, 4f			! If invalid, go displace
	andcc	%o4, TTE_LCK_INT, %g0		! Check for lock bit
	bnz,a	%icc, 3b			! If locked, look at next
	add	%g3, (1 << 3), %g3		!   entry
4:
	!
	! We found an unlocked or invalid entry; we'll explicitly load
	! the former index 0 entry here.
	!
	sethi	%hi(FLUSH_ADDR), %o4
	set	MMU_TAG_ACCESS, %g4
	stxa	%o2, [%g4]ASI_IMMU
	stxa	%o1, [%g3]ASI_ITLB_ACCESS
	flush	%o4				! Flush required for I-MMU
	retl
	wrpr	%g0, %o3, %pstate		! Enable interrupts
	SET_SIZE(itlb_erratum34_fixup)


	!
	! In Cheetah+ erratum 34, under certain conditions a DTLB locked
	! index 0 TTE will erroneously be displaced when a new TTE is
	! loaded.  To avoid Cheetah+ erratum 34, locked index 0 TTEs
	! must be relocated.
	!
	ENTRY_NP(dtlb_erratum34_fixup)
	rdpr	%pstate, %o3
#ifdef DEBUG
	PANIC_IF_INTR_DISABLED_PSTR(%o3, u3_di_label2, %g1)
#endif	/* DEBUG */
	wrpr	%o3, PSTATE_IE, %pstate		! Disable interrupts
	ldxa	[%g0]ASI_DTLB_ACCESS, %o1	! %o1 = entry 0 data
	ldxa	[%g0]ASI_DTLB_TAGREAD, %o2	! %o2 = entry 0 tag

	cmp	%o1, %g0			! Is this entry valid?
	bge	%xcc, 1f
	andcc	%o1, TTE_LCK_INT, %g0		! Is this entry locked?
	bnz	%icc, 2f
	nop
1:
	retl					! Nope, outta here...
	wrpr	%g0, %o3, %pstate		! Enable interrupts
2:
	stxa	%g0, [%o2]ASI_DTLB_DEMAP	! Flush this mapping
	membar	#Sync
	!
	! Start search from index 1 up.
	!
	! NOTE: We assume that we'll be successful in finding an unlocked
	! or invalid entry.  If that isn't the case there are bound to be
	! bigger problems.
	!
	set	(1 << 3), %g3
3:
	ldxa	[%g3]ASI_DTLB_ACCESS, %o4	! Load TTE from t16
	!
	! If this entry isn't valid, we'll choose to displace it (regardless
	! of the lock bit).
	!
	cmp	%o4, %g0			! TTE is >= 0 iff not valid
	bge	%xcc, 4f			! If invalid, go displace
	andcc	%o4, TTE_LCK_INT, %g0		! Check for lock bit
	bnz,a	%icc, 3b			! If locked, look at next
	add	%g3, (1 << 3), %g3		!   entry
4:
	!
	! We found an unlocked or invalid entry; we'll explicitly load
	! the former index 0 entry here.
	!
	set	MMU_TAG_ACCESS, %g4
	stxa	%o2, [%g4]ASI_DMMU
	stxa	%o1, [%g3]ASI_DTLB_ACCESS
	membar	#Sync
	retl
	wrpr	%g0, %o3, %pstate		! Enable interrupts
	SET_SIZE(dtlb_erratum34_fixup)

#endif	/* CHEETAHPLUS_ERRATUM_34 */