8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 *
25 * Assembly code support for Cheetah/Cheetah+ modules
26 */
27
28 #if !defined(lint)
29 #include "assym.h"
30 #endif /* !lint */
31
32 #include <sys/asm_linkage.h>
33 #include <sys/mmu.h>
34 #include <vm/hat_sfmmu.h>
35 #include <sys/machparam.h>
36 #include <sys/machcpuvar.h>
37 #include <sys/machthread.h>
38 #include <sys/machtrap.h>
39 #include <sys/privregs.h>
40 #include <sys/trap.h>
41 #include <sys/cheetahregs.h>
42 #include <sys/us3_module.h>
43 #include <sys/xc_impl.h>
44 #include <sys/intreg.h>
45 #include <sys/async.h>
46 #include <sys/clock.h>
47 #include <sys/cheetahasm.h>
48 #include <sys/cmpregs.h>
49
50 #ifdef TRAPTRACE
51 #include <sys/traptrace.h>
52 #endif /* TRAPTRACE */
53
54 #if !defined(lint)
55
56 /* BEGIN CSTYLED */
57
58 #define DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3) \
59 ldxa [%g0]ASI_DCU, tmp1 ;\
60 btst DCU_DC, tmp1 /* is dcache enabled? */ ;\
61 bz,pn %icc, 1f ;\
62 ASM_LD(tmp1, dcache_linesize) ;\
63 ASM_LD(tmp2, dflush_type) ;\
64 cmp tmp2, FLUSHPAGE_TYPE ;\
65 be,pt %icc, 2f ;\
66 nop ;\
67 sllx arg1, CHEETAH_DC_VBIT_SHIFT, arg1/* tag to compare */ ;\
68 ASM_LD(tmp3, dcache_size) ;\
69 cmp tmp2, FLUSHMATCH_TYPE ;\
70 be,pt %icc, 3f ;\
71 nop ;\
72 /* \
73 * flushtype = FLUSHALL_TYPE, flush the whole thing \
74 * tmp3 = cache size \
75 * tmp1 = cache line size \
152 sllx arg, MMU_PAGESHIFT, tmp2; /* color to dcache page */ \
153 mov way, tmp3; \
154 sllx tmp3, 14, tmp3; /* One way 16K */ \
155 or tmp2, tmp3, tmp3; \
156 set MMU_PAGESIZE, tmp2; \
157 /* \
158 * tmp2 = page size \
159 * tmp3 = cached page in dcache \
160 */ \
161 sub tmp2, tmp1, tmp2; \
162 2: \
163 stxa %g0, [tmp3 + tmp2]ASI_DC_TAG; \
164 membar #Sync; \
165 cmp %g0, tmp2; \
166 bne,pt %icc, 2b; \
167 sub tmp2, tmp1, tmp2; \
168 1:
169
170 /* END CSTYLED */
171
172 #endif /* !lint */
173
174 /*
175 * Cheetah MMU and Cache operations.
176 */
177
178 #if defined(lint)
179
180 /* ARGSUSED */
181 void
182 vtag_flushpage(caddr_t vaddr, uint64_t sfmmup)
183 {}
184
185 #else /* lint */
186
187 ENTRY_NP(vtag_flushpage)
188 /*
189 * flush page from the tlb
190 *
191 * %o0 = vaddr
192 * %o1 = sfmmup
193 */
194 rdpr %pstate, %o5
195 #ifdef DEBUG
196 PANIC_IF_INTR_DISABLED_PSTR(%o5, u3_di_label0, %g1)
197 #endif /* DEBUG */
198 /*
199 * disable ints
200 */
201 andn %o5, PSTATE_IE, %o4
202 wrpr %o4, 0, %pstate
203
204 /*
205 * Then, blow out the tlb
206 * Interrupts are disabled to prevent the primary ctx register
235
236 wrpr %g0, 1, %tl
237 set MMU_PCONTEXT, %o4
238 or DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %o0, %o0
239 ldxa [%o4]ASI_DMMU, %o2 ! %o2 = save old ctxnum
240 srlx %o2, CTXREG_NEXT_SHIFT, %o1 ! need to preserve nucleus pgsz
241 sllx %o1, CTXREG_NEXT_SHIFT, %o1 ! %o1 = nucleus pgsz
242 or %g1, %o1, %g1 ! %g1 = nucleus pgsz | primary pgsz | cnum
243 stxa %g1, [%o4]ASI_DMMU ! wr new ctxum
244
245 stxa %g0, [%o0]ASI_DTLB_DEMAP
246 stxa %g0, [%o0]ASI_ITLB_DEMAP
247 stxa %o2, [%o4]ASI_DMMU /* restore old ctxnum */
248 flush %o3
249 wrpr %g0, 0, %tl
250
251 retl
252 wrpr %g0, %o5, %pstate /* enable interrupts */
253 SET_SIZE(vtag_flushpage)
254
255 #endif /* lint */
256
257 #if defined(lint)
258
259 void
260 vtag_flushall(void)
261 {}
262
263 #else /* lint */
264
265 ENTRY_NP2(vtag_flushall, demap_all) /* one body, sized under both names below */
266 /*
267 * flush the tlb: issue a demap-all to the D-TLB and then to the I-TLB; no arguments are read
268 */
269 sethi %hi(FLUSH_ADDR), %o3 ! %o3 = address operand for the flush below
270 set DEMAP_ALL_TYPE, %g1 ! %g1 = demap operation selector
271 stxa %g0, [%g1]ASI_DTLB_DEMAP ! demap the data TLB
272 stxa %g0, [%g1]ASI_ITLB_DEMAP ! demap the instruction TLB
273 flush %o3 ! flush issued after the ITLB demap store
274 retl
275 nop ! delay slot
276 SET_SIZE(demap_all)
277 SET_SIZE(vtag_flushall)
278
279 #endif /* lint */
280
281
282 #if defined(lint)
283
284 /* ARGSUSED */
285 void
286 vtag_flushpage_tl1(uint64_t vaddr, uint64_t sfmmup)
287 {}
288
289 #else /* lint */
290
291 ENTRY_NP(vtag_flushpage_tl1)
292 /*
293 * x-trap to flush page from tlb and tsb
294 *
295 * %g1 = vaddr, zero-extended on 32-bit kernel
296 * %g2 = sfmmup
297 *
298 * assumes TSBE_TAG = 0
299 */
300 srln %g1, MMU_PAGESHIFT, %g1
301
302 sethi %hi(ksfmmup), %g3
303 ldx [%g3 + %lo(ksfmmup)], %g3
304 cmp %g3, %g2
305 bne,pt %xcc, 1f ! if not kernel as, go to 1
306 slln %g1, MMU_PAGESHIFT, %g1 /* g1 = vaddr */
307
308 /* We need to demap in the kernel context */
309 or DEMAP_NUCLEUS | DEMAP_PAGE_TYPE, %g1, %g1
310 stxa %g0, [%g1]ASI_DTLB_DEMAP
315 or DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %g1, %g1
316
317 SFMMU_CPU_CNUM(%g2, %g6, %g3) ! %g6 = sfmmu cnum on this CPU
318
319 ldub [%g2 + SFMMU_CEXT], %g4 ! %g4 = sfmmup->cext
320 sll %g4, CTXREG_EXT_SHIFT, %g4
321 or %g6, %g4, %g6 ! %g6 = pgsz | cnum
322
323 set MMU_PCONTEXT, %g4
324 ldxa [%g4]ASI_DMMU, %g5 /* rd old ctxnum */
325 srlx %g5, CTXREG_NEXT_SHIFT, %g2 /* %g2 = nucleus pgsz */
326 sllx %g2, CTXREG_NEXT_SHIFT, %g2 /* preserve nucleus pgsz */
327 or %g6, %g2, %g6 /* %g6 = nucleus pgsz | primary pgsz | cnum */
328 stxa %g6, [%g4]ASI_DMMU /* wr new ctxum */
329 stxa %g0, [%g1]ASI_DTLB_DEMAP
330 stxa %g0, [%g1]ASI_ITLB_DEMAP
331 stxa %g5, [%g4]ASI_DMMU /* restore old ctxnum */
332 retry
333 SET_SIZE(vtag_flushpage_tl1)
334
335 #endif /* lint */
336
337
338 #if defined(lint)
339
340 /* ARGSUSED */
341 void
342 vtag_flush_pgcnt_tl1(uint64_t vaddr, uint64_t sfmmup_pgcnt)
343 {}
344
345 #else /* lint */
346
347 ENTRY_NP(vtag_flush_pgcnt_tl1)
348 /*
349 * x-trap to flush pgcnt MMU_PAGESIZE pages from tlb
350 *
351 * %g1 = vaddr, zero-extended on 32-bit kernel
352 * %g2 = <sfmmup58|pgcnt6>, (pgcnt - 1) is passed in via pgcnt6 bits.
353 *
354 * NOTE: this handler relies on the fact that no
355 * interrupts or traps can occur during the loop
356 * issuing the TLB_DEMAP operations. It is assumed
357 * that interrupts are disabled and this code is
358 * fetching from the kernel locked text address.
359 *
360 * assumes TSBE_TAG = 0
361 */
362 set SFMMU_PGCNT_MASK, %g4
363 and %g4, %g2, %g3 /* g3 = pgcnt - 1 */
364 add %g3, 1, %g3 /* g3 = pgcnt */
365
366 andn %g2, SFMMU_PGCNT_MASK, %g2 /* g2 = sfmmup */
405 srlx %g6, CTXREG_NEXT_SHIFT, %g2 /* %g2 = nucleus pgsz */
406 sllx %g2, CTXREG_NEXT_SHIFT, %g2 /* preserve nucleus pgsz */
407 or %g5, %g2, %g5 /* %g5 = nucleus pgsz | primary pgsz | cnum */
408 stxa %g5, [%g4]ASI_DMMU /* wr new ctxum */
409
410 set MMU_PAGESIZE, %g2 /* g2 = pgsize */
411 sethi %hi(FLUSH_ADDR), %g5
412 3:
413 stxa %g0, [%g1]ASI_DTLB_DEMAP
414 stxa %g0, [%g1]ASI_ITLB_DEMAP
415 flush %g5 ! flush required by immu
416
417 deccc %g3 /* decr pgcnt */
418 bnz,pt %icc,3b
419 add %g1, %g2, %g1 /* next page */
420
421 stxa %g6, [%g4]ASI_DMMU /* restore old ctxnum */
422 retry
423 SET_SIZE(vtag_flush_pgcnt_tl1)
424
425 #endif /* lint */
426
427 #if defined(lint)
428
429 /*ARGSUSED*/
430 void
431 vtag_flushall_tl1(uint64_t dummy1, uint64_t dummy2)
432 {}
433
434 #else /* lint */
435
436 ENTRY_NP(vtag_flushall_tl1) /* uses only %g4 and %g0; ends with retry rather than retl */
437 /*
438 * x-trap to flush tlb: demap-all on the D-TLB and the I-TLB; neither argument register is read
439 */
440 set DEMAP_ALL_TYPE, %g4 ! %g4 = demap operation selector
441 stxa %g0, [%g4]ASI_DTLB_DEMAP ! demap the data TLB
442 stxa %g0, [%g4]ASI_ITLB_DEMAP ! demap the instruction TLB
443 retry ! leave the trap handler
444 SET_SIZE(vtag_flushall_tl1)
445
446 #endif /* lint */
447
448
449 #if defined(lint)
450
451 /* ARGSUSED */
452 void
453 vac_flushpage(pfn_t pfnum, int vcolor)
454 {}
455
456 #else /* lint */
457
458 /*
459 * vac_flushpage(pfnum, color)
460 * Flush 1 8k page of the D-$ with physical page = pfnum
461 * Algorithm:
462 * The cheetah dcache is a 64k pseudo 4 way associative cache.
463 * It is virtual indexed, physically tagged cache.
464 */
465 .seg ".data" ! the selector word lives in the data segment
466 .align 8
467 .global dflush_type ! exported symbol
468 dflush_type: ! loaded by DCACHE_FLUSHPAGE and compared with FLUSHPAGE_TYPE and FLUSHMATCH_TYPE
469 .word FLUSHPAGE_TYPE ! initial value selects the FLUSHPAGE_TYPE path
470
471 ENTRY(vac_flushpage) /* void vac_flushpage(pfn_t pfnum, int vcolor) */
472 /*
473 * flush page from the d$; all of the work is done by the DCACHE_FLUSHPAGE macro
474 * (the macro branches to its exit label when DCU_DC is clear in the DCU register)
475 * %o0 = pfnum, %o1 = color; %o2, %o3 and %o4 are handed to the macro as temporaries
476 */
477 DCACHE_FLUSHPAGE(%o0, %o1, %o2, %o3, %o4) /* on the non-FLUSHPAGE_TYPE path the macro shifts %o0 in place */
478 retl
479 nop ! delay slot
480 SET_SIZE(vac_flushpage)
481
482 #endif /* lint */
483
484
485 #if defined(lint)
486
487 /* ARGSUSED */
488 void
489 vac_flushpage_tl1(uint64_t pfnum, uint64_t vcolor)
490 {}
491
492 #else /* lint */
493
494 ENTRY_NP(vac_flushpage_tl1) /* trap-level twin of vac_flushpage, working on global registers */
495 /*
496 * x-trap to flush page from the d$; all of the work is done by the DCACHE_FLUSHPAGE macro
497 *
498 * %g1 = pfnum, %g2 = color; %g3, %g4 and %g5 are handed to the macro as temporaries
499 */
500 DCACHE_FLUSHPAGE(%g1, %g2, %g3, %g4, %g5) /* on the non-FLUSHPAGE_TYPE path the macro shifts %g1 in place */
501 retry ! leave the trap handler
502 SET_SIZE(vac_flushpage_tl1)
503
504 #endif /* lint */
505
506
507 #if defined(lint)
508
509 /* ARGSUSED */
510 void
511 vac_flushcolor(int vcolor, pfn_t pfnum)
512 {}
513
514 #else /* lint */
515
516 ENTRY(vac_flushcolor) /* void vac_flushcolor(int vcolor, pfn_t pfnum) */
517 /*
518 * %o0 = vcolor; the second C argument is never read as an input, %o1-%o3 are passed as macro temporaries
519 */
520 DCACHE_FLUSHCOLOR(%o0, 0, %o1, %o2, %o3) /* second macro argument is the way number: way 0 */
521 DCACHE_FLUSHCOLOR(%o0, 1, %o1, %o2, %o3) /* way 1 */
522 DCACHE_FLUSHCOLOR(%o0, 2, %o1, %o2, %o3) /* way 2 */
523 DCACHE_FLUSHCOLOR(%o0, 3, %o1, %o2, %o3) /* way 3 */
524 retl
525 nop ! delay slot
526 SET_SIZE(vac_flushcolor)
527
528 #endif /* lint */
529
530
531 #if defined(lint)
532
533 /* ARGSUSED */
534 void
535 vac_flushcolor_tl1(uint64_t vcolor, uint64_t pfnum)
536 {}
537
538 #else /* lint */
539
540 ENTRY(vac_flushcolor_tl1) /* trap-level twin of vac_flushcolor, working on global registers */
541 /*
542 * %g1 = vcolor; %g2-%g4 are passed as macro temporaries, so no second argument is read
543 */
544 DCACHE_FLUSHCOLOR(%g1, 0, %g2, %g3, %g4) /* second macro argument is the way number: way 0 */
545 DCACHE_FLUSHCOLOR(%g1, 1, %g2, %g3, %g4) /* way 1 */
546 DCACHE_FLUSHCOLOR(%g1, 2, %g2, %g3, %g4) /* way 2 */
547 DCACHE_FLUSHCOLOR(%g1, 3, %g2, %g3, %g4) /* way 3 */
548 retry ! leave the trap handler
549 SET_SIZE(vac_flushcolor_tl1)
550
551 #endif /* lint */
552
553 #if defined(lint)
554
555 int
556 idsr_busy(void)
557 {
558 return (0);
559 }
560
561 #else /* lint */
562
563 /*
564 * Determine whether or not the IDSR is busy.
565 * Entry: no arguments
566 * Returns: 1 if busy, 0 otherwise
567 */
568 ENTRY(idsr_busy) /* int idsr_busy(void): %o0 = 1 if IDSR_BUSY is set, 0 otherwise */
569 ldxa [%g0]ASI_INTR_DISPATCH_STATUS, %g1 ! %g1 = interrupt dispatch status
570 clr %o0 ! default return value: 0 (not busy)
571 btst IDSR_BUSY, %g1 ! Z is set when the busy bit is clear
572 bnz,a,pn %xcc, 1f ! taken only when busy; an untaken annulled branch skips its delay slot
573 mov 1, %o0 ! delay slot, executed only when busy: return 1
574 1:
575 retl
576 nop ! delay slot
577 SET_SIZE(idsr_busy)
578
579 #endif /* lint */
580
581 #if defined(lint)
582
583 /* ARGSUSED */
584 void
585 init_mondo(xcfunc_t *func, uint64_t arg1, uint64_t arg2)
586 {}
587
588 /* ARGSUSED */
589 void
590 init_mondo_nocheck(xcfunc_t *func, uint64_t arg1, uint64_t arg2)
591 {}
592
593 #else /* lint */
594
595 .global _dispatch_status_busy ! exported; its users are not visible in this part of the file
596 _dispatch_status_busy: ! label of the NUL-terminated message below
597 .asciz "ASI_INTR_DISPATCH_STATUS error: busy"
598 .align 4 ! restore 4-byte alignment after the odd-length string
599
600 /*
601 * Setup interrupt dispatch data registers
602 * Entry:
603 * %o0 - function or inumber to call
604 * %o1, %o2 - arguments (2 uint64_t's)
605 */
606 .seg "text"
607
608 ENTRY(init_mondo)
609 #ifdef DEBUG
610 !
611 ! IDSR should not be busy at the moment
612 !
613 ldxa [%g0]ASI_INTR_DISPATCH_STATUS, %g1
614 btst IDSR_BUSY, %g1
628 mov IDDR_1, %g2
629 mov IDDR_2, %g3
630 stxa %o0, [%g1]ASI_INTR_DISPATCH
631
632 !
633 ! interrupt vector dispatch data reg 1
634 !
635 stxa %o1, [%g2]ASI_INTR_DISPATCH
636
637 !
638 ! interrupt vector dispatch data reg 2
639 !
640 stxa %o2, [%g3]ASI_INTR_DISPATCH
641
642 membar #Sync
643 retl
644 nop
645 SET_SIZE(init_mondo_nocheck)
646 SET_SIZE(init_mondo)
647
648 #endif /* lint */
649
650
651 #if !(defined(JALAPENO) || defined(SERRANO))
652
653 #if defined(lint)
654
655 /* ARGSUSED */
656 void
657 shipit(int upaid, int bn)
658 { return; }
659
660 #else /* lint */
661
662 /*
663 * Ship mondo to aid using busy/nack pair bn
664 */
665 ENTRY_NP(shipit) /* void shipit(int upaid, int bn): %o0 = upaid, %o1 = bn */
666 sll %o0, IDCR_PID_SHIFT, %g1 ! IDCR<18:14> = agent id
667 sll %o1, IDCR_BN_SHIFT, %g2 ! IDCR<28:24> = b/n pair
668 or %g1, IDCR_OFFSET, %g1 ! IDCR<13:0> = 0x70
669 or %g1, %g2, %g1 ! %g1 = agent id | b/n pair | offset, used as the store address
670 stxa %g0, [%g1]ASI_INTR_DISPATCH ! interrupt vector dispatch
671 membar #Sync ! synchronize after the dispatch store before returning
672 retl
673 nop ! delay slot
674 SET_SIZE(shipit)
675
676 #endif /* lint */
677
678 #endif /* !(JALAPENO || SERRANO) */
679
680
681 #if defined(lint)
682
683 /* ARGSUSED */
684 void
685 flush_instr_mem(caddr_t vaddr, size_t len)
686 {}
687
688 #else /* lint */
689
690 /*
691 * flush_instr_mem:
692 * Flush 1 page of the I-$ starting at vaddr
693 * %o0 vaddr
694 * %o1 bytes to be flushed
695 * UltraSPARC-III maintains consistency of the on-chip Instruction Cache with
696 * the stores from all processors so that a FLUSH instruction is only needed
697 * to ensure pipeline is consistent. This means a single flush is sufficient at
698 * the end of a sequence of stores that updates the instruction stream to
699 * ensure correct operation.
700 */
701
702 ENTRY(flush_instr_mem) /* void flush_instr_mem(caddr_t vaddr, size_t len): one flush; %o1 (len) is never read */
703 flush %o0 ! address irrelevant
704 retl
705 nop ! delay slot
706 SET_SIZE(flush_instr_mem)
707
708 #endif /* lint */
709
710
711 #if defined(CPU_IMP_ECACHE_ASSOC)
712
713 #if defined(lint)
714
715 /* ARGSUSED */
716 uint64_t
717 get_ecache_ctrl(void)
718 { return (0); }
719
720 #else /* lint */
721
722 ENTRY(get_ecache_ctrl) /* uint64_t get_ecache_ctrl(void): E$ control value returned in %o0 */
723 GET_CPU_IMPL(%o0) /* %o0 = CPU implementation, compared on the next line */
724 cmp %o0, JAGUAR_IMPL
725 !
726 ! Putting an ASI access in the delay slot may
727 ! cause it to be accessed, even when annulled.
728 ! (this is why both delay slots below hold a nop)
729 bne 1f ! not Jaguar: use the Ch/Ch+ register
730 nop ! delay slot kept free of ASI accesses
731 ldxa [%g0]ASI_EC_CFG_TIMING, %o0 ! read Jaguar shared E$ ctrl reg
732 b 2f ! skip the Ch/Ch+ read
733 nop ! delay slot kept free of ASI accesses
734 1:
735 ldxa [%g0]ASI_EC_CTRL, %o0 ! read Ch/Ch+ E$ control reg
736 2:
737 retl
738 nop ! delay slot
739 SET_SIZE(get_ecache_ctrl)
740
741 #endif /* lint */
742
743 #endif /* CPU_IMP_ECACHE_ASSOC */
744
745
746 #if !(defined(JALAPENO) || defined(SERRANO))
747
748 /*
749 * flush_ecache:
750 * %o0 - 64 bit physical address
751 * %o1 - ecache size
752 * %o2 - ecache linesize
753 */
754 #if defined(lint)
755
756 /*ARGSUSED*/
757 void
758 flush_ecache(uint64_t physaddr, size_t ecache_size, size_t ecache_linesize)
759 {}
760
761 #else /* !lint */
762
763 ENTRY(flush_ecache) /* void flush_ecache(uint64_t physaddr, size_t ecache_size, size_t ecache_linesize) */
764
765 /*
766 * For certain CPU implementations, we have to flush the L2 cache
767 * before flushing the ecache. The macro is given %g3, %g4 and %g5 to work with.
768 */
769 PN_L2_FLUSHALL(%g3, %g4, %g5)
770
771 /*
772 * Flush the entire Ecache using displacement flush. Macro arguments, in order: %o1 (size), %o2 (linesize), %o0, %o4.
773 */
774 ECACHE_FLUSHALL(%o1, %o2, %o0, %o4) /* NOTE(review): %o0 and %o4 look like temporaries here; confirm against the macro definition */
775
776 retl
777 nop ! delay slot
778 SET_SIZE(flush_ecache)
779
780 #endif /* lint */
781
782 #endif /* !(JALAPENO || SERRANO) */
783
784
785 #if defined(lint)
786
787 void
788 flush_dcache(void)
789 {}
790
791 #else /* lint */
792
793 ENTRY(flush_dcache) /* void flush_dcache(void): geometry comes from kernel variables, not from arguments */
794 ASM_LD(%o0, dcache_size) /* %o0 = value of dcache_size */
795 ASM_LD(%o1, dcache_linesize) /* %o1 = value of dcache_linesize */
796 CH_DCACHE_FLUSHALL(%o0, %o1, %o2) /* flush using the size and line size loaded above; %o2 is the third macro argument */
797 retl
798 nop ! delay slot
799 SET_SIZE(flush_dcache)
800
801 #endif /* lint */
802
803
804 #if defined(lint)
805
806 void
807 flush_icache(void)
808 {}
809
810 #else /* lint */
811
812 ENTRY(flush_icache) /* void flush_icache(void): %o0 = I$ size and %o1 = I$ line size by the time label 2 is reached */
813 GET_CPU_PRIVATE_PTR(%g0, %o0, %o2, flush_icache_1); /* presumably leaves the CPU private pointer in %o0 or jumps to flush_icache_1 if there is none - TODO confirm */
814 ld [%o0 + CHPR_ICACHE_LINESIZE], %o1 ! %o1 = line size from the private area
815 ba,pt %icc, 2f ! skip the global-variable fallback
816 ld [%o0 + CHPR_ICACHE_SIZE], %o0 ! delay slot: %o0 = size from the private area
817 flush_icache_1: /* fallback path: take the geometry from kernel variables */
818 ASM_LD(%o0, icache_size)
819 ASM_LD(%o1, icache_linesize)
820 2:
821 CH_ICACHE_FLUSHALL(%o0, %o1, %o2, %o4) /* flush using the size and line size chosen above */
822 retl
823 nop ! delay slot
824 SET_SIZE(flush_icache)
825
826 #endif /* lint */
827
828 #if defined(lint)
829
830 /*ARGSUSED*/
831 void
832 kdi_flush_idcache(int dcache_size, int dcache_lsize, int icache_size,
833 int icache_lsize)
834 {
835 }
836
837 #else /* lint */
838
839 ENTRY(kdi_flush_idcache) /* %o0 = dcache_size, %o1 = dcache_lsize, %o2 = icache_size, %o3 = icache_lsize; no kernel variables are loaded */
840 CH_DCACHE_FLUSHALL(%o0, %o1, %g1) /* D$ flush driven by the first argument pair */
841 CH_ICACHE_FLUSHALL(%o2, %o3, %g1, %g2) /* I$ flush driven by the second argument pair */
842 membar #Sync ! synchronize after both flush macros before returning
843 retl
844 nop ! delay slot
845 SET_SIZE(kdi_flush_idcache)
846
847 #endif /* lint */
848
849 #if defined(lint)
850
851 void
852 flush_pcache(void)
853 {}
854
855 #else /* lint */
856
857 ENTRY(flush_pcache) /* void flush_pcache(void): thin wrapper around the PCACHE_FLUSHALL macro */
858 PCACHE_FLUSHALL(%o0, %o1, %o2) /* %o0-%o2 are handed to the macro; nothing is loaded into them first */
859 retl
860 nop ! delay slot
861 SET_SIZE(flush_pcache)
862
863 #endif /* lint */
864
865
866 #if defined(CPU_IMP_L1_CACHE_PARITY)
867
868 #if defined(lint)
869
870 /* ARGSUSED */
871 void
872 get_dcache_dtag(uint32_t dcache_idx, uint64_t *data)
873 {}
874
875 #else /* lint */
876
877 /*
878 * Get dcache data and tag. The Dcache data is a pointer to a ch_dc_data_t
879 * structure (see cheetahregs.h):
880 * The Dcache *should* be turned off when this code is executed.
881 */
882 .align 128
883 ENTRY(get_dcache_dtag)
884 rdpr %pstate, %o5
885 andn %o5, PSTATE_IE | PSTATE_AM, %o3
886 wrpr %g0, %o3, %pstate
887 b 1f
888 stx %o0, [%o1 + CH_DC_IDX]
889
890 .align 128
891 1:
892 ldxa [%o0]ASI_DC_TAG, %o2
893 stx %o2, [%o1 + CH_DC_TAG]
894 membar #Sync
895 ldxa [%o0]ASI_DC_UTAG, %o2
896 membar #Sync
934 add %o1, CH_DC_PN_DATA_PARITY - CH_DC_DATA + 7, %o1
935
936 /* add the DC_data_parity bit into our working index */
937 mov 1, %o2
938 sll %o2, PN_DC_DATA_PARITY_BIT_SHIFT, %o2
939 or %o0, %o2, %o0
940 3:
941 membar #Sync ! required before ASI_DC_DATA
942 ldxa [%o0 + %o3]ASI_DC_DATA, %o2
943 membar #Sync ! required after ASI_DC_DATA
944 stb %o2, [%o1]
945 dec %o1
946 cmp %o3, CH_DC_DATA_REG_SIZE - 8
947 blt 3b
948 add %o3, 8, %o3
949 4:
950 retl
951 wrpr %g0, %o5, %pstate
952 SET_SIZE(get_dcache_dtag)
953
954 #endif /* lint */
955
956
957 #if defined(lint)
958
959 /* ARGSUSED */
960 void
961 get_icache_dtag(uint32_t ecache_idx, uint64_t *data)
962 {}
963
964 #else /* lint */
965
966 /*
967 * Get icache data and tag. The data argument is a pointer to a ch_ic_data_t
968 * structure (see cheetahregs.h):
969 * The Icache *Must* be turned off when this function is called.
970 * This is because diagnostic accesses to the Icache interfere with cache
971 * consistency.
972 */
973 .align 128
974 ENTRY(get_icache_dtag)
975 rdpr %pstate, %o5
976 andn %o5, PSTATE_IE | PSTATE_AM, %o3
977 wrpr %g0, %o3, %pstate
978
979 stx %o0, [%o1 + CH_IC_IDX]
980 ldxa [%o0]ASI_IC_TAG, %o2
981 stx %o2, [%o1 + CH_IC_PATAG]
982 add %o0, CH_ICTAG_UTAG, %o0
983 ldxa [%o0]ASI_IC_TAG, %o2
984 add %o0, (CH_ICTAG_UPPER - CH_ICTAG_UTAG), %o0
985 stx %o2, [%o1 + CH_IC_UTAG]
987 add %o0, (CH_ICTAG_LOWER - CH_ICTAG_UPPER), %o0
988 stx %o2, [%o1 + CH_IC_UPPER]
989 ldxa [%o0]ASI_IC_TAG, %o2
990 andn %o0, CH_ICTAG_TMASK, %o0
991 stx %o2, [%o1 + CH_IC_LOWER]
992 ldxa [%o0]ASI_IC_SNP_TAG, %o2
993 stx %o2, [%o1 + CH_IC_SNTAG]
994 add %o1, CH_IC_DATA, %o1
995 clr %o3
996 2:
997 ldxa [%o0 + %o3]ASI_IC_DATA, %o2
998 stx %o2, [%o1 + %o3]
999 cmp %o3, PN_IC_DATA_REG_SIZE - 8
1000 blt 2b
1001 add %o3, 8, %o3
1002
1003 retl
1004 wrpr %g0, %o5, %pstate
1005 SET_SIZE(get_icache_dtag)
1006
1007 #endif /* lint */
1008
1009 #if defined(lint)
1010
1011 /* ARGSUSED */
1012 void
1013 get_pcache_dtag(uint32_t pcache_idx, uint64_t *data)
1014 {}
1015
1016 #else /* lint */
1017
1018 /*
1019 * Get pcache data and tags.
1020 * inputs:
1021 * pcache_idx - fully constructed VA for accessing P$ diagnostic
1022 * registers. Contains PC_way and PC_addr shifted into
1023 * the correct bit positions. See the PRM for more details.
1024 * data - pointer to a ch_pc_data_t
1025 * structure (see cheetahregs.h):
1026 */
1027 .align 128
1028 ENTRY(get_pcache_dtag) /* %o0 = pcache_idx (diagnostic VA), %o1 = data (output buffer) */
1029 rdpr %pstate, %o5 ! %o5 = caller pstate, restored on exit
1030 andn %o5, PSTATE_IE | PSTATE_AM, %o3 ! clear the IE and AM bits
1031 wrpr %g0, %o3, %pstate ! run with IE and AM clear until the final wrpr
1032
1033 stx %o0, [%o1 + CH_PC_IDX] ! record the index that was used
1034 ldxa [%o0]ASI_PC_STATUS_DATA, %o2
1035 stx %o2, [%o1 + CH_PC_STATUS] ! save status data
1036 ldxa [%o0]ASI_PC_TAG, %o2
1037 stx %o2, [%o1 + CH_PC_TAG] ! save tag
1038 ldxa [%o0]ASI_PC_SNP_TAG, %o2
1039 stx %o2, [%o1 + CH_PC_SNTAG] ! save snoop tag
1040 add %o1, CH_PC_DATA, %o1 ! %o1 = start of the data area in the buffer
1041 clr %o3 ! %o3 = byte offset, starting at 0
1042 2:
1043 ldxa [%o0 + %o3]ASI_PC_DATA, %o2 ! read 8 bytes of P$ data at the current offset
1044 stx %o2, [%o1 + %o3] ! store them at the same offset in the buffer
1045 cmp %o3, CH_PC_DATA_REG_SIZE - 8 ! was that the last 8-byte slot?
1046 blt 2b ! no: copy the next slot
1047 add %o3, 8, %o3 ! delay slot, always executed: advance the offset
1048
1049 retl
1050 wrpr %g0, %o5, %pstate ! delay slot: restore the saved pstate
1051 SET_SIZE(get_pcache_dtag)
1052
1053 #endif /* lint */
1054
1055 #endif /* CPU_IMP_L1_CACHE_PARITY */
1056
1057 #if defined(lint)
1058
1059 /* ARGSUSED */
1060 void
1061 set_dcu(uint64_t dcu)
1062 {}
1063
1064 #else /* lint */
1065
1066 /*
1067 * re-enable the i$, d$, w$, and p$ according to bootup cache state.
1068 * Turn on WE, HPE, SPE, PE, IC, and DC bits defined as DCU_CACHE.
1069 * %o0 - 64 bit constant
1070 */
1071 ENTRY(set_dcu) /* void set_dcu(uint64_t dcu): %o0 is written to the DCU register unchanged */
1072 stxa %o0, [%g0]ASI_DCU ! Store to DCU
1073 flush %g0 /* flush required after changing the IC bit */
1074 retl
1075 nop ! delay slot
1076 SET_SIZE(set_dcu)
1077
1078 #endif /* lint */
1079
1080
1081 #if defined(lint)
1082
1083 uint64_t
1084 get_dcu(void)
1085 {
1086 return ((uint64_t)0);
1087 }
1088
1089 #else /* lint */
1090
1091 /*
1092 * Return DCU register.
1093 */
1094 ENTRY(get_dcu) /* uint64_t get_dcu(void): current DCU value returned in %o0 */
1095 ldxa [%g0]ASI_DCU, %o0 /* DCU control register */
1096 retl
1097 nop ! delay slot
1098 SET_SIZE(get_dcu)
1099
1100 #endif /* lint */
1101
1102 /*
1103 * Cheetah/Cheetah+ level 15 interrupt handler trap table entry.
1104 *
1105 * This handler is used to check for softints generated by error trap
1106 * handlers to report errors. On Cheetah, this mechanism is used by the
1107 * Fast ECC at TL>0 error trap handler and, on Cheetah+, by both the Fast
1108 * ECC at TL>0 error and the I$/D$ parity error at TL>0 trap handlers.
1109 * NB: Must be 8 instructions or less to fit in trap table and code must
1110 * be relocatable.
1111 */
1112 #if defined(lint)
1113
1114 void
1115 ch_pil15_interrupt_instr(void)
1116 {}
1117
1118 #else /* lint */
1119
1120 ENTRY_NP(ch_pil15_interrupt_instr) /* trap table entry body: nothing but a jump to the real handler */
1121 ASM_JMP(%g1, ch_pil15_interrupt) /* %g1 is the register handed to the jump macro */
1122 SET_SIZE(ch_pil15_interrupt_instr)
1123
1124 #endif
1125
1126
1127 #if defined(lint)
1128
1129 void
1130 ch_pil15_interrupt(void)
1131 {}
1132
1133 #else /* lint */
1134
1135 ENTRY_NP(ch_pil15_interrupt)
1136
1137 /*
1138 * Since pil_interrupt is hacked to assume that every level 15
1139 * interrupt is generated by the CPU to indicate a performance
1140 * counter overflow this gets ugly. Before calling pil_interrupt
1141 * the Error at TL>0 pending status is inspected. If it is
1142 * non-zero, then an error has occurred and it is handled.
1143 * Otherwise control is transferred to pil_interrupt. Note that if
1144 * an error is detected pil_interrupt will not be called and
1145 * overflow interrupts may be lost causing erroneous performance
1146 * measurements. However, error-recovery will have a detrimental
1147 * effect on performance anyway.
1148 */
1149 CPU_INDEX(%g1, %g4)
1150 set ch_err_tl1_pending, %g4
1151 ldub [%g1 + %g4], %g2
1152 brz %g2, 1f
1153 nop
1154
1168 * For Cheetah*, call cpu_tl1_error via systrap at PIL 15
1169 * to process the Fast ECC/Cache Parity at TL>0 error. Clear
1170 * panic flag (%g2).
1171 */
1172 set cpu_tl1_error, %g1
1173 clr %g2
1174 ba sys_trap
1175 mov PIL_15, %g4
1176
1177 1:
1178 /*
1179 * The logout is invalid.
1180 *
1181 * Call the default interrupt handler.
1182 */
1183 sethi %hi(pil_interrupt), %g1
1184 jmp %g1 + %lo(pil_interrupt)
1185 mov PIL_15, %g4
1186
1187 SET_SIZE(ch_pil15_interrupt)
1188 #endif
1189
1190
1191 /*
1192 * Error Handling
1193 *
1194 * Cheetah provides error checking for all memory access paths between
1195 * the CPU, External Cache, Cheetah Data Switch and system bus. Error
1196 * information is logged in the AFSR, (also AFSR_EXT for Panther) and
1197 * AFAR and one of the following traps is generated (provided that it
1198 * is enabled in External Cache Error Enable Register) to handle that
1199 * error:
1200 * 1. trap 0x70: Precise trap
1201 * tt0_fecc for errors at trap level(TL)>=0
1202 * 2. trap 0x0A and 0x32: Deferred trap
1203 * async_err for errors at TL>=0
1204 * 3. trap 0x63: Disrupting trap
1205 * ce_err for errors at TL=0
1206 * (Note that trap 0x63 cannot happen at trap level > 0)
1207 *
1208 * Trap level one handlers panic the system except for the fast ecc
1261 * can flush the E$ (or the error is a stuck-at bit), we will recurse in
1262 * the Fast ECC at TL>0 handler and eventually Red Mode.
1263 *
1264 * Note that for Cheetah (and only Cheetah), we use alias addresses for
1265 * flushing rather than ASI accesses (which don't exist on Cheetah).
1266 * Should we encounter a Fast ECC error within this handler on Cheetah,
1267 * there's a good chance it's within the ecache_flushaddr buffer (since
1268 * it's the largest piece of memory we touch in the handler and it is
1269 * usually kernel text/data). For that reason the Fast ECC at TL>0
1270 * handler for Cheetah uses an alternate buffer: ecache_tl1_flushaddr.
1271 */
1272
1273 /*
1274 * Cheetah ecc-protected E$ trap (Trap 70) at TL=0
1275 * tt0_fecc is replaced by fecc_err_instr in cpu_init_trap of the various
1276 * architecture-specific files.
1277 * NB: Must be 8 instructions or less to fit in trap table and code must
1278 * be relocatable.
1279 */
1280
1281 #if defined(lint)
1282
1283 void
1284 fecc_err_instr(void)
1285 {}
1286
1287 #else /* lint */
1288
1289 ENTRY_NP(fecc_err_instr) /* trap table entry body: turn the D$ and I$ off, then jump to fast_ecc_err */
1290 membar #Sync ! Cheetah requires membar #Sync
1291
1292 /*
1293 * Save current DCU state. Turn off the Dcache and Icache. The saved value stays in %g1 for fast_ecc_err.
1294 */
1295 ldxa [%g0]ASI_DCU, %g1 ! save DCU in %g1
1296 andn %g1, DCU_DC + DCU_IC, %g4 ! %g4 = DCU value with the DC and IC bits cleared
1297 stxa %g4, [%g0]ASI_DCU ! write it back: both caches are now disabled
1298 flush %g0 /* flush required after changing the IC bit */
1299
1300 ASM_JMP(%g4, fast_ecc_err) /* %g4 is reused as the jump macro register; %g1 is left untouched */
1301 SET_SIZE(fecc_err_instr)
1302
1303 #endif /* lint */
1304
1305
1306 #if !(defined(JALAPENO) || defined(SERRANO))
1307
1308 #if defined(lint)
1309
1310 void
1311 fast_ecc_err(void)
1312 {}
1313
1314 #else /* lint */
1315
1316 .section ".text"
1317 .align 64
1318 ENTRY_NP(fast_ecc_err)
1319
1320 /*
1321 * Turn off CEEN and NCEEN.
1322 */
1323 ldxa [%g0]ASI_ESTATE_ERR, %g3
1324 andn %g3, EN_REG_NCEEN + EN_REG_CEEN, %g4
1325 stxa %g4, [%g0]ASI_ESTATE_ERR
1326 membar #Sync ! membar sync required
1327
1328 /*
1329 * Check to see whether we need to park our sibling core
1330 * before recording diagnostic information from caches
1331 * which may be shared by both cores.
1332 * We use %g1 to store information about whether or not
1333 * we had to park the core (%g1 holds our DCUCR value and
1334 * we only use bits from that register which are "reserved"
1335 * to keep track of core parking) so that we know whether
1435 * Otherwise, if the logout structure was busy but we have not
1436 * nested more times than our maximum value, then we simply
1437 * issue a retry. Our TL=0 trap handler code will check and
1438 * clear the AFSR after it is done logging what is currently
1439 * in the logout struct and handle this event at that time.
1440 */
1441 retry
1442 8:
1443 /*
1444 * Call cpu_fast_ecc_error via systrap at PIL 14 unless we're
1445 * already at PIL 15.
1446 */
1447 set cpu_fast_ecc_error, %g1
1448 rdpr %pil, %g4
1449 cmp %g4, PIL_14
1450 ba sys_trap
1451 movl %icc, PIL_14, %g4
1452
1453 SET_SIZE(fast_ecc_err)
1454
1455 #endif /* lint */
1456
1457 #endif /* !(JALAPENO || SERRANO) */
1458
1459
1460 /*
1461 * Cheetah/Cheetah+ Fast ECC at TL>0 trap strategy:
1462 *
1463 * The basic flow of this trap handler is as follows:
1464 *
1465 * 1) In the "trap 70" trap table code (fecc_err_tl1_instr), generate a
1466 * software trap 0 ("ta 0") to buy an extra set of %tpc, etc. which we
1467 * will use to save %g1 and %g2.
1468 * 2) At the software trap 0 at TL>0 trap table code (fecc_err_tl1_cont_instr),
1469 * we save %g1+%g2 using %tpc, %tnpc + %tstate and jump to the fast ecc
1470 * handler (using the just saved %g1).
1471 * 3) Turn off the Dcache if it was on and save the state of the Dcache
1472 * (whether on or off) in Bit2 (CH_ERR_TSTATE_DC_ON) of %tstate.
1473 * NB: we don't turn off the Icache because bad data is not installed nor
1474 * will we be doing any diagnostic accesses.
1475 * 4) compute physical address of the per-cpu/per-tl save area using %g1+%g2
1476 * 5) Save %g1-%g7 into the per-cpu/per-tl save area (%g1 + %g2 from the
1494 * Save the read AFSR/AFAR values in ch_err_tl1_data. For Panther,
1495 * read and clear the primary AFSR_EXT and save it in ch_err_tl1_data.
1496 * 14) Flush and re-enable the Dcache if it was on at step 3.
1497 * 15) Do TRAPTRACE if enabled.
1498 * 16) Check if a UCU->WDU (or L3_UCU->WDU for Panther) happened, panic if so.
1499 * 17) Set the event pending flag in ch_err_tl1_pending[CPU]
1500 * 18) Cause a softint 15. The pil15_interrupt handler will inspect the
1501 * event pending flag and call cpu_tl1_error via systrap if set.
1502 * 19) Restore the registers from step 5 and issue retry.
1503 */
1504
1505 /*
1506 * Cheetah ecc-protected E$ trap (Trap 70) at TL>0
1507 * tt1_fecc is replaced by fecc_err_tl1_instr in cpu_init_trap of the various
1508 * architecture-specific files. This generates a "Software Trap 0" at TL>0,
1509 * which goes to fecc_err_tl1_cont_instr, and we continue the handling there.
1510 * NB: Must be 8 instructions or less to fit in trap table and code must
1511 * be relocatable.
1512 */
1513
1514 #if defined(lint)
1515
1516 void
1517 fecc_err_tl1_instr(void)
1518 {}
1519
1520 #else /* lint */
1521
1522 ENTRY_NP(fecc_err_tl1_instr) /* trap table entry body for trap 70 at TL>0 */
1523 CH_ERR_TL1_TRAPENTRY(SWTRAP_0); /* raises software trap 0; handling continues in fecc_err_tl1_cont_instr */
1524 SET_SIZE(fecc_err_tl1_instr)
1525
1526 #endif /* lint */
1527
1528 /*
1529 * Software trap 0 at TL>0.
1530 * tt1_swtrap0 is replaced by fecc_err_tl1_cont_instr in cpu_init_trap of
1531 * the various architecture-specific files. This is used as a continuation
1532 * of the fast ecc handling where we've bought an extra TL level, so we can
1533 * use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
1534 * and %g2. Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
1535 * there's a reserved hole from 3-7. We only use bits 0-1 and 8-9 (the low
1536 * order two bits from %g1 and %g2 respectively).
1537 * NB: Must be 8 instructions or less to fit in trap table and code must
1538 * be relocatable.
1539 */
1540 #if defined(lint)
1541
1542 void
1543 fecc_err_tl1_cont_instr(void)
1544 {}
1545
1546 #else /* lint */
1547
1548 ENTRY_NP(fecc_err_tl1_cont_instr) /* trap table entry body for software trap 0 at TL>0 */
1549 CH_ERR_TL1_SWTRAPENTRY(fast_ecc_tl1_err) /* macro is given fast_ecc_tl1_err as the handler to continue in */
1550 SET_SIZE(fecc_err_tl1_cont_instr)
1551
1552 #endif /* lint */
1553
1554
1555 #if defined(lint)
1556
1557 void
1558 ce_err(void)
1559 {}
1560
1561 #else /* lint */
1562
1563 /*
1564 * The ce_err function handles disrupting trap type 0x63 at TL=0.
1565 *
1566 * AFSR errors bits which cause this trap are:
1567 * CE, EMC, EDU:ST, EDC, WDU, WDC, CPU, CPC, IVU, IVC
1568 *
1569 * NCEEN Bit of Cheetah External Cache Error Enable Register enables
1570 * the following AFSR disrupting traps: EDU:ST, WDU, CPU, IVU
1571 *
1572 * CEEN Bit of Cheetah External Cache Error Enable Register enables
1573 * the following AFSR disrupting traps: CE, EMC, EDC, WDC, CPC, IVC
1574 *
1575 * Cheetah+ also handles (No additional processing required):
1576 * DUE, DTO, DBERR (NCEEN controlled)
1577 * THCE (CEEN and ET_ECC_en controlled)
1578 * TUE (ET_ECC_en controlled)
1579 *
1580 * Panther further adds:
1581 * IMU, L3_EDU, L3_WDU, L3_CPU (NCEEN controlled)
1582 * IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE (CEEN controlled)
1721 /*
1722 * Otherwise, if the logout structure was busy but we have not
1723 * nested more times than our maximum value, then we simply
1724 * issue a retry. Our TL=0 trap handler code will check and
1725 * clear the AFSR after it is done logging what is currently
1726 * in the logout struct and handle this event at that time.
1727 */
1728 retry
1729 4:
1730 /*
1731 * Call cpu_disrupting_error via systrap at PIL 14 unless we're
1732 * already at PIL 15.
1733 */
1734 set cpu_disrupting_error, %g1
1735 rdpr %pil, %g4
1736 cmp %g4, PIL_14
1737 ba sys_trap
1738 movl %icc, PIL_14, %g4
1739 SET_SIZE(ce_err)
1740
1741 #endif /* lint */
1742
1743
1744 #if defined(lint)
1745
1746 /*
1747 * This trap cannot happen at TL>0 which means this routine will never
1748 * actually be called and so we treat this like a BAD TRAP panic.
1749 */
1750 void
1751 ce_err_tl1(void)
1752 {}
1753
1754 #else /* lint */
1755
1756 .align 64
1757 ENTRY_NP(ce_err_tl1) /* the only action is the panic call below */
1758
1759 call ptl1_panic ! hand off to the panic routine
1760 mov PTL1_BAD_TRAP, %g1 ! delay slot: %g1 = PTL1_BAD_TRAP, set before ptl1_panic is entered
1761
1762 SET_SIZE(ce_err_tl1)
1763
1764 #endif /* lint */
1765
1766
1767 #if defined(lint)
1768
1769 void
1770 async_err(void)
1771 {}
1772
1773 #else /* lint */
1774
1775 /*
1776 * The async_err function handles deferred trap types 0xA
1777 * (instruction_access_error) and 0x32 (data_access_error) at TL>=0.
1778 *
1779 * AFSR errors bits which cause this trap are:
1780 * UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
1781 * On some platforms, EMU may cause Cheetah to pull the error pin,
1782 * never giving Solaris a chance to take a trap.
1783 *
1784 * NCEEN Bit of Cheetah External Cache Error Enable Register enables
1785 * the following AFSR deferred traps: UE, EMU, EDU:BLD, TO, BERR
1786 *
1787 * Steps:
1788 * 1. Disable CEEN and NCEEN errors to prevent recursive errors.
1789 * 2. Turn D$ off per Cheetah PRM P.5 Note 6, turn I$ off to capture
1790 * I$ line in DO_CPU_LOGOUT.
1791 * 3. Park sibling core if caches are shared (to avoid race
1792 * condition while accessing shared resources such as L3
1793 * data staging register during CPU logout).
1794 * 4. If the CPU logout structure is not currently being used:
1952 call ptl1_panic
1953 mov PTL1_BAD_ECC, %g1
1954
1955 3:
1956 /*
1957 * Otherwise, if the logout structure was busy but we have not
1958 * nested more times than our maximum value, then we simply
1959 * issue a retry. Our TL=0 trap handler code will check and
1960 * clear the AFSR after it is done logging what is currently
1961 * in the logout struct and handle this event at that time.
1962 */
1963 retry
1964 4:
1965 RESET_USER_RTT_REGS(%g4, %g5, async_err_resetskip)
1966 async_err_resetskip:
1967 set cpu_deferred_error, %g1
1968 ba sys_trap
1969 mov PIL_15, %g4 ! run at pil 15
1970 SET_SIZE(async_err)
1971
1972 #endif /* lint */
1973
1974 #if defined(CPU_IMP_L1_CACHE_PARITY)
1975
1976 /*
1977 * D$ parity error trap (trap 71) at TL=0.
1978 * tt0_dperr is replaced by dcache_parity_instr in cpu_init_trap of
1979 * the various architecture-specific files. This merely sets up the
1980 * arguments for cpu_parity_error and calls it via sys_trap.
1981 * NB: Must be 8 instructions or less to fit in trap table and code must
1982 * be relocatable.
1983 */
1984 #if defined(lint)
1985
1986 void
1987 dcache_parity_instr(void)
1988 {}
1989
1990 #else /* lint */
/*
 * Register setup performed before control is transferred to sys_trap:
 *   %g1 = address of cpu_parity_error
 *   %g2 = CH_ERR_DPE
 *   %g3 = value of %tpc
 *   %g4 = PIL_15 (loaded in the delay slot of the jmp)
 *   %g7 = temporary holding %hi(sys_trap)
 *
 * sys_trap is reached with sethi/jmp on its absolute address rather
 * than with a PC-relative branch; presumably this is what keeps the
 * sequence usable after it has been copied into the trap table.
 * "set" can expand to two instructions, which would put this sequence
 * at the 8-instruction limit, so nothing should be added here.
 */
1991 ENTRY_NP(dcache_parity_instr)
1992 membar #Sync ! Cheetah+ requires membar #Sync
1993 set cpu_parity_error, %g1
1994 or %g0, CH_ERR_DPE, %g2
1995 rdpr %tpc, %g3
1996 sethi %hi(sys_trap), %g7
1997 jmp %g7 + %lo(sys_trap)
1998 mov PIL_15, %g4 ! run at pil 15
1999 SET_SIZE(dcache_parity_instr)
2000
2001 #endif /* lint */
2002
2003
2004 /*
2005 * D$ parity error trap (trap 71) at TL>0.
2006 * tt1_dperr is replaced by dcache_parity_tl1_instr in cpu_init_trap of
2007 * the various architecture-specific files. This generates a "Software
2008 * Trap 1" at TL>0, which goes to dcache_parity_tl1_cont_instr, and we
2009 * continue the handling there.
2010 * NB: Must be 8 instructions or less to fit in trap table and code must
2011 * be relocatable.
2012 */
2013 #if defined(lint)
2014
2015 void
2016 dcache_parity_tl1_instr(void)
2017 {}
2018
2019 #else /* lint */
/*
 * The whole entry is generated by the CH_ERR_TL1_TRAPENTRY macro with
 * SWTRAP_1 as its argument. The macro is not defined in this file
 * (presumably it comes from one of the included headers -- TODO
 * confirm), so the exact instructions emitted are not visible here.
 */
2020 ENTRY_NP(dcache_parity_tl1_instr)
2021 CH_ERR_TL1_TRAPENTRY(SWTRAP_1);
2022 SET_SIZE(dcache_parity_tl1_instr)
2023
2024 #endif /* lint */
2025
2026
2027 /*
2028 * Software trap 1 at TL>0.
2029 * tt1_swtrap1 is replaced by dcache_parity_tl1_cont_instr in cpu_init_trap
2030 * of the various architecture-specific files. This is used as a continuation
2031 * of the dcache parity handling where we've bought an extra TL level, so we
2032 * can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
2033 * and %g2. Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
2034 * there's a reserved hole from 3-7. We only use bits 0-1 and 8-9 (the low
2035 * order two bits from %g1 and %g2 respectively).
2036 * NB: Must be 8 instructions or less to fit in trap table and code must
2037 * be relocatable.
2038 */
2039 #if defined(lint)
2040
2041 void
2042 dcache_parity_tl1_cont_instr(void)
2043 {}
2044
2045 #else /* lint */
/*
 * The whole entry is generated by the CH_ERR_TL1_SWTRAPENTRY macro,
 * which is given dcache_parity_tl1_err as the routine to continue in.
 * The macro is not defined in this file (presumably it comes from one
 * of the included headers -- TODO confirm).
 */
2046 ENTRY_NP(dcache_parity_tl1_cont_instr)
2047 CH_ERR_TL1_SWTRAPENTRY(dcache_parity_tl1_err);
2048 SET_SIZE(dcache_parity_tl1_cont_instr)
2049
2050 #endif /* lint */
2051
2052 /*
2053 * D$ parity error at TL>0 handler
2054 * We get here via trap 71 at TL>0->Software trap 1 at TL>0. We enter
2055 * this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
2056 */
2057 #if defined(lint)
2058
2059 void
2060 dcache_parity_tl1_err(void)
2061 {}
2062
2063 #else /* lint */
2064
2065 ENTRY_NP(dcache_parity_tl1_err)
2066
2067 /*
2068 * This macro saves all the %g registers in the ch_err_tl1_data
2069 * structure, updates the ch_err_tl1_flags and saves the %tpc in
2070 * ch_err_tl1_tpc. At the end of this macro, %g1 will point to
2071 * the ch_err_tl1_data structure and %g2 will have the original
2072 * flags in the ch_err_tl1_data structure. All %g registers
2073 * except for %g1 and %g2 will be available.
2074 */
2075 CH_ERR_TL1_ENTER(CH_ERR_DPE);
2076
2077 #ifdef TRAPTRACE
2078 /*
2079 * Get current trap trace entry physical pointer.
2080 */
2081 CPU_INDEX(%g6, %g5)
2082 sll %g6, TRAPTR_SIZE_SHIFT, %g6
2083 set trap_trace_ctl, %g5
2084 add %g6, %g5, %g6
2135 * and HW does not automatically disable P$, we need to disable it
2136 * here so that we don't encounter any recursive traps when we
2137 * issue the retry.
2138 */
2139 ldxa [%g0]ASI_DCU, %g3
2140 mov 1, %g4
2141 sllx %g4, DCU_PE_SHIFT, %g4
2142 andn %g3, %g4, %g3
2143 stxa %g3, [%g0]ASI_DCU
2144 membar #Sync
2145
2146 /*
2147 * We fall into this macro if we've successfully logged the error in
2148 * the ch_err_tl1_data structure and want the PIL15 softint to pick
2149 * it up and log it. %g1 must point to the ch_err_tl1_data structure.
2150 * Restores the %g registers and issues retry.
2151 */
2152 CH_ERR_TL1_EXIT;
2153 SET_SIZE(dcache_parity_tl1_err)
2154
2155 #endif /* lint */
2156
2157 /*
2158 * I$ parity error trap (trap 72) at TL=0.
2159 * tt0_iperr is replaced by icache_parity_instr in cpu_init_trap of
2160 * the various architecture-specific files. This merely sets up the
2161 * arguments for cpu_parity_error and calls it via sys_trap.
2162 * NB: Must be 8 instructions or less to fit in trap table and code must
2163 * be relocatable.
2164 */
2165 #if defined(lint)
2166
2167 void
2168 icache_parity_instr(void)
2169 {}
2170
2171 #else /* lint */
2172
/*
 * Same shape as dcache_parity_instr, with CH_ERR_IPE as the flag.
 * Register setup performed before control is transferred to sys_trap:
 *   %g1 = address of cpu_parity_error
 *   %g2 = CH_ERR_IPE
 *   %g3 = value of %tpc
 *   %g4 = PIL_15 (loaded in the delay slot of the jmp)
 *   %g7 = temporary holding %hi(sys_trap)
 *
 * sys_trap is reached with sethi/jmp on its absolute address rather
 * than with a PC-relative branch; presumably this is what keeps the
 * sequence usable after it has been copied into the trap table.
 * "set" can expand to two instructions, which would put this sequence
 * at the 8-instruction limit, so nothing should be added here.
 */
2173 ENTRY_NP(icache_parity_instr)
2174 membar #Sync ! Cheetah+ requires membar #Sync
2175 set cpu_parity_error, %g1
2176 or %g0, CH_ERR_IPE, %g2
2177 rdpr %tpc, %g3
2178 sethi %hi(sys_trap), %g7
2179 jmp %g7 + %lo(sys_trap)
2180 mov PIL_15, %g4 ! run at pil 15
2181 SET_SIZE(icache_parity_instr)
2182
2183 #endif /* lint */
2184
2185 /*
2186 * I$ parity error trap (trap 72) at TL>0.
2187 * tt1_iperr is replaced by icache_parity_tl1_instr in cpu_init_trap of
2188 * the various architecture-specific files. This generates a "Software
2189 * Trap 2" at TL>0, which goes to icache_parity_tl1_cont_instr, and we
2190 * continue the handling there.
2191 * NB: Must be 8 instructions or less to fit in trap table and code must
2192 * be relocatable.
2193 */
2194 #if defined(lint)
2195
2196 void
2197 icache_parity_tl1_instr(void)
2198 {}
2199
2200 #else /* lint */
/*
 * The whole entry is generated by the CH_ERR_TL1_TRAPENTRY macro with
 * SWTRAP_2 as its argument. The macro is not defined in this file
 * (presumably it comes from one of the included headers -- TODO
 * confirm), so the exact instructions emitted are not visible here.
 */
2201 ENTRY_NP(icache_parity_tl1_instr)
2202 CH_ERR_TL1_TRAPENTRY(SWTRAP_2);
2203 SET_SIZE(icache_parity_tl1_instr)
2204
2205 #endif /* lint */
2206
2207 /*
2208 * Software trap 2 at TL>0.
2209 * tt1_swtrap2 is replaced by icache_parity_tl1_cont_instr in cpu_init_trap
2210 * of the various architecture-specific files. This is used as a continuation
2211 * of the icache parity handling where we've bought an extra TL level, so we
2212 * can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
2213 * and %g2. Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
2214 * there's a reserved hole from 3-7. We only use bits 0-1 and 8-9 (the low
2215 * order two bits from %g1 and %g2 respectively).
2216 * NB: Must be 8 instructions or less to fit in trap table and code must
2217 * be relocatable.
2218 */
2219 #if defined(lint)
2220
2221 void
2222 icache_parity_tl1_cont_instr(void)
2223 {}
2224
2225 #else /* lint */
/*
 * The whole entry is generated by the CH_ERR_TL1_SWTRAPENTRY macro,
 * which is given icache_parity_tl1_err as the routine to continue in.
 * The macro is not defined in this file (presumably it comes from one
 * of the included headers -- TODO confirm).
 */
2226 ENTRY_NP(icache_parity_tl1_cont_instr)
2227 CH_ERR_TL1_SWTRAPENTRY(icache_parity_tl1_err);
2228 SET_SIZE(icache_parity_tl1_cont_instr)
2229
2230 #endif /* lint */
2231
2232
2233 /*
2234 * I$ parity error at TL>0 handler
2235 * We get here via trap 72 at TL>0->Software trap 2 at TL>0. We enter
2236 * this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
2237 */
2238 #if defined(lint)
2239
2240 void
2241 icache_parity_tl1_err(void)
2242 {}
2243
2244 #else /* lint */
2245
2246 ENTRY_NP(icache_parity_tl1_err)
2247
2248 /*
2249 * This macro saves all the %g registers in the ch_err_tl1_data
2250 * structure, updates the ch_err_tl1_flags and saves the %tpc in
2251 * ch_err_tl1_tpc. At the end of this macro, %g1 will point to
2252 * the ch_err_tl1_data structure and %g2 will have the original
2253 * flags in the ch_err_tl1_data structure. All %g registers
2254 * except for %g1 and %g2 will be available.
2255 */
2256 CH_ERR_TL1_ENTER(CH_ERR_IPE);
2257
2258 #ifdef TRAPTRACE
2259 /*
2260 * Get current trap trace entry physical pointer.
2261 */
2262 CPU_INDEX(%g6, %g5)
2263 sll %g6, TRAPTR_SIZE_SHIFT, %g6
2264 set trap_trace_ctl, %g5
2265 add %g6, %g5, %g6
2301 ld [%g6 + TRAPTR_LIMIT], %g4
2302 st %g5, [%g6 + TRAPTR_LAST_OFFSET]
2303 add %g5, TRAP_ENT_SIZE, %g5
2304 sub %g4, TRAP_ENT_SIZE, %g4
2305 cmp %g5, %g4
2306 movge %icc, 0, %g5
2307 st %g5, [%g6 + TRAPTR_OFFSET]
2308 ipe_tl1_skip_tt:
2309 #endif /* TRAPTRACE */
2310
2311 /*
2312 * We fall into this macro if we've successfully logged the error in
2313 * the ch_err_tl1_data structure and want the PIL15 softint to pick
2314 * it up and log it. %g1 must point to the ch_err_tl1_data structure.
2315 * Restores the %g registers and issues retry.
2316 */
2317 CH_ERR_TL1_EXIT;
2318
2319 SET_SIZE(icache_parity_tl1_err)
2320
2321 #endif /* lint */
2322
2323 #endif /* CPU_IMP_L1_CACHE_PARITY */
2324
2325
2326 /*
2327 * The itlb_rd_entry and dtlb_rd_entry functions return the tag portion of the
2328 * tte, the virtual address, and the ctxnum of the specified tlb entry. They
2329 * should only be used in places where you have no choice but to look at the
2330 * tlb itself.
2331 *
2332 * Note: These two routines are required by the Estar "cpr" loadable module.
2333 */
2334
2335 #if defined(lint)
2336
2337 /* ARGSUSED */
2338 void
2339 itlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
2340 {}
2341
2342 #else /* lint */
2343
/*
 * Read one ITLB entry.
 *
 * In:   %o0 = entry  (TLB entry index)
 *       %o1 = tte    (receives the 64-bit word read via ASI_ITLB_ACCESS)
 *       %o2 = va_tag (receives the masked tag read via ASI_ITLB_TAGREAD)
 * Uses: %g1, %g2, %o4, %o5 as scratch; %o0 is overwritten.
 *
 * The index is shifted left by 3 (multiplied by 8) to form the address
 * used for both diagnostic loads. The tag-read value has every bit of
 * TAGREAD_CTX_MASK cleared before it is stored, so *va_tag does not
 * carry those bits (presumably the context number -- confirm against
 * the TAGREAD_CTX_MASK definition). The final store executes in the
 * delay slot of retl.
 */
2344 ENTRY_NP(itlb_rd_entry)
2345 sllx %o0, 3, %o0
2346 ldxa [%o0]ASI_ITLB_ACCESS, %g1
2347 stx %g1, [%o1]
2348 ldxa [%o0]ASI_ITLB_TAGREAD, %g2
2349 set TAGREAD_CTX_MASK, %o4
2350 andn %g2, %o4, %o5
2351 retl
2352 stx %o5, [%o2]
2353 SET_SIZE(itlb_rd_entry)
2354
2355 #endif /* lint */
2356
2357
2358 #if defined(lint)
2359
2360 /* ARGSUSED */
2361 void
2362 dtlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
2363 {}
2364
2365 #else /* lint */
2366
/*
 * Read one DTLB entry; identical in structure to itlb_rd_entry but
 * using the DTLB diagnostic ASIs.
 *
 * In:   %o0 = entry  (TLB entry index)
 *       %o1 = tte    (receives the 64-bit word read via ASI_DTLB_ACCESS)
 *       %o2 = va_tag (receives the masked tag read via ASI_DTLB_TAGREAD)
 * Uses: %g1, %g2, %o4, %o5 as scratch; %o0 is overwritten.
 *
 * The index is shifted left by 3 (multiplied by 8) to form the address
 * used for both diagnostic loads. The tag-read value has every bit of
 * TAGREAD_CTX_MASK cleared before it is stored, so *va_tag does not
 * carry those bits (presumably the context number -- confirm against
 * the TAGREAD_CTX_MASK definition). The final store executes in the
 * delay slot of retl.
 */
2367 ENTRY_NP(dtlb_rd_entry)
2368 sllx %o0, 3, %o0
2369 ldxa [%o0]ASI_DTLB_ACCESS, %g1
2370 stx %g1, [%o1]
2371 ldxa [%o0]ASI_DTLB_TAGREAD, %g2
2372 set TAGREAD_CTX_MASK, %o4
2373 andn %g2, %o4, %o5
2374 retl
2375 stx %o5, [%o2]
2376 SET_SIZE(dtlb_rd_entry)
2377 #endif /* lint */
2378
2379
2380 #if !(defined(JALAPENO) || defined(SERRANO))
2381
2382 #if defined(lint)
2383
2384 uint64_t
2385 get_safari_config(void)
2386 { return (0); }
2387
2388 #else /* lint */
2389
/*
 * Return the 64-bit value read from address 0 of ASI_SAFARI_CONFIG.
 *
 * In:  nothing
 * Out: %o0 = value read
 * Leaf routine; no other registers are written.
 */
2390 ENTRY(get_safari_config)
2391 ldxa [%g0]ASI_SAFARI_CONFIG, %o0
2392 retl
2393 nop
2394 SET_SIZE(get_safari_config)
2395
2396 #endif /* lint */
2397
2398
2399 #if defined(lint)
2400
2401 /* ARGSUSED */
2402 void
2403 set_safari_config(uint64_t safari_config)
2404 {}
2405
2406 #else /* lint */
2407
/*
 * Write the caller's value to address 0 of ASI_SAFARI_CONFIG.
 *
 * In:  %o0 = safari_config (64-bit value to store)
 * Out: nothing
 * The store is followed by membar #Sync before the routine returns.
 */
2408 ENTRY(set_safari_config)
2409 stxa %o0, [%g0]ASI_SAFARI_CONFIG
2410 membar #Sync
2411 retl
2412 nop
2413 SET_SIZE(set_safari_config)
2414
2415 #endif /* lint */
2416
2417 #endif /* !(JALAPENO || SERRANO) */
2418
2419
2420 #if defined(lint)
2421
/*
 * NOTE(review): this lint stub is spelled cpu_cleartickpnt, while the
 * assembly entry point below is cpu_clearticknpt.
 */
2422 void
2423 cpu_cleartickpnt(void)
2424 {}
2425
2426 #else /* lint */
2427 /*
2428 * Clear the NPT (non-privileged trap) bit in the %tick/%stick
2429 * registers. In an effort to make the change in the
2430 * tick/stick counter as consistent as possible, we disable
2431 * all interrupts while we're changing the registers. We also
2432 * ensure that the read and write instructions are in the same
2433 * line in the instruction cache.
2434 */
/*
 * Register usage (as seen in the code below):
 *   %g1 - %pstate on entry; written back in the delay slot of the
 *         final jmp
 *   %g2 - value read from %tick, then from STICK
 *   %g3 - first the interrupts-off %pstate, then the bit-63 mask
 *   %g4 - never written here. The routine returns with "jmp %g4 + 4"
 *         instead of retl, so the return address is expected in %g4
 *         on entry -- NOTE(review): confirm against the call sites.
 *
 * Each "mov 1, %g3" sits in the (non-annulled) delay slot of the
 * preceding brgez and therefore executes whether or not the branch is
 * taken. Each "ba,a" is an annulled unconditional branch to the label
 * that follows the .align 8, so any padding the assembler inserts is
 * skipped. wrpr/wr write the XOR of their two source operands; since
 * bit 63 is known to be set on these paths, XORing the freshly re-read
 * counter with the bit-63 mask clears NPT and keeps the other bits.
 */
2435 ENTRY_NP(cpu_clearticknpt)
2436 rdpr %pstate, %g1 /* save processor state */
2437 andn %g1, PSTATE_IE, %g3 /* turn off */
2438 wrpr %g0, %g3, %pstate /* interrupts */
2439 rdpr %tick, %g2 /* get tick register */
2440 brgez,pn %g2, 1f /* if NPT bit off, we're done */
2441 mov 1, %g3 /* create mask */
2442 sllx %g3, 63, %g3 /* for NPT bit */
2443 ba,a,pt %xcc, 2f
2444 .align 8 /* Ensure rd/wr in same i$ line */
2445 2:
2446 rdpr %tick, %g2 /* get tick register */
2447 wrpr %g3, %g2, %tick /* write tick register, */
2448 /* clearing NPT bit */
2449 1:
2450 rd STICK, %g2 /* get stick register */
2451 brgez,pn %g2, 3f /* if NPT bit off, we're done */
2452 mov 1, %g3 /* create mask */
2453 sllx %g3, 63, %g3 /* for NPT bit */
2454 ba,a,pt %xcc, 4f
2455 .align 8 /* Ensure rd/wr in same i$ line */
2456 4:
2457 rd STICK, %g2 /* get stick register */
2458 wr %g3, %g2, STICK /* write stick register, */
2459 /* clearing NPT bit */
2460 3:
2461 jmp %g4 + 4
2462 wrpr %g0, %g1, %pstate /* restore processor state */
2463
2464 SET_SIZE(cpu_clearticknpt)
2465
2466 #endif /* lint */
2467
2468
2469 #if defined(CPU_IMP_L1_CACHE_PARITY)
2470
2471 #if defined(lint)
2472 /*
2473 * correct_dcache_parity(size_t size, size_t linesize)
2474 *
2475 * Correct D$ data parity by zeroing the data and initializing microtag
2476 * for all indexes and all ways of the D$.
2477 *
2478 */
2479 /* ARGSUSED */
2480 void
2481 correct_dcache_parity(size_t size, size_t linesize)
2482 {}
2483
2484 #else /* lint */
2485
2486 ENTRY(correct_dcache_parity)
2487 /*
2488 * Register Usage:
2489 *
2490 * %o0 = input D$ size
2491 * %o1 = input D$ line size
2492 * %o2 = scratch
2493 * %o3 = scratch
2494 * %o4 = scratch
2495 */
2496
2497 sub %o0, %o1, %o0 ! init cache line address
2498
2499 /*
2500 * For Panther CPUs, we also need to clear the data parity bits
2501 * using DC_data_parity bit of the ASI_DCACHE_DATA register.
2502 */
2503 GET_CPU_IMPL(%o3)
2504 cmp %o3, PANTHER_IMPL
2505 bne 1f
2527 membar #Sync ! required after ASI_DC_DATA
2528 /*
2529 * We also clear the parity bits if this is a panther. For non-Panther
2530 * CPUs, we simply end up clearing the $data register twice.
2531 */
2532 stxa %g0, [%o4 + %o2]ASI_DC_DATA
2533 membar #Sync
2534
2535 subcc %o2, 8, %o2
2536 bge 2b
2537 nop
2538
2539 subcc %o0, %o1, %o0
2540 bge 1b
2541 nop
2542
2543 retl
2544 nop
2545 SET_SIZE(correct_dcache_parity)
2546
2547 #endif /* lint */
2548
2549 #endif /* CPU_IMP_L1_CACHE_PARITY */
2550
2551
2552 #if defined(lint)
2553 /*
2554 * Get timestamp (stick).
2555 */
2556 /* ARGSUSED */
2557 void
2558 stick_timestamp(int64_t *ts)
2559 {
2560 }
2561
2562 #else /* lint */
2563
/*
 * Store the current STICK value, with bit 63 cleared, to *ts.
 *
 * In:   %o0 = ts (destination of the 64-bit store)
 * Uses: %g1 as scratch.
 *
 * Bit 63 is removed by shifting left one bit and then logically
 * right one bit. The store executes in the delay slot of retl.
 */
2564 ENTRY_NP(stick_timestamp)
2565 rd STICK, %g1 ! read stick reg
2566 sllx %g1, 1, %g1
2567 srlx %g1, 1, %g1 ! clear npt bit
2568
2569 retl
2570 stx %g1, [%o0] ! store the timestamp
2571 SET_SIZE(stick_timestamp)
2572
2573 #endif /* lint */
2574
2575
2576 #if defined(lint)
2577 /*
2578 * Set STICK adjusted by skew.
2579 */
2580 /* ARGSUSED */
2581 void
2582 stick_adj(int64_t skew)
2583 {
2584 }
2585
2586 #else /* lint */
2587
/*
 * Add skew to the STICK register with interrupts disabled.
 *
 * In:   %o0 = skew
 * Uses: %g1 = %pstate on entry (written back in the retl delay slot),
 *       %g3 = %pstate with PSTATE_IE cleared,
 *       %g4 = STICK value read, %o1 = adjusted value written.
 *
 * The "ba 1f" (with the interrupt-disabling wrpr in its delay slot)
 * jumps over any padding produced by .align 16, so the nop/rd/add/wr
 * group that follows starts on a 16-byte boundary.
 * The whole 64-bit value read from STICK, including bit 63, takes
 * part in the addition; no masking is done here.
 */
2588 ENTRY_NP(stick_adj)
2589 rdpr %pstate, %g1 ! save processor state
2590 andn %g1, PSTATE_IE, %g3
2591 ba 1f ! cache align stick adj
2592 wrpr %g0, %g3, %pstate ! turn off interrupts
2593
2594 .align 16
2595 1: nop
2596
2597 rd STICK, %g4 ! read stick reg
2598 add %g4, %o0, %o1 ! adjust stick with skew
2599 wr %o1, %g0, STICK ! write stick reg
2600
2601 retl
2602 wrpr %g1, %pstate ! restore processor state
2603 SET_SIZE(stick_adj)
2604
2605 #endif /* lint */
2606
2607 #if defined(lint)
2608 /*
2609 * Debugger-specific stick retrieval
2610 */
2611 /*ARGSUSED*/
2612 int
2613 kdi_get_stick(uint64_t *stickp)
2614 {
2615 return (0);
2616 }
2617
2618 #else /* lint */
2619
/*
 * Store the raw STICK register value to *stickp and return 0.
 *
 * In:   %o0 = stickp
 * Out:  %o0 = 0 (set in the delay slot of retl)
 * Uses: %g1 as scratch.
 *
 * Unlike stick_timestamp, the value is stored exactly as read; bit 63
 * is not cleared.
 */
2620 ENTRY_NP(kdi_get_stick)
2621 rd STICK, %g1
2622 stx %g1, [%o0]
2623 retl
2624 mov %g0, %o0
2625 SET_SIZE(kdi_get_stick)
2626
2627 #endif /* lint */
2628
2629 #if defined(lint)
2630 /*
2631 * Invalidate the specified line from the D$.
2632 *
2633 * Register usage:
2634 * %o0 - index for the invalidation, specifies DC_way and DC_addr
2635 *
2636 * ASI_DC_TAG, 0x47, is used in the following manner. A 64-bit value is
2637 * stored to a particular DC_way and DC_addr in ASI_DC_TAG.
2638 *
2639 * The format of the stored 64-bit value is:
2640 *
2641 * +----------+--------+----------+
2642 * | Reserved | DC_tag | DC_valid |
2643 * +----------+--------+----------+
2644 * 63 31 30 1 0
2645 *
2646 * DC_tag is the 30-bit physical tag of the associated line.
2647 * DC_valid is the 1-bit valid field for both the physical and snoop tags.
2648 *
2649 * The format of the 64-bit DC_way and DC_addr into ASI_DC_TAG is:
2650 *
2651 * +----------+--------+----------+----------+
2652 * | Reserved | DC_way | DC_addr | Reserved |
2653 * +----------+--------+----------+----------+
2654 * 63 16 15 14 13 5 4 0
2655 *
2656 * DC_way is a 2-bit index that selects one of the 4 ways.
2657 * DC_addr is a 9-bit index that selects one of 512 tag/valid fields.
2658 *
2659 * Setting the DC_valid bit to zero for the specified DC_way and
2660 * DC_addr index into the D$ results in an invalidation of a D$ line.
2661 */
2662 /*ARGSUSED*/
2663 void
2664 dcache_inval_line(int index)
2665 {
2666 }
2667 #else /* lint */
/*
 * In:   %o0 = index (overwritten with the ASI_DC_TAG address)
 *
 * The index is shifted left by 5 so that it starts at bit 5, which is
 * where DC_addr begins in the address layout described above. The
 * value stored is %g0, i.e. an all-zero tag word, and the store is
 * followed by membar #Sync before returning.
 */
2668 ENTRY(dcache_inval_line)
2669 sll %o0, 5, %o0 ! shift index into DC_way and DC_addr
2670 stxa %g0, [%o0]ASI_DC_TAG ! zero the DC_valid and DC_tag bits
2671 membar #Sync
2672 retl
2673 nop
2674 SET_SIZE(dcache_inval_line)
2675 #endif /* lint */
2676
2677 #if defined(lint)
2678 /*
2679 * Invalidate the entire I$
2680 *
2681 * Register usage:
2682 * %o0 - specifies IC_way, IC_addr, IC_tag
2683 * %o1 - scratch
2684 * %o2 - used to save and restore DCU value
2685 * %o3 - scratch
2686 * %o5 - used to save and restore PSTATE
2687 *
2688 * Due to the behavior of the I$ control logic when accessing ASI_IC_TAG,
2689 * the I$ should be turned off. Accesses to ASI_IC_TAG may collide and
2690 * block out snoops and invalidates to the I$, causing I$ consistency
2691 * to be broken. Before turning on the I$, all I$ lines must be invalidated.
2692 *
2693 * ASI_IC_TAG, 0x67, is used in the following manner. A 64-bit value is
2694 * stored to a particular IC_way, IC_addr, IC_tag in ASI_IC_TAG. The
2695 * info below describes store (write) use of ASI_IC_TAG. Note that read
2696 * use of ASI_IC_TAG behaves differently.
2697 *
2705 * Valid is the 1-bit valid field for both the physical and snoop tags.
2706 * IC_vpred is the 8-bit LPB bits for 8 instructions starting at
2707 * the 32-byte boundary aligned address specified by IC_addr.
2708 *
2709 * The format of the 64-bit IC_way, IC_addr, IC_tag into ASI_IC_TAG is:
2710 *
2711 * +----------+--------+---------+--------+---------+
2712 * | Reserved | IC_way | IC_addr | IC_tag |Reserved |
2713 * +----------+--------+---------+--------+---------+
2714 * 63 16 15 14 13 5 4 3 2 0
2715 *
2716 * IC_way is a 2-bit index that selects one of the 4 ways.
2717 * IC_addr[13:6] is an 8-bit index that selects one of 256 valid fields.
2718 * IC_addr[5] is a "don't care" for a store.
2719 * IC_tag set to 2 specifies that the stored value is to be interpreted
2720 * as containing Valid and IC_vpred as described above.
2721 *
2722 * Setting the Valid bit to zero for the specified IC_way and
2723 * IC_addr index into the I$ results in an invalidation of an I$ line.
2724 */
2725 /*ARGSUSED*/
2726 void
2727 icache_inval_all(void)
2728 {
2729 }
2730 #else /* lint */
/*
 * Flow of the routine below:
 *   1. Save %pstate in %o5 and clear PSTATE_IE.
 *   2. GET_CPU_PRIVATE_PTR is given icache_inval_all_1 as its label
 *      argument; presumably it leaves the per-cpu private pointer in
 *      %o0 and branches to that label when no private area exists
 *      (macro defined elsewhere -- TODO confirm).
 *   3. With a private area, the I$ size and line size come from its
 *      CHPR_ICACHE_SIZE / CHPR_ICACHE_LINESIZE fields; the size load
 *      is in the delay slot of the branch to label 2, so %o0 changes
 *      from pointer to size only after the line size has been read.
 *      Otherwise the globals icache_size / icache_linesize are used.
 *   4. CH_ICACHE_FLUSHALL is invoked with %o0 = size, %o1 = line size
 *      and %o2, %o4 as its scratch arguments.
 *   5. The saved %pstate is written back in the delay slot of retl.
 */
2731 ENTRY(icache_inval_all)
2732 rdpr %pstate, %o5
2733 andn %o5, PSTATE_IE, %o3
2734 wrpr %g0, %o3, %pstate ! clear IE bit
2735
2736 GET_CPU_PRIVATE_PTR(%g0, %o0, %o2, icache_inval_all_1);
2737 ld [%o0 + CHPR_ICACHE_LINESIZE], %o1
2738 ba,pt %icc, 2f
2739 ld [%o0 + CHPR_ICACHE_SIZE], %o0
2740 icache_inval_all_1:
2741 ASM_LD(%o0, icache_size)
2742 ASM_LD(%o1, icache_linesize)
2743 2:
2744 CH_ICACHE_FLUSHALL(%o0, %o1, %o2, %o4)
2745
2746 retl
2747 wrpr %g0, %o5, %pstate ! restore earlier pstate
2748 SET_SIZE(icache_inval_all)
2749 #endif /* lint */
2750
2751
2752 #if defined(lint)
2753 /* ARGSUSED */
2754 void
2755 cache_scrubreq_tl1(uint64_t inum, uint64_t index)
2756 {
2757 }
2758
2759 #else /* lint */
2760 /*
2761 * cache_scrubreq_tl1 is the crosstrap handler called on offlined cpus via a
2762 * crosstrap. It atomically increments the outstanding request counter and,
2763 * if there was not already an outstanding request, branches to setsoftint_tl1
2764 * to enqueue an intr_vec for the given inum.
2765 */
/*
 * Notes on the code below:
 * - %g2 is turned into a byte offset:
 *     index * CHSM_OUTSTANDING_INCR + CHPR_SCRUB_MISC + CHSM_OUTSTANDING
 *   and handed to GET_CPU_PRIVATE_PTR, which is expected to leave the
 *   address of chsm_outstanding[index] in %g4. The macro also receives
 *   label 1 (a bare retry); presumably it branches there when the cpu
 *   has no private area (macro defined elsewhere -- TODO confirm).
 * - The counter update is a plain ld/add/st, not an atomic
 *   instruction; the in-line comment gives running at TL1 as the
 *   reason that is sufficient.
 * - The st is in the non-annulled delay slot of brnz, so the counter
 *   is incremented on every request, including those that take the
 *   branch and skip setsoftint_tl1.
 * - %g1 (inum) is not modified before the jump to setsoftint_tl1.
 */
2766
2767 ! Register usage:
2768 !
2769 ! Arguments:
2770 ! %g1 - inum
2771 ! %g2 - index into chsm_outstanding array
2772 !
2773 ! Internal:
2774 ! %g2, %g3, %g5 - scratch
2775 ! %g4 - ptr. to scrub_misc chsm_outstanding[index].
2776 ! %g6 - setsoftint_tl1 address
2777
2778 ENTRY_NP(cache_scrubreq_tl1)
2779 mulx %g2, CHSM_OUTSTANDING_INCR, %g2
2780 set CHPR_SCRUB_MISC + CHSM_OUTSTANDING, %g3
2781 add %g2, %g3, %g2
2782 GET_CPU_PRIVATE_PTR(%g2, %g4, %g5, 1f);
2783 ld [%g4], %g2 ! cpu's chsm_outstanding[index]
2784 !
2785 ! no need to use atomic instructions for the following
2786 ! increment - we're at tl1
2787 !
2788 add %g2, 0x1, %g3
2789 brnz,pn %g2, 1f ! no need to enqueue more intr_vec
2790 st %g3, [%g4] ! delay - store incremented counter
2791 ASM_JMP(%g6, setsoftint_tl1)
2792 ! not reached
2793 1:
2794 retry
2795 SET_SIZE(cache_scrubreq_tl1)
2796
2797 #endif /* lint */
2798
2799
2800 #if defined(lint)
2801
2802 /* ARGSUSED */
2803 void
2804 get_cpu_error_state(ch_cpu_errors_t *cpu_error_regs)
2805 {}
2806
2807 #else /* lint */
2808
2809 /*
2810 * Get the error state for the processor.
2811 * Note that this must not be used at TL>0
2812 */
2813 ENTRY(get_cpu_error_state)
2814 #if defined(CHEETAH_PLUS)
2815 set ASI_SHADOW_REG_VA, %o2
2816 ldxa [%o2]ASI_AFSR, %o1 ! shadow afsr reg
2817 stx %o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR]
2818 ldxa [%o2]ASI_AFAR, %o1 ! shadow afar reg
2819 stx %o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFAR]
2820 GET_CPU_IMPL(%o3) ! Only panther has AFSR_EXT registers
2821 cmp %o3, PANTHER_IMPL
2822 bne,a 1f
2823 stx %g0, [%o0 + CH_CPU_ERRORS_AFSR_EXT] ! zero for non-PN
2824 set ASI_AFSR_EXT_VA, %o2
2825 ldxa [%o2]ASI_AFSR, %o1 ! afsr_ext reg
2826 stx %o1, [%o0 + CH_CPU_ERRORS_AFSR_EXT]
2827 set ASI_SHADOW_AFSR_EXT_VA, %o2
2828 ldxa [%o2]ASI_AFSR, %o1 ! shadow afsr_ext reg
2836 stx %g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR]
2837 stx %g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFAR]
2838 stx %g0, [%o0 + CH_CPU_ERRORS_AFSR_EXT]
2839 stx %g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT]
2840 #endif /* CHEETAH_PLUS */
2841 #if defined(SERRANO)
2842 /*
2843 * Serrano has an afar2 which captures the address on FRC/FRU errors.
2844 * We save this in the afar2 of the register save area.
2845 */
2846 set ASI_MCU_AFAR2_VA, %o2
2847 ldxa [%o2]ASI_MCU_CTRL, %o1
2848 stx %o1, [%o0 + CH_CPU_ERRORS_AFAR2]
2849 #endif /* SERRANO */
2850 ldxa [%g0]ASI_AFSR, %o1 ! primary afsr reg
2851 stx %o1, [%o0 + CH_CPU_ERRORS_AFSR]
2852 ldxa [%g0]ASI_AFAR, %o1 ! primary afar reg
2853 retl
2854 stx %o1, [%o0 + CH_CPU_ERRORS_AFAR]
2855 SET_SIZE(get_cpu_error_state)
2856 #endif /* lint */
2857
2858 #if defined(lint)
2859
2860 /*
2861 * Check a page of memory for errors.
2862 *
2863 * Load each 64 byte block from physical memory.
2864 * Check AFSR after each load to see if an error
2865 * was caused. If so, log/scrub that error.
2866 *
2867 * Used to determine if a page contains
2868 * CEs when CEEN is disabled.
2869 */
2870 /*ARGSUSED*/
2871 void
2872 cpu_check_block(caddr_t va, uint_t psz)
2873 {}
2874
2875 #else /* lint */
2876
/*
 * Register usage (after the save):
 *   %i0 - va, advanced by 64 per iteration
 *   %i1 - psz
 *   %l2 - %fprs on entry, written back before returning
 *   %l3 - AFSR value read after each block load
 *   %l4 - number of 64-byte blocks left (low 32 bits of psz >> 6)
 *
 * Notes on the code below:
 * - The stack frame is enlarged by CH_CPU_ERROR_SIZE; the address
 *   %fp + STACK_BIAS - CH_CPU_ERROR_SIZE is passed in %o0 to
 *   get_cpu_error_state.
 * - %o0 is not reloaded before the call to cpu_ce_detected, so that
 *   call relies on get_cpu_error_state leaving %o0 intact -- TODO
 *   confirm. Only %o1 (CE_CEEN_TIMEOUT) is set, in the delay slot.
 * - The loop is bottom-tested: the block load always runs once before
 *   %l4 is decremented and checked. NOTE(review): a psz below 64 makes
 *   %l4 start at 0 and go non-zero after the dec; confirm that callers
 *   always pass at least 64.
 * - "add %i0, 64, %i0" is in an annulled delay slot and so executes
 *   only when the loop branch is taken.
 * - Only %fprs is saved and restored; the FP data registers written
 *   by the block load into %d0 are not preserved here.
 */
2877 ENTRY(cpu_check_block)
2878 !
2879 ! get a new window with room for the error regs
2880 !
2881 save %sp, -SA(MINFRAME + CH_CPU_ERROR_SIZE), %sp
2882 srl %i1, 6, %l4 ! clear top bits of psz
2883 ! and divide by 64
2884 rd %fprs, %l2 ! store FP
2885 wr %g0, FPRS_FEF, %fprs ! enable FP
2886 1:
2887 ldda [%i0]ASI_BLK_P, %d0 ! load a block
2888 membar #Sync
2889 ldxa [%g0]ASI_AFSR, %l3 ! read afsr reg
2890 brz,a,pt %l3, 2f ! check for error
2891 nop
2892
2893 !
2894 ! if error, read the error regs and log it
2895 !
2896 call get_cpu_error_state
2897 add %fp, STACK_BIAS - CH_CPU_ERROR_SIZE, %o0
2898
2899 !
2900 ! cpu_ce_detected(ch_cpu_errors_t *, flag)
2901 !
2902 call cpu_ce_detected ! log the error
2903 mov CE_CEEN_TIMEOUT, %o1
2904 2:
2905 dec %l4 ! next 64-byte block
2906 brnz,a,pt %l4, 1b
2907 add %i0, 64, %i0 ! increment block addr
2908
2909 wr %l2, %g0, %fprs ! restore FP
2910 ret
2911 restore
2912
2913 SET_SIZE(cpu_check_block)
2914
2915 #endif /* lint */
2916
2917 #if defined(lint)
2918
2919 /*
2920 * Perform a cpu logout called from C. This is used where we did not trap
2921 * for the error but still want to gather "what we can". Caller must make
2922 * sure cpu private area exists and that the indicated logout area is free
2923 * for use, and that we are unable to migrate cpus.
2924 */
2925 /*ARGSUSED*/
2926 void
2927 cpu_delayed_logout(uint64_t afar, ch_cpu_logout_t *clop)
2928 { }
2929
2930 #else
/*
 * In:   %o0 = afar
 *       %o1 = clop; advanced by CH_CLO_DATA + CH_CHD_EC_DATA before
 *             being passed to GET_ECACHE_DTAGS
 * Uses: %o2-%o5 as scratch / macro arguments, %g1 = %asi on entry.
 *
 * Sequence: disable interrupts, PARK_SIBLING_CORE, switch %asi to
 * ASI_P around GET_ECACHE_DTAGS, restore %asi, UNPARK_SIBLING_CORE,
 * enable interrupts. The three macros are defined elsewhere; what
 * they clobber beyond the registers passed to them is not visible
 * here.
 *
 * NOTE(review): the %pstate read on entry is not kept (%o2 is reused
 * by PARK_SIBLING_CORE). On exit PSTATE_IE is set unconditionally, so
 * interrupts end up enabled even if the caller entered with them
 * disabled -- confirm that no caller depends on that.
 */
2931 ENTRY(cpu_delayed_logout)
2932 rdpr %pstate, %o2
2933 andn %o2, PSTATE_IE, %o2
2934 wrpr %g0, %o2, %pstate ! disable interrupts
2935 PARK_SIBLING_CORE(%o2, %o3, %o4) ! %o2 has DCU value
2936 add %o1, CH_CLO_DATA + CH_CHD_EC_DATA, %o1
2937 rd %asi, %g1
2938 wr %g0, ASI_P, %asi
2939 GET_ECACHE_DTAGS(%o0, %o1, %o3, %o4, %o5)
2940 wr %g1, %asi
2941 UNPARK_SIBLING_CORE(%o2, %o3, %o4) ! can use %o2 again
2942 rdpr %pstate, %o2
2943 or %o2, PSTATE_IE, %o2
2944 wrpr %g0, %o2, %pstate
2945 retl
2946 nop
2947 SET_SIZE(cpu_delayed_logout)
2948
2949 #endif /* lint */
2950
2951 #if defined(lint)
2952
2953 /*ARGSUSED*/
2954 int
2955 dtrace_blksuword32(uintptr_t addr, uint32_t *data, int tryagain)
2956 { return (0); }
2957
2958 #else
2959
2960 ENTRY(dtrace_blksuword32)
2961 save %sp, -SA(MINFRAME + 4), %sp
2962
2963 rdpr %pstate, %l1
2964 andn %l1, PSTATE_IE, %l2 ! disable interrupts to
2965 wrpr %g0, %l2, %pstate ! protect our FPU diddling
2966
2967 rd %fprs, %l0
2968 andcc %l0, FPRS_FEF, %g0
2969 bz,a,pt %xcc, 1f ! if the fpu is disabled
2970 wr %g0, FPRS_FEF, %fprs ! ... enable the fpu
2971
2972 st %f0, [%fp + STACK_BIAS - 4] ! save %f0 to the stack
2973 1:
2974 set 0f, %l5
2975 /*
2976 * We're about to write a block full of either total garbage
2977 * (not kernel data, don't worry) or user floating-point data
2978 * (so it only _looks_ like garbage).
2979 */
3004
3005 ld [%fp + STACK_BIAS - 4], %f0 ! restore %f0
3006 1:
3007
3008 wrpr %g0, %l1, %pstate ! restore interrupts
3009
3010 /*
3011 * If tryagain is set (%i2) we tail-call dtrace_blksuword32_err()
3012 * which deals with watchpoints. Otherwise, just return -1.
3013 */
3014 brnz,pt %i2, 1f
3015 nop
3016 ret
3017 restore %g0, -1, %o0
3018 1:
3019 call dtrace_blksuword32_err
3020 restore
3021
3022 SET_SIZE(dtrace_blksuword32)
3023
3024 #endif /* lint */
3025
3026 #ifdef CHEETAHPLUS_ERRATUM_25
3027
3028 #if defined(lint)
3029 /*
3030 * Claim a chunk of physical address space.
3031 */
3032 /*ARGSUSED*/
3033 void
3034 claimlines(uint64_t pa, size_t sz, int stride)
3035 {}
3036 #else /* lint */
/*
 * In:   %o0 = pa, %o1 = sz, %o2 = stride
 * Uses: %o3 = address touched on each iteration; %o1 is counted down.
 *
 * Each pass subtracts stride from the remaining size and forms
 * pa + remaining. While the subtraction does not borrow (bgeu), the
 * loop repeats and the casxa in the annulled delay slot is executed;
 * on the final, borrowing pass the casxa is annulled. The addresses
 * touched are therefore pa + sz - stride, pa + sz - 2 * stride, ...
 * for as long as the offset stays non-negative.
 *
 * The casxa compares against %g0 and swaps in %g0, so memory is left
 * with the value it already had whether or not the compare succeeds.
 * A membar #Sync is issued once, after the loop.
 */
3037 ENTRY(claimlines)
3038 1:
3039 subcc %o1, %o2, %o1
3040 add %o0, %o1, %o3
3041 bgeu,a,pt %xcc, 1b
3042 casxa [%o3]ASI_MEM, %g0, %g0
3043 membar #Sync
3044 retl
3045 nop
3046 SET_SIZE(claimlines)
3047 #endif /* lint */
3048
3049 #if defined(lint)
3050 /*
3051 * CPU feature initialization,
3052 * turn BPE off,
3053 * get device id.
3054 */
3055 /*ARGSUSED*/
3056 void
3057 cpu_feature_init(void)
3058 {}
3059 #else /* lint */
/*
 * Takes no arguments and returns nothing; runs in its own register
 * window (save ... ret/restore).
 *
 * Steps performed below:
 *   1. If the 32-bit variable cheetah_bpe_off is non-zero, clear
 *      ASR_DISPATCH_CONTROL_BPE in ASR_DISPATCH_CONTROL
 *      (read-modify-write).
 *   2. Read ASI_DEVICE_SERIAL_ID at address 0 and store the 64-bit
 *      value at cpunodes + cpu_index * CPU_NODE_SIZE + DEVICE_ID,
 *      where cpu_index comes from the CPU_INDEX macro.
 *   3. Only when CHEETAHPLUS_ERRATUM_34 is defined: call
 *      itlb_erratum34_fixup and then dtlb_erratum34_fixup.
 */
3060 ENTRY(cpu_feature_init)
3061 save %sp, -SA(MINFRAME), %sp
3062 sethi %hi(cheetah_bpe_off), %o0
3063 ld [%o0 + %lo(cheetah_bpe_off)], %o0
3064 brz %o0, 1f
3065 nop
3066 rd ASR_DISPATCH_CONTROL, %o0
3067 andn %o0, ASR_DISPATCH_CONTROL_BPE, %o0
3068 wr %o0, 0, ASR_DISPATCH_CONTROL
3069 1:
3070 !
3071 ! get the device_id and store the device_id
3072 ! in the appropriate cpunodes structure
3073 ! given the cpus index
3074 !
3075 CPU_INDEX(%o0, %o1)
3076 mulx %o0, CPU_NODE_SIZE, %o0
3077 set cpunodes + DEVICE_ID, %o1
3078 ldxa [%g0] ASI_DEVICE_SERIAL_ID, %o2
3079 stx %o2, [%o0 + %o1]
3080 #ifdef CHEETAHPLUS_ERRATUM_34
3081 !
3082 ! apply Cheetah+ erratum 34 workaround
3083 !
3084 call itlb_erratum34_fixup
3085 nop
3086 call dtlb_erratum34_fixup
3087 nop
3088 #endif /* CHEETAHPLUS_ERRATUM_34 */
3089 ret
3090 restore
3091 SET_SIZE(cpu_feature_init)
3092 #endif /* lint */
3093
3094 #if defined(lint)
3095 /*
3096 * Copy a tsb entry atomically, from src to dest.
3097 * src must be 128 bit aligned.
3098 */
3099 /*ARGSUSED*/
3100 void
3101 copy_tsb_entry(uintptr_t src, uintptr_t dest)
3102 {}
3103 #else /* lint */
/*
 * In:   %o0 = src, %o1 = dest
 * Uses: %o2 / %o3 as the register pair filled by the quad load.
 *
 * The 16-byte entry is fetched with a single ldda through
 * ASI_NQUAD_LD into %o2 (tag) and %o3 (data). It is written back with
 * two separate 8-byte stores, tag at dest and data at dest + 8.
 * NOTE(review): only the read side is a single access; the write to
 * dest is two instructions -- confirm that callers do not need the
 * destination update to be atomic as well.
 */
3104 ENTRY(copy_tsb_entry)
3105 ldda [%o0]ASI_NQUAD_LD, %o2 ! %o2 = tag, %o3 = data
3106 stx %o2, [%o1]
3107 stx %o3, [%o1 + 8 ]
3108 retl
3109 nop
3110 SET_SIZE(copy_tsb_entry)
3111 #endif /* lint */
3112
3113 #endif /* CHEETAHPLUS_ERRATUM_25 */
3114
3115 #ifdef CHEETAHPLUS_ERRATUM_34
3116
3117 #if defined(lint)
3118
3119 /*ARGSUSED*/
3120 void
3121 itlb_erratum34_fixup(void)
3122 {}
3123
3124 #else /* lint */
3125
3126 !
3127 ! In Cheetah+ erratum 34, under certain conditions an ITLB locked
3128 ! index 0 TTE will erroneously be displaced when a new TTE is
3129 ! loaded via ASI_ITLB_IN. In order to avoid cheetah+ erratum 34,
3130 ! locked index 0 TTEs must be relocated.
3131 !
3132 ! NOTE: Care must be taken to avoid an ITLB miss in this routine.
3133 !
3134 ENTRY_NP(itlb_erratum34_fixup)
3135 rdpr %pstate, %o3
3136 #ifdef DEBUG
3137 PANIC_IF_INTR_DISABLED_PSTR(%o3, u3_di_label1, %g1)
3138 #endif /* DEBUG */
3139 wrpr %o3, PSTATE_IE, %pstate ! Disable interrupts
3140 ldxa [%g0]ASI_ITLB_ACCESS, %o1 ! %o1 = entry 0 data
3141 ldxa [%g0]ASI_ITLB_TAGREAD, %o2 ! %o2 = entry 0 tag
3142
3143 cmp %o1, %g0 ! Is this entry valid?
3144 bge %xcc, 1f
3145 andcc %o1, TTE_LCK_INT, %g0 ! Is this entry locked?
3170 !
3171 cmp %o4, %g0 ! TTE is > 0 iff not valid
3172 bge %xcc, 4f ! If invalid, go displace
3173 andcc %o4, TTE_LCK_INT, %g0 ! Check for lock bit
3174 bnz,a %icc, 3b ! If locked, look at next
3175 add %g3, (1 << 3), %g3 ! entry
3176 4:
3177 !
3178 ! We found an unlocked or invalid entry; we'll explicitly load
3179 ! the former index 0 entry here.
3180 !
3181 sethi %hi(FLUSH_ADDR), %o4
3182 set MMU_TAG_ACCESS, %g4
3183 stxa %o2, [%g4]ASI_IMMU
3184 stxa %o1, [%g3]ASI_ITLB_ACCESS
3185 flush %o4 ! Flush required for I-MMU
3186 retl
3187 wrpr %g0, %o3, %pstate ! Enable interrupts
3188 SET_SIZE(itlb_erratum34_fixup)
3189
3190 #endif /* lint */
3191
3192 #if defined(lint)
3193
3194 /*ARGSUSED*/
3195 void
3196 dtlb_erratum34_fixup(void)
3197 {}
3198
3199 #else /* lint */
3200
3201 !
3202 ! In Cheetah+ erratum 34, under certain conditions a DTLB locked
3203 ! index 0 TTE will erroneously be displaced when a new TTE is
3204 ! loaded. In order to avoid cheetah+ erratum 34, locked index 0
3205 ! TTEs must be relocated.
3206 !
3207 ENTRY_NP(dtlb_erratum34_fixup)
3208 rdpr %pstate, %o3
3209 #ifdef DEBUG
3210 PANIC_IF_INTR_DISABLED_PSTR(%o3, u3_di_label2, %g1)
3211 #endif /* DEBUG */
3212 wrpr %o3, PSTATE_IE, %pstate ! Disable interrupts
3213 ldxa [%g0]ASI_DTLB_ACCESS, %o1 ! %o1 = entry 0 data
3214 ldxa [%g0]ASI_DTLB_TAGREAD, %o2 ! %o2 = entry 0 tag
3215
3216 cmp %o1, %g0 ! Is this entry valid?
3217 bge %xcc, 1f
3218 andcc %o1, TTE_LCK_INT, %g0 ! Is this entry locked?
3219 bnz %icc, 2f
3220 nop
3239 ! of the lock bit).
3240 !
3241 cmp %o4, %g0 ! TTE is > 0 iff not valid
3242 bge %xcc, 4f ! If invalid, go displace
3243 andcc %o4, TTE_LCK_INT, %g0 ! Check for lock bit
3244 bnz,a %icc, 3b ! If locked, look at next
3245 add %g3, (1 << 3), %g3 ! entry
3246 4:
3247 !
3248 ! We found an unlocked or invalid entry; we'll explicitly load
3249 ! the former index 0 entry here.
3250 !
3251 set MMU_TAG_ACCESS, %g4
3252 stxa %o2, [%g4]ASI_DMMU
3253 stxa %o1, [%g3]ASI_DTLB_ACCESS
3254 membar #Sync
3255 retl
3256 wrpr %g0, %o3, %pstate ! Enable interrupts
3257 SET_SIZE(dtlb_erratum34_fixup)
3258
3259 #endif /* lint */
3260
3261 #endif /* CHEETAHPLUS_ERRATUM_34 */
3262
|
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 *
25 * Assembly code support for Cheetah/Cheetah+ modules
26 */
27
28 #include "assym.h"
29
30 #include <sys/asm_linkage.h>
31 #include <sys/mmu.h>
32 #include <vm/hat_sfmmu.h>
33 #include <sys/machparam.h>
34 #include <sys/machcpuvar.h>
35 #include <sys/machthread.h>
36 #include <sys/machtrap.h>
37 #include <sys/privregs.h>
38 #include <sys/trap.h>
39 #include <sys/cheetahregs.h>
40 #include <sys/us3_module.h>
41 #include <sys/xc_impl.h>
42 #include <sys/intreg.h>
43 #include <sys/async.h>
44 #include <sys/clock.h>
45 #include <sys/cheetahasm.h>
46 #include <sys/cmpregs.h>
47
48 #ifdef TRAPTRACE
49 #include <sys/traptrace.h>
50 #endif /* TRAPTRACE */
51
52 /* BEGIN CSTYLED */
53
54 #define DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3) \
55 ldxa [%g0]ASI_DCU, tmp1 ;\
56 btst DCU_DC, tmp1 /* is dcache enabled? */ ;\
57 bz,pn %icc, 1f ;\
58 ASM_LD(tmp1, dcache_linesize) ;\
59 ASM_LD(tmp2, dflush_type) ;\
60 cmp tmp2, FLUSHPAGE_TYPE ;\
61 be,pt %icc, 2f ;\
62 nop ;\
63 sllx arg1, CHEETAH_DC_VBIT_SHIFT, arg1/* tag to compare */ ;\
64 ASM_LD(tmp3, dcache_size) ;\
65 cmp tmp2, FLUSHMATCH_TYPE ;\
66 be,pt %icc, 3f ;\
67 nop ;\
68 /* \
69 * flushtype = FLUSHALL_TYPE, flush the whole thing \
70 * tmp3 = cache size \
71 * tmp1 = cache line size \
148 sllx arg, MMU_PAGESHIFT, tmp2; /* color to dcache page */ \
149 mov way, tmp3; \
150 sllx tmp3, 14, tmp3; /* One way 16K */ \
151 or tmp2, tmp3, tmp3; \
152 set MMU_PAGESIZE, tmp2; \
153 /* \
154 * tmp2 = page size \
155 * tmp3 = cached page in dcache \
156 */ \
157 sub tmp2, tmp1, tmp2; \
158 2: \
159 stxa %g0, [tmp3 + tmp2]ASI_DC_TAG; \
160 membar #Sync; \
161 cmp %g0, tmp2; \
162 bne,pt %icc, 2b; \
163 sub tmp2, tmp1, tmp2; \
164 1:
165
166 /* END CSTYLED */
167
168 /*
169 * Cheetah MMU and Cache operations.
170 */
171
172 ENTRY_NP(vtag_flushpage)
173 /*
174 * flush page from the tlb
175 *
176 * %o0 = vaddr
177 * %o1 = sfmmup
178 */
179 rdpr %pstate, %o5 ! %o5 = caller %pstate, written back on return
180 #ifdef DEBUG
181 PANIC_IF_INTR_DISABLED_PSTR(%o5, u3_di_label0, %g1)
182 #endif /* DEBUG */
183 /*
184 * disable ints
185 */
186 andn %o5, PSTATE_IE, %o4 ! %o4 = %pstate with IE cleared
187 wrpr %o4, 0, %pstate
188
189 /*
190 * Then, blow out the tlb
191 * Interrupts are disabled to prevent the primary ctx register
220
221 wrpr %g0, 1, %tl ! set %tl = 1 for the context switch and demap
222 set MMU_PCONTEXT, %o4 ! %o4 = primary context register address
223 or DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %o0, %o0 ! %o0 = vaddr with demap-page/primary flags
224 ldxa [%o4]ASI_DMMU, %o2 ! %o2 = save old ctxnum
225 srlx %o2, CTXREG_NEXT_SHIFT, %o1 ! need to preserve nucleus pgsz
226 sllx %o1, CTXREG_NEXT_SHIFT, %o1 ! %o1 = nucleus pgsz
227 or %g1, %o1, %g1 ! %g1 = nucleus pgsz | primary pgsz | cnum
228 stxa %g1, [%o4]ASI_DMMU ! wr new ctxnum
229
230 stxa %g0, [%o0]ASI_DTLB_DEMAP ! demap the page from the D-TLB
231 stxa %g0, [%o0]ASI_ITLB_DEMAP ! demap the page from the I-TLB
232 stxa %o2, [%o4]ASI_DMMU /* restore old ctxnum */
233 flush %o3 ! flush after the I-MMU demap
234 wrpr %g0, 0, %tl ! back to %tl = 0
235
236 retl
237 wrpr %g0, %o5, %pstate /* enable interrupts */
238 SET_SIZE(vtag_flushpage)
239
240 ENTRY_NP2(vtag_flushall, demap_all)
241 /*
242 * flush the tlb: demap-all on both the D-TLB and the I-TLB.
243 * No input registers are read; entered as vtag_flushall or demap_all. */
244 sethi %hi(FLUSH_ADDR), %o3 ! %o3 = address operand for the flush below
245 set DEMAP_ALL_TYPE, %g1 ! %g1 = demap-all operation address
246 stxa %g0, [%g1]ASI_DTLB_DEMAP ! demap all D-TLB entries
247 stxa %g0, [%g1]ASI_ITLB_DEMAP ! demap all I-TLB entries
248 flush %o3 ! flush after the I-MMU demap
249 retl
250 nop
251 SET_SIZE(demap_all)
252 SET_SIZE(vtag_flushall)
253
254
255 ENTRY_NP(vtag_flushpage_tl1)
256 /*
257 * x-trap to flush page from tlb and tsb
258 *
259 * %g1 = vaddr, zero-extended on 32-bit kernel
260 * %g2 = sfmmup
261 *
262 * assumes TSBE_TAG = 0
263 */
264 srln %g1, MMU_PAGESHIFT, %g1 ! shift out the page-offset bits
265
266 sethi %hi(ksfmmup), %g3
267 ldx [%g3 + %lo(ksfmmup)], %g3 ! %g3 = ksfmmup
268 cmp %g3, %g2
269 bne,pt %xcc, 1f ! if not kernel as, go to 1
270 slln %g1, MMU_PAGESHIFT, %g1 /* g1 = vaddr (page aligned); delay slot, runs on both paths */
271
272 /* We need to demap in the kernel context */
273 or DEMAP_NUCLEUS | DEMAP_PAGE_TYPE, %g1, %g1
274 stxa %g0, [%g1]ASI_DTLB_DEMAP
279 or DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %g1, %g1
280
281 SFMMU_CPU_CNUM(%g2, %g6, %g3) ! %g6 = sfmmu cnum on this CPU
282
283 ldub [%g2 + SFMMU_CEXT], %g4 ! %g4 = sfmmup->cext
284 sll %g4, CTXREG_EXT_SHIFT, %g4
285 or %g6, %g4, %g6 ! %g6 = pgsz | cnum
286
287 set MMU_PCONTEXT, %g4 ! %g4 = primary context register address
288 ldxa [%g4]ASI_DMMU, %g5 /* rd old ctxnum */
289 srlx %g5, CTXREG_NEXT_SHIFT, %g2 /* %g2 = nucleus pgsz */
290 sllx %g2, CTXREG_NEXT_SHIFT, %g2 /* preserve nucleus pgsz */
291 or %g6, %g2, %g6 /* %g6 = nucleus pgsz | primary pgsz | cnum */
292 stxa %g6, [%g4]ASI_DMMU /* wr new ctxnum */
293 stxa %g0, [%g1]ASI_DTLB_DEMAP ! demap the page from the D-TLB
294 stxa %g0, [%g1]ASI_ITLB_DEMAP ! demap the page from the I-TLB
295 stxa %g5, [%g4]ASI_DMMU /* restore old ctxnum */
296 retry
297 SET_SIZE(vtag_flushpage_tl1)
298
299
300 ENTRY_NP(vtag_flush_pgcnt_tl1)
301 /*
302 * x-trap to flush pgcnt MMU_PAGESIZE pages from tlb
303 *
304 * %g1 = vaddr, zero-extended on 32-bit kernel
305 * %g2 = <sfmmup58|pgcnt6>, (pgcnt - 1) is passed in via pgcnt6 bits.
306 *
307 * NOTE: this handler relies on the fact that no
308 * interrupts or traps can occur during the loop
309 * issuing the TLB_DEMAP operations. It is assumed
310 * that interrupts are disabled and this code is
311 * fetching from the kernel locked text address.
312 *
313 * assumes TSBE_TAG = 0
314 */
315 set SFMMU_PGCNT_MASK, %g4
316 and %g4, %g2, %g3 /* g3 = pgcnt - 1 */
317 add %g3, 1, %g3 /* g3 = pgcnt */
318
319 andn %g2, SFMMU_PGCNT_MASK, %g2 /* g2 = sfmmup */
358 srlx %g6, CTXREG_NEXT_SHIFT, %g2 /* %g2 = nucleus pgsz */
359 sllx %g2, CTXREG_NEXT_SHIFT, %g2 /* preserve nucleus pgsz */
360 or %g5, %g2, %g5 /* %g5 = nucleus pgsz | primary pgsz | cnum */
361 stxa %g5, [%g4]ASI_DMMU /* wr new ctxnum */
362
363 set MMU_PAGESIZE, %g2 /* g2 = pgsize */
364 sethi %hi(FLUSH_ADDR), %g5 ! %g5 = address operand for the flush in the loop
365 3:
366 stxa %g0, [%g1]ASI_DTLB_DEMAP ! demap this page from the D-TLB
367 stxa %g0, [%g1]ASI_ITLB_DEMAP ! demap this page from the I-TLB
368 flush %g5 ! flush required by immu
369
370 deccc %g3 /* decr pgcnt */
371 bnz,pt %icc,3b ! loop until pgcnt reaches zero
372 add %g1, %g2, %g1 /* next page (delay slot, always executed) */
373
374 stxa %g6, [%g4]ASI_DMMU /* restore old ctxnum */
375 retry
376 SET_SIZE(vtag_flush_pgcnt_tl1)
377
378 ENTRY_NP(vtag_flushall_tl1)
379 /*
380 * x-trap to flush tlb: demap-all on both TLBs; no input registers are read
381 */
382 set DEMAP_ALL_TYPE, %g4 ! %g4 = demap-all operation address
383 stxa %g0, [%g4]ASI_DTLB_DEMAP ! demap all D-TLB entries
384 stxa %g0, [%g4]ASI_ITLB_DEMAP ! demap all I-TLB entries
385 retry
386 SET_SIZE(vtag_flushall_tl1)
387
388
389 /*
390 * vac_flushpage(pfnum, color)
391 * Flush 1 8k page of the D-$ with physical page = pfnum
392 * Algorithm:
393 * The cheetah dcache is a 64k pseudo 4 way associative cache.
394 * It is virtual indexed, physically tagged cache.
395 */
396 .seg ".data"
397 .align 8
398 .global dflush_type ! exported; DCACHE_FLUSHPAGE loads it via ASM_LD
399 dflush_type:
400 .word FLUSHPAGE_TYPE ! initial value; DCACHE_FLUSHPAGE compares it with FLUSHPAGE_TYPE and FLUSHMATCH_TYPE
401
402 ENTRY(vac_flushpage)
403 /*
404 * flush page from the d$
405 *
406 * %o0 = pfnum, %o1 = color
407 */
408 DCACHE_FLUSHPAGE(%o0, %o1, %o2, %o3, %o4) /* %o2-%o4 are the macro temporaries; the macro may also modify %o0 */
409 retl
410 nop
411 SET_SIZE(vac_flushpage)
412
413
414 ENTRY_NP(vac_flushpage_tl1)
415 /*
416 * x-trap to flush page from the d$
417 *
418 * %g1 = pfnum, %g2 = color
419 */
420 DCACHE_FLUSHPAGE(%g1, %g2, %g3, %g4, %g5) /* %g3-%g5 are the macro temporaries; the macro may also modify %g1 */
421 retry
422 SET_SIZE(vac_flushpage_tl1)
423
424
425 ENTRY(vac_flushcolor)
426 /*
427 * %o0 = vcolor; the macro is invoked once per way value 0-3, with %o1-%o3 as temporaries
428 */
429 DCACHE_FLUSHCOLOR(%o0, 0, %o1, %o2, %o3)
430 DCACHE_FLUSHCOLOR(%o0, 1, %o1, %o2, %o3)
431 DCACHE_FLUSHCOLOR(%o0, 2, %o1, %o2, %o3)
432 DCACHE_FLUSHCOLOR(%o0, 3, %o1, %o2, %o3)
433 retl
434 nop
435 SET_SIZE(vac_flushcolor)
436
437
438 ENTRY(vac_flushcolor_tl1)
439 /*
440 * %g1 = vcolor; the macro is invoked once per way value 0-3, with %g2-%g4 as temporaries
441 */
442 DCACHE_FLUSHCOLOR(%g1, 0, %g2, %g3, %g4)
443 DCACHE_FLUSHCOLOR(%g1, 1, %g2, %g3, %g4)
444 DCACHE_FLUSHCOLOR(%g1, 2, %g2, %g3, %g4)
445 DCACHE_FLUSHCOLOR(%g1, 3, %g2, %g3, %g4)
446 retry /* returns with retry rather than retl, unlike vac_flushcolor */
447 SET_SIZE(vac_flushcolor_tl1)
448
449 /*
450 * Determine whether or not the IDSR is busy.
451 * Entry: no arguments
452 * Returns: 1 if busy, 0 otherwise (in %o0); %g1 is clobbered
453 */
454 ENTRY(idsr_busy)
455 ldxa [%g0]ASI_INTR_DISPATCH_STATUS, %g1 ! %g1 = interrupt dispatch status
456 clr %o0 ! default return value: not busy
457 btst IDSR_BUSY, %g1 ! Z is set when the busy bit is clear
458 bnz,a,pn %xcc, 1f ! busy bit set: branch taken, so the annulled delay slot executes
459 mov 1, %o0 ! return 1 (busy); skipped when the branch is not taken
460 1:
461 retl
462 nop
463 SET_SIZE(idsr_busy)
464
465 .global _dispatch_status_busy ! exported message string; NOTE(review): presumably the panic text for the DEBUG check in init_mondo - confirm
466 _dispatch_status_busy:
467 .asciz "ASI_INTR_DISPATCH_STATUS error: busy"
468 .align 4 ! restore 4-byte alignment after the string
469
470 /*
471 * Setup interrupt dispatch data registers
472 * Entry:
473 * %o0 - function or inumber to call
474 * %o1, %o2 - arguments (2 uint64_t's)
475 */
476 .seg "text"
477
478 ENTRY(init_mondo)
479 #ifdef DEBUG
480 !
481 ! IDSR should not be busy at the moment
482 !
483 ldxa [%g0]ASI_INTR_DISPATCH_STATUS, %g1
484 btst IDSR_BUSY, %g1
498 mov IDDR_1, %g2 ! %g2 = address of dispatch data reg 1
499 mov IDDR_2, %g3 ! %g3 = address of dispatch data reg 2
500 stxa %o0, [%g1]ASI_INTR_DISPATCH ! function or inumber -> dispatch register addressed by %g1
501
502 !
503 ! interrupt vector dispatch data reg 1
504 !
505 stxa %o1, [%g2]ASI_INTR_DISPATCH ! first argument
506
507 !
508 ! interrupt vector dispatch data reg 2
509 !
510 stxa %o2, [%g3]ASI_INTR_DISPATCH ! second argument
511
512 membar #Sync
513 retl
514 nop
515 SET_SIZE(init_mondo_nocheck)
516 SET_SIZE(init_mondo)
517
518
519 #if !(defined(JALAPENO) || defined(SERRANO))
520
521 /*
522 * Ship mondo to aid (%o0) using busy/nack pair bn (%o1); clobbers %g1 and %g2
523 */
524 ENTRY_NP(shipit)
525 sll %o0, IDCR_PID_SHIFT, %g1 ! IDCR<18:14> = agent id
526 sll %o1, IDCR_BN_SHIFT, %g2 ! IDCR<28:24> = b/n pair
527 or %g1, IDCR_OFFSET, %g1 ! IDCR<13:0> = 0x70
528 or %g1, %g2, %g1 ! %g1 = complete dispatch address
529 stxa %g0, [%g1]ASI_INTR_DISPATCH ! interrupt vector dispatch
530 membar #Sync
531 retl
532 nop
533 SET_SIZE(shipit)
534
535 #endif /* !(JALAPENO || SERRANO) */
536
537
538 /*
539 * flush_instr_mem:
540 * Flush 1 page of the I-$ starting at vaddr
541 * %o0 vaddr
542 * %o1 bytes to be flushed
543 * UltraSPARC-III maintains consistency of the on-chip Instruction Cache with
544 * the stores from all processors so that a FLUSH instruction is only needed
545 * to ensure pipeline is consistent. This means a single flush is sufficient at
546 * the end of a sequence of stores that updates the instruction stream to
547 * ensure correct operation.
548 */
549
550 ENTRY(flush_instr_mem)
551 flush %o0 ! address irrelevant; a single flush is issued and the %o1 byte count is not used
552 retl
553 nop
554 SET_SIZE(flush_instr_mem)
555
556
557 #if defined(CPU_IMP_ECACHE_ASSOC)
558
559 ENTRY(get_ecache_ctrl) /* returns the E$ control register value in %o0 */
560 GET_CPU_IMPL(%o0) /* %o0 = value compared against JAGUAR_IMPL below */
561 cmp %o0, JAGUAR_IMPL
562 !
563 ! Putting an ASI access in the delay slot may
564 ! cause it to be accessed, even when annulled.
565 !
566 bne 1f ! not Jaguar: read ASI_EC_CTRL at 1f
567 nop
568 ldxa [%g0]ASI_EC_CFG_TIMING, %o0 ! read Jaguar shared E$ ctrl reg
569 b 2f
570 nop
571 1:
572 ldxa [%g0]ASI_EC_CTRL, %o0 ! read Ch/Ch+ E$ control reg
573 2:
574 retl
575 nop
576 SET_SIZE(get_ecache_ctrl)
577
578 #endif /* CPU_IMP_ECACHE_ASSOC */
579
580
581 #if !(defined(JALAPENO) || defined(SERRANO))
582
583 /*
584 * flush_ecache:
585 * %o0 - 64 bit physical address
586 * %o1 - ecache size
587 * %o2 - ecache linesize
588 */
589
590 ENTRY(flush_ecache)
591
592 /*
593 * For certain CPU implementations, we have to flush the L2 cache
594 * before flushing the ecache.
595 */
596 PN_L2_FLUSHALL(%g3, %g4, %g5) /* NOTE(review): %g3-%g5 are not set up here, so presumably scratch - confirm */
597
598 /*
599 * Flush the entire Ecache using displacement flush.
600 */
601 ECACHE_FLUSHALL(%o1, %o2, %o0, %o4) /* passed: ecache size, linesize, physical address, %o4 */
602
603 retl
604 nop
605 SET_SIZE(flush_ecache)
606
607 #endif /* !(JALAPENO || SERRANO) */
608
609
610 ENTRY(flush_dcache) /* flush the whole D$ using the dcache_size and dcache_linesize globals; no input registers are read */
611 ASM_LD(%o0, dcache_size) /* %o0 = dcache_size */
612 ASM_LD(%o1, dcache_linesize) /* %o1 = dcache_linesize */
613 CH_DCACHE_FLUSHALL(%o0, %o1, %o2) /* passed: size, linesize, %o2 */
614 retl
615 nop
616 SET_SIZE(flush_dcache)
617
618
619 ENTRY(flush_icache) /* flush the whole I$; no input registers are read */
620 GET_CPU_PRIVATE_PTR(%g0, %o0, %o2, flush_icache_1); /* NOTE(review): presumably %o0 = per-CPU private pointer, with a jump to flush_icache_1 when there is none - confirm */
621 ld [%o0 + CHPR_ICACHE_LINESIZE], %o1 ! %o1 = line size from the private area
622 ba,pt %icc, 2f
623 ld [%o0 + CHPR_ICACHE_SIZE], %o0 ! %o0 = size from the private area (delay slot)
624 flush_icache_1:
625 ASM_LD(%o0, icache_size) /* fallback: %o0 = icache_size global */
626 ASM_LD(%o1, icache_linesize) /* fallback: %o1 = icache_linesize global */
627 2:
628 CH_ICACHE_FLUSHALL(%o0, %o1, %o2, %o4) /* passed: size, linesize, %o2, %o4 */
629 retl
630 nop
631 SET_SIZE(flush_icache)
632
633 ENTRY(kdi_flush_idcache) /* flush the D$ then the I$ using caller-supplied %o0-%o3 */
634 CH_DCACHE_FLUSHALL(%o0, %o1, %g1) /* same argument order as flush_dcache, so presumably %o0 = D$ size, %o1 = D$ linesize - confirm with callers */
635 CH_ICACHE_FLUSHALL(%o2, %o3, %g1, %g2) /* same argument order as flush_icache, so presumably %o2 = I$ size, %o3 = I$ linesize - confirm with callers */
636 membar #Sync
637 retl
638 nop
639 SET_SIZE(kdi_flush_idcache)
640
641 ENTRY(flush_pcache) /* flush the P$ via PCACHE_FLUSHALL */
642 PCACHE_FLUSHALL(%o0, %o1, %o2) /* NOTE(review): %o0-%o2 are not set up here, so presumably scratch - confirm against cheetahasm.h */
643 retl
644 nop
645 SET_SIZE(flush_pcache)
646
647
648 #if defined(CPU_IMP_L1_CACHE_PARITY)
649
650 /*
651 * Get dcache data and tag. The Dcache data is a pointer to a ch_dc_data_t
652 * structure (see cheetahregs.h):
653 * The Dcache *should* be turned off when this code is executed.
654 */
655 .align 128
656 ENTRY(get_dcache_dtag) /* %o0 = D$ diagnostic index, %o1 = pointer to the data structure being filled */
657 rdpr %pstate, %o5 ! %o5 = caller %pstate, written back on return
658 andn %o5, PSTATE_IE | PSTATE_AM, %o3 ! %o3 = %pstate with IE and AM cleared
659 wrpr %g0, %o3, %pstate
660 b 1f
661 stx %o0, [%o1 + CH_DC_IDX] ! record the index (delay slot)
662
663 .align 128
664 1:
665 ldxa [%o0]ASI_DC_TAG, %o2
666 stx %o2, [%o1 + CH_DC_TAG] ! record the tag
667 membar #Sync
668 ldxa [%o0]ASI_DC_UTAG, %o2
669 membar #Sync
707 add %o1, CH_DC_PN_DATA_PARITY - CH_DC_DATA + 7, %o1
708
709 /* add the DC_data_parity bit into our working index */
710 mov 1, %o2
711 sll %o2, PN_DC_DATA_PARITY_BIT_SHIFT, %o2
712 or %o0, %o2, %o0
713 3:
714 membar #Sync ! required before ASI_DC_DATA
715 ldxa [%o0 + %o3]ASI_DC_DATA, %o2
716 membar #Sync ! required after ASI_DC_DATA
717 stb %o2, [%o1] ! store one byte per 8-byte step
718 dec %o1 ! destination pointer moves downward
719 cmp %o3, CH_DC_DATA_REG_SIZE - 8
720 blt 3b ! loop while the offset tested by cmp is below the last 8-byte step
721 add %o3, 8, %o3 ! advance the offset (delay slot, always executed)
722 4:
723 retl
724 wrpr %g0, %o5, %pstate ! restore caller %pstate (delay slot)
725 SET_SIZE(get_dcache_dtag)
726
727
728 /*
729 * Get icache data and tag. The data argument is a pointer to a ch_ic_data_t
730 * structure (see cheetahregs.h):
731 * The Icache *Must* be turned off when this function is called.
732 * This is because diagnostic accesses to the Icache interfere with cache
733 * consistency.
734 */
735 .align 128
736 ENTRY(get_icache_dtag) /* %o0 = I$ diagnostic index, %o1 = pointer to the data structure being filled */
737 rdpr %pstate, %o5 ! %o5 = caller %pstate, written back on return
738 andn %o5, PSTATE_IE | PSTATE_AM, %o3 ! %o3 = %pstate with IE and AM cleared
739 wrpr %g0, %o3, %pstate
740
741 stx %o0, [%o1 + CH_IC_IDX] ! record the index
742 ldxa [%o0]ASI_IC_TAG, %o2
743 stx %o2, [%o1 + CH_IC_PATAG] ! record the tag read at the unmodified index
744 add %o0, CH_ICTAG_UTAG, %o0 ! select the UTAG field in the index
745 ldxa [%o0]ASI_IC_TAG, %o2
746 add %o0, (CH_ICTAG_UPPER - CH_ICTAG_UTAG), %o0 ! switch the selector from UTAG to UPPER
747 stx %o2, [%o1 + CH_IC_UTAG]
749 add %o0, (CH_ICTAG_LOWER - CH_ICTAG_UPPER), %o0 ! switch the selector from UPPER to LOWER
750 stx %o2, [%o1 + CH_IC_UPPER]
751 ldxa [%o0]ASI_IC_TAG, %o2
752 andn %o0, CH_ICTAG_TMASK, %o0 ! clear the tag-selector bits from the index
753 stx %o2, [%o1 + CH_IC_LOWER]
754 ldxa [%o0]ASI_IC_SNP_TAG, %o2
755 stx %o2, [%o1 + CH_IC_SNTAG] ! record the snoop tag
756 add %o1, CH_IC_DATA, %o1 ! %o1 = start of the data area
757 clr %o3 ! %o3 = byte offset, starting at 0
758 2:
759 ldxa [%o0 + %o3]ASI_IC_DATA, %o2
760 stx %o2, [%o1 + %o3] ! copy 8 bytes per iteration
761 cmp %o3, PN_IC_DATA_REG_SIZE - 8
762 blt 2b ! loop while the offset tested by cmp is below the last 8-byte step
763 add %o3, 8, %o3 ! advance the offset (delay slot, always executed)
764
765 retl
766 wrpr %g0, %o5, %pstate ! restore caller %pstate (delay slot)
767 SET_SIZE(get_icache_dtag)
768
769 /*
770 * Get pcache data and tags.
771 * inputs:
772 * pcache_idx - fully constructed VA for accessing P$ diagnostic
773 * registers. Contains PC_way and PC_addr shifted into
774 * the correct bit positions. See the PRM for more details.
775 * data - pointer to a ch_pc_data_t
776 * structure (see cheetahregs.h):
777 */
778 .align 128
779 ENTRY(get_pcache_dtag) /* %o0 = pcache_idx, %o1 = data pointer */
780 rdpr %pstate, %o5 ! %o5 = caller %pstate, written back on return
781 andn %o5, PSTATE_IE | PSTATE_AM, %o3 ! %o3 = %pstate with IE and AM cleared
782 wrpr %g0, %o3, %pstate
783
784 stx %o0, [%o1 + CH_PC_IDX] ! record the index
785 ldxa [%o0]ASI_PC_STATUS_DATA, %o2
786 stx %o2, [%o1 + CH_PC_STATUS] ! record the status data
787 ldxa [%o0]ASI_PC_TAG, %o2
788 stx %o2, [%o1 + CH_PC_TAG] ! record the tag
789 ldxa [%o0]ASI_PC_SNP_TAG, %o2
790 stx %o2, [%o1 + CH_PC_SNTAG] ! record the snoop tag
791 add %o1, CH_PC_DATA, %o1 ! %o1 = start of the data area
792 clr %o3 ! %o3 = byte offset, starting at 0
793 2:
794 ldxa [%o0 + %o3]ASI_PC_DATA, %o2
795 stx %o2, [%o1 + %o3] ! copy 8 bytes per iteration
796 cmp %o3, CH_PC_DATA_REG_SIZE - 8
797 blt 2b ! loop while the offset tested by cmp is below the last 8-byte step
798 add %o3, 8, %o3 ! advance the offset (delay slot, always executed)
799
800 retl
801 wrpr %g0, %o5, %pstate ! restore caller %pstate (delay slot)
802 SET_SIZE(get_pcache_dtag)
803
804 #endif /* CPU_IMP_L1_CACHE_PARITY */
805
806 /*
807 * re-enable the i$, d$, w$, and p$ according to bootup cache state.
808 * Turn on WE, HPE, SPE, PE, IC, and DC bits defined as DCU_CACHE.
809 * %o0 - 64 bit constant
810 */
811 ENTRY(set_dcu) /* %o0 = value to write to the DCU register */
812 stxa %o0, [%g0]ASI_DCU ! Store to DCU
813 flush %g0 /* flush required after changing the IC bit */
814 retl
815 nop
816 SET_SIZE(set_dcu)
817
818
819 /*
820 * Return DCU register.
821 */
822 ENTRY(get_dcu) /* no arguments; result is returned in %o0 */
823 ldxa [%g0]ASI_DCU, %o0 /* DCU control register */
824 retl
825 nop
826 SET_SIZE(get_dcu)
827
828 /*
829 * Cheetah/Cheetah+ level 15 interrupt handler trap table entry.
830 *
831 * This handler is used to check for softints generated by error trap
832 * handlers to report errors. On Cheetah, this mechanism is used by the
833 * Fast ECC at TL>0 error trap handler and, on Cheetah+, by both the Fast
834 * ECC at TL>0 error and the I$/D$ parity error at TL>0 trap handlers.
835 * NB: Must be 8 instructions or less to fit in trap table and code must
836 * be relocatable.
837 */
838
839 ENTRY_NP(ch_pil15_interrupt_instr)
840 ASM_JMP(%g1, ch_pil15_interrupt) /* trap-table stub: continue in ch_pil15_interrupt; %g1 is the register handed to ASM_JMP */
841 SET_SIZE(ch_pil15_interrupt_instr)
842
843
844 ENTRY_NP(ch_pil15_interrupt)
845
846 /*
847 * Since pil_interrupt is hacked to assume that every level 15
848 * interrupt is generated by the CPU to indicate a performance
849 * counter overflow this gets ugly. Before calling pil_interrupt
850 * the Error at TL>0 pending status is inspected. If it is
851 * non-zero, then an error has occurred and it is handled.
852 * Otherwise control is transferred to pil_interrupt. Note that if
853 * an error is detected pil_interrupt will not be called and
854 * overflow interrupts may be lost causing erroneous performance
855 * measurements. However, error-recovery will have a detrimental
856 * effect on performance anyway.
857 */
858 CPU_INDEX(%g1, %g4) /* %g1 is used below as this CPU index into ch_err_tl1_pending */
859 set ch_err_tl1_pending, %g4 ! %g4 = base of the pending-flag array
860 ldub [%g1 + %g4], %g2 ! %g2 = pending flag byte for this CPU
861 brz %g2, 1f ! nothing pending: use the default handler at 1f
862 nop
863
877 * For Cheetah*, call cpu_tl1_error via systrap at PIL 15
878 * to process the Fast ECC/Cache Parity at TL>0 error. Clear
879 * panic flag (%g2).
880 */
881 set cpu_tl1_error, %g1 ! %g1 = routine for sys_trap to call
882 clr %g2
883 ba sys_trap
884 mov PIL_15, %g4 ! %g4 = PIL_15 (delay slot)
885
886 1:
887 /*
888 * The logout is invalid.
889 *
890 * Call the default interrupt handler.
891 */
892 sethi %hi(pil_interrupt), %g1
893 jmp %g1 + %lo(pil_interrupt)
894 mov PIL_15, %g4 ! %g4 = PIL_15 (delay slot)
895
896 SET_SIZE(ch_pil15_interrupt)
897
898
899 /*
900 * Error Handling
901 *
902 * Cheetah provides error checking for all memory access paths between
903 * the CPU, External Cache, Cheetah Data Switch and system bus. Error
904 * information is logged in the AFSR, (also AFSR_EXT for Panther) and
905 * AFAR and one of the following traps is generated (provided that it
906 * is enabled in External Cache Error Enable Register) to handle that
907 * error:
908 * 1. trap 0x70: Precise trap
909 * tt0_fecc for errors at trap level(TL)>=0
910 * 2. trap 0x0A and 0x32: Deferred trap
911 * async_err for errors at TL>=0
912 * 3. trap 0x63: Disrupting trap
913 * ce_err for errors at TL=0
914 * (Note that trap 0x63 cannot happen at trap level > 0)
915 *
916 * Trap level one handlers panic the system except for the fast ecc
969 * can flush the E$ (or the error is a stuck-at bit), we will recurse in
970 * the Fast ECC at TL>0 handler and eventually Red Mode.
971 *
972 * Note that for Cheetah (and only Cheetah), we use alias addresses for
973 * flushing rather than ASI accesses (which don't exist on Cheetah).
974 * Should we encounter a Fast ECC error within this handler on Cheetah,
975 * there's a good chance it's within the ecache_flushaddr buffer (since
976 * it's the largest piece of memory we touch in the handler and it is
977 * usually kernel text/data). For that reason the Fast ECC at TL>0
978 * handler for Cheetah uses an alternate buffer: ecache_tl1_flushaddr.
979 */
980
981 /*
982 * Cheetah ecc-protected E$ trap (Trap 70) at TL=0
983 * tt0_fecc is replaced by fecc_err_instr in cpu_init_trap of the various
984 * architecture-specific files.
985 * NB: Must be 8 instructions or less to fit in trap table and code must
986 * be relocatable.
987 */
988
989 ENTRY_NP(fecc_err_instr)
990 membar #Sync ! Cheetah requires membar #Sync
991
992 /*
993 * Save current DCU state. Turn off the Dcache and Icache.
994 */
995 ldxa [%g0]ASI_DCU, %g1 ! save DCU in %g1
996 andn %g1, DCU_DC + DCU_IC, %g4 ! %g4 = DCU value with the DC and IC bits cleared
997 stxa %g4, [%g0]ASI_DCU
998 flush %g0 /* flush required after changing the IC bit */
999
1000 ASM_JMP(%g4, fast_ecc_err) /* continue in fast_ecc_err; %g4 is the register handed to ASM_JMP, %g1 still holds the saved DCU */
1001 SET_SIZE(fecc_err_instr)
1002
1003
1004 #if !(defined(JALAPENO) || defined(SERRANO))
1005
1006 .section ".text"
1007 .align 64
1008 ENTRY_NP(fast_ecc_err)
1009
1010 /*
1011 * Turn off CEEN and NCEEN.
1012 */
1013 ldxa [%g0]ASI_ESTATE_ERR, %g3 ! %g3 = error enable register as found on entry
1014 andn %g3, EN_REG_NCEEN + EN_REG_CEEN, %g4 ! %g4 = same value with NCEEN and CEEN cleared
1015 stxa %g4, [%g0]ASI_ESTATE_ERR
1016 membar #Sync ! membar sync required
1017
1018 /*
1019 * Check to see whether we need to park our sibling core
1020 * before recording diagnostic information from caches
1021 * which may be shared by both cores.
1022 * We use %g1 to store information about whether or not
1023 * we had to park the core (%g1 holds our DCUCR value and
1024 * we only use bits from that register which are "reserved"
1025 * to keep track of core parking) so that we know whether
1125 * Otherwise, if the logout structure was busy but we have not
1126 * nested more times than our maximum value, then we simply
1127 * issue a retry. Our TL=0 trap handler code will check and
1128 * clear the AFSR after it is done logging what is currently
1129 * in the logout struct and handle this event at that time.
1130 */
1131 retry
1132 8:
1133 /*
1134 * Call cpu_fast_ecc_error via systrap at PIL 14 unless we're
1135 * already at PIL 15.
1136 */
1137 set cpu_fast_ecc_error, %g1 ! %g1 = routine for sys_trap to call
1138 rdpr %pil, %g4 ! %g4 = current %pil
1139 cmp %g4, PIL_14
1140 ba sys_trap
1141 movl %icc, PIL_14, %g4 ! raise %g4 to PIL_14 when %pil is below 14, else keep it (delay slot)
1142
1143 SET_SIZE(fast_ecc_err)
1144
1145 #endif /* !(JALAPENO || SERRANO) */
1146
1147
1148 /*
1149 * Cheetah/Cheetah+ Fast ECC at TL>0 trap strategy:
1150 *
1151 * The basic flow of this trap handler is as follows:
1152 *
1153 * 1) In the "trap 70" trap table code (fecc_err_tl1_instr), generate a
1154 * software trap 0 ("ta 0") to buy an extra set of %tpc, etc. which we
1155 * will use to save %g1 and %g2.
1156 * 2) At the software trap 0 at TL>0 trap table code (fecc_err_tl1_cont_instr),
1157 * we save %g1+%g2 using %tpc, %tnpc + %tstate and jump to the fast ecc
1158 * handler (using the just saved %g1).
1159 * 3) Turn off the Dcache if it was on and save the state of the Dcache
1160 * (whether on or off) in Bit2 (CH_ERR_TSTATE_DC_ON) of %tstate.
1161 * NB: we don't turn off the Icache because bad data is not installed nor
1162 * will we be doing any diagnostic accesses.
1163 * 4) compute physical address of the per-cpu/per-tl save area using %g1+%g2
1164 * 5) Save %g1-%g7 into the per-cpu/per-tl save area (%g1 + %g2 from the
1182 * Save the read AFSR/AFAR values in ch_err_tl1_data. For Panther,
1183 * read and clear the primary AFSR_EXT and save it in ch_err_tl1_data.
1184 * 14) Flush and re-enable the Dcache if it was on at step 3.
1185 * 15) Do TRAPTRACE if enabled.
1186 * 16) Check if a UCU->WDU (or L3_UCU->WDU for Panther) happened, panic if so.
1187 * 17) Set the event pending flag in ch_err_tl1_pending[CPU]
1188 * 18) Cause a softint 15. The pil15_interrupt handler will inspect the
1189 * event pending flag and call cpu_tl1_error via systrap if set.
1190 * 19) Restore the registers from step 5 and issue retry.
1191 */
1192
1193 /*
1194 * Cheetah ecc-protected E$ trap (Trap 70) at TL>0
1195 * tt1_fecc is replaced by fecc_err_tl1_instr in cpu_init_trap of the various
1196 * architecture-specific files. This generates a "Software Trap 0" at TL>0,
1197 * which goes to fecc_err_tl1_cont_instr, and we continue the handling there.
1198 * NB: Must be 8 instructions or less to fit in trap table and code must
1199 * be relocatable.
1200 */
1201
1202 ENTRY_NP(fecc_err_tl1_instr)
1203 CH_ERR_TL1_TRAPENTRY(SWTRAP_0); /* trap-table stub: raise software trap 0 so handling continues in fecc_err_tl1_cont_instr */
1204 SET_SIZE(fecc_err_tl1_instr)
1205
1206 /*
1207 * Software trap 0 at TL>0.
1208 * tt1_swtrap0 is replaced by fecc_err_tl1_cont_instr in cpu_init_trap of
1209 * the various architecture-specific files. This is used as a continuation
1210 * of the fast ecc handling where we've bought an extra TL level, so we can
1211 * use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
1212 * and %g2. Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
1213 * there's a reserved hole from 3-7. We only use bits 0-1 and 8-9 (the low
1214 * order two bits from %g1 and %g2 respectively).
1215 * NB: Must be 8 instructions or less to fit in trap table and code must
1216 * be relocatable.
1217 */
1218
1219 ENTRY_NP(fecc_err_tl1_cont_instr)
1220 CH_ERR_TL1_SWTRAPENTRY(fast_ecc_tl1_err) /* trap-table stub: stash %g1/%g2 in %tpc, %tnpc and %tstate as described above, then go to fast_ecc_tl1_err */
1221 SET_SIZE(fecc_err_tl1_cont_instr)
1222
1223
1224 /*
1225 * The ce_err function handles disrupting trap type 0x63 at TL=0.
1226 *
1227 * AFSR errors bits which cause this trap are:
1228 * CE, EMC, EDU:ST, EDC, WDU, WDC, CPU, CPC, IVU, IVC
1229 *
1230 * NCEEN Bit of Cheetah External Cache Error Enable Register enables
1231 * the following AFSR disrupting traps: EDU:ST, WDU, CPU, IVU
1232 *
1233 * CEEN Bit of Cheetah External Cache Error Enable Register enables
1234 * the following AFSR disrupting traps: CE, EMC, EDC, WDC, CPC, IVC
1235 *
1236 * Cheetah+ also handles (No additional processing required):
1237 * DUE, DTO, DBERR (NCEEN controlled)
1238 * THCE (CEEN and ET_ECC_en controlled)
1239 * TUE (ET_ECC_en controlled)
1240 *
1241 * Panther further adds:
1242 * IMU, L3_EDU, L3_WDU, L3_CPU (NCEEN controlled)
1243 * IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE (CEEN controlled)
1382 /*
1383 * Otherwise, if the logout structure was busy but we have not
1384 * nested more times than our maximum value, then we simply
1385 * issue a retry. Our TL=0 trap handler code will check and
1386 * clear the AFSR after it is done logging what is currently
1387 * in the logout struct and handle this event at that time.
1388 */
1389 retry
1390 4:
1391 /*
1392 * Call cpu_disrupting_error via systrap at PIL 14 unless we're
1393 * already at PIL 15.
1394 */
1395 set cpu_disrupting_error, %g1
1396 rdpr %pil, %g4
1397 cmp %g4, PIL_14
1398 ba sys_trap
1399 movl %icc, PIL_14, %g4
1400 SET_SIZE(ce_err)
1401
1402
1403 /*
1404 * This trap cannot happen at TL>0 which means this routine will never
1405 * actually be called and so we treat this like a BAD TRAP panic.
1406 */
1407 .align 64
1408 ENTRY_NP(ce_err_tl1)
1409
1410 call ptl1_panic ! unconditional panic: this trap is not expected at TL>0
1411 mov PTL1_BAD_TRAP, %g1 ! %g1 = panic reason code (delay slot)
1412
1413 SET_SIZE(ce_err_tl1)
1414
1415
1416 /*
1417 * The async_err function handles deferred trap types 0xA
1418 * (instruction_access_error) and 0x32 (data_access_error) at TL>=0.
1419 *
1420 * AFSR errors bits which cause this trap are:
1421 * UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
1422 * On some platforms, EMU may cause Cheetah to pull the error pin,
1423 * never giving Solaris a chance to take a trap.
1424 *
1425 * NCEEN Bit of Cheetah External Cache Error Enable Register enables
1426 * the following AFSR deferred traps: UE, EMU, EDU:BLD, TO, BERR
1427 *
1428 * Steps:
1429 * 1. Disable CEEN and NCEEN errors to prevent recursive errors.
1430 * 2. Turn D$ off per Cheetah PRM P.5 Note 6, turn I$ off to capture
1431 * I$ line in DO_CPU_LOGOUT.
1432 * 3. Park sibling core if caches are shared (to avoid race
1433 * condition while accessing shared resources such as L3
1434 * data staging register during CPU logout).
1435 * 4. If the CPU logout structure is not currently being used:
1593 call ptl1_panic
1594 mov PTL1_BAD_ECC, %g1
1595
1596 3:
1597 /*
1598 * Otherwise, if the logout structure was busy but we have not
1599 * nested more times than our maximum value, then we simply
1600 * issue a retry. Our TL=0 trap handler code will check and
1601 * clear the AFSR after it is done logging what is currently
1602 * in the logout struct and handle this event at that time.
1603 */
1604 retry
1605 4:
1606 RESET_USER_RTT_REGS(%g4, %g5, async_err_resetskip)
1607 async_err_resetskip:
1608 set cpu_deferred_error, %g1
1609 ba sys_trap
1610 mov PIL_15, %g4 ! run at pil 15
1611 SET_SIZE(async_err)
1612
1613 #if defined(CPU_IMP_L1_CACHE_PARITY)
1614
1615 /*
1616 * D$ parity error trap (trap 71) at TL=0.
1617 * tt0_dperr is replaced by dcache_parity_instr in cpu_init_trap of
1618 * the various architecture-specific files. This merely sets up the
1619 * arguments for cpu_parity_error and calls it via sys_trap.
1620 * NB: Must be 8 instructions or less to fit in trap table and code must
1621 * be relocatable.
1622 */
1623 ENTRY_NP(dcache_parity_instr)
1624 membar #Sync ! Cheetah+ requires membar #Sync
1625 set cpu_parity_error, %g1 ! %g1 = routine for sys_trap to call
1626 or %g0, CH_ERR_DPE, %g2 ! %g2 = CH_ERR_DPE, marking this as a D$ parity error
1627 rdpr %tpc, %g3 ! %g3 = %tpc of the trapped instruction
1628 sethi %hi(sys_trap), %g7
1629 jmp %g7 + %lo(sys_trap)
1630 mov PIL_15, %g4 ! run at pil 15
1631 SET_SIZE(dcache_parity_instr)
1632
1633
1634 /*
1635 * D$ parity error trap (trap 71) at TL>0.
1636 * tt1_dperr is replaced by dcache_parity_tl1_instr in cpu_init_trap of
1637 * the various architecture-specific files. This generates a "Software
1638 * Trap 1" at TL>0, which goes to dcache_parity_tl1_cont_instr, and we
1639 * continue the handling there.
1640 * NB: Must be 8 instructions or less to fit in trap table and code must
1641 * be relocatable.
1642 */
1643 ENTRY_NP(dcache_parity_tl1_instr)
1644 CH_ERR_TL1_TRAPENTRY(SWTRAP_1); /* trap-table stub: raise software trap 1 so handling continues in dcache_parity_tl1_cont_instr */
1645 SET_SIZE(dcache_parity_tl1_instr)
1646
1647
1648 /*
1649 * Software trap 1 at TL>0.
1650 * tt1_swtrap1 is replaced by dcache_parity_tl1_cont_instr in cpu_init_trap
1651 * of the various architecture-specific files. This is used as a continuation
1652 * of the dcache parity handling where we've bought an extra TL level, so we
1653 * can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
1654 * and %g2. Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
1655 * there's a reserved hole from 3-7. We only use bits 0-1 and 8-9 (the low
1656 * order two bits from %g1 and %g2 respectively).
1657 * NB: Must be 8 instructions or less to fit in trap table and code must
1658 * be relocatable.
1659 */
1660 ENTRY_NP(dcache_parity_tl1_cont_instr)
1661 CH_ERR_TL1_SWTRAPENTRY(dcache_parity_tl1_err); /* trap-table stub: stash %g1/%g2 in %tpc, %tnpc and %tstate as described above, then go to dcache_parity_tl1_err */
1662 SET_SIZE(dcache_parity_tl1_cont_instr)
1663
1664 /*
1665 * D$ parity error at TL>0 handler
1666 * We get here via trap 71 at TL>0->Software trap 1 at TL>0. We enter
1667 * this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
1668 */
1669
1670 ENTRY_NP(dcache_parity_tl1_err)
1671
1672 /*
1673 * This macro saves all the %g registers in the ch_err_tl1_data
1674 * structure, updates the ch_err_tl1_flags and saves the %tpc in
1675 * ch_err_tl1_tpc. At the end of this macro, %g1 will point to
1676 * the ch_err_tl1_data structure and %g2 will have the original
1677 * flags in the ch_err_tl1_data structure. All %g registers
1678 * except for %g1 and %g2 will be available.
1679 */
1680 CH_ERR_TL1_ENTER(CH_ERR_DPE);
1681
1682 #ifdef TRAPTRACE
1683 /*
1684 * Get current trap trace entry physical pointer.
1685 */
1686 CPU_INDEX(%g6, %g5) /* %g6 is used below as this CPU index */
1687 sll %g6, TRAPTR_SIZE_SHIFT, %g6 ! scale the index to a byte offset
1688 set trap_trace_ctl, %g5
1689 add %g6, %g5, %g6 ! %g6 = trap_trace_ctl base plus the offset for this CPU
1740 * and HW does not automatically disable P$, we need to disable it
1741 * here so that we don't encounter any recursive traps when we
1742 * issue the retry.
1743 */
1744 ldxa [%g0]ASI_DCU, %g3 ! %g3 = current DCU value
1745 mov 1, %g4
1746 sllx %g4, DCU_PE_SHIFT, %g4 ! %g4 = 1 << DCU_PE_SHIFT, the bit being cleared
1747 andn %g3, %g4, %g3 ! clear that bit
1748 stxa %g3, [%g0]ASI_DCU ! write the DCU back with the P$ disabled
1749 membar #Sync
1750
1751 /*
1752 * We fall into this macro if we've successfully logged the error in
1753 * the ch_err_tl1_data structure and want the PIL15 softint to pick
1754 * it up and log it. %g1 must point to the ch_err_tl1_data structure.
1755 * Restores the %g registers and issues retry.
1756 */
1757 CH_ERR_TL1_EXIT;
1758 SET_SIZE(dcache_parity_tl1_err)
1759
1760 /*
1761 * I$ parity error trap (trap 72) at TL=0.
1762 * tt0_iperr is replaced by icache_parity_instr in cpu_init_trap of
1763 * the various architecture-specific files. This merely sets up the
1764 * arguments for cpu_parity_error and calls it via sys_trap.
1765 * NB: Must be 8 instructions or less to fit in trap table and code must
1766 * be relocatable.
1767 */
1768
1769 ENTRY_NP(icache_parity_instr)
1770 membar #Sync ! Cheetah+ requires membar #Sync
1771 set cpu_parity_error, %g1 ! %g1 = routine for sys_trap to call
1772 or %g0, CH_ERR_IPE, %g2 ! %g2 = CH_ERR_IPE, marking this as an I$ parity error
1773 rdpr %tpc, %g3 ! %g3 = %tpc of the trapped instruction
1774 sethi %hi(sys_trap), %g7
1775 jmp %g7 + %lo(sys_trap)
1776 mov PIL_15, %g4 ! run at pil 15
1777 SET_SIZE(icache_parity_instr)
1778
1779 /*
1780 * I$ parity error trap (trap 72) at TL>0.
1781 * tt1_iperr is replaced by icache_parity_tl1_instr in cpu_init_trap of
1782 * the various architecture-specific files. This generates a "Software
1783 * Trap 2" at TL>0, which goes to icache_parity_tl1_cont_instr, and we
1784 * continue the handling there.
1785 * NB: Must be 8 instructions or less to fit in trap table and code must
1786 * be relocatable.
1787 */
1788 ENTRY_NP(icache_parity_tl1_instr)
/*
 * The whole stub is the CH_ERR_TL1_TRAPENTRY macro.  SWTRAP_2 names the
 * software trap whose slot holds icache_parity_tl1_cont_instr, which is
 * where the handling of the TL>0 I$ parity trap continues.
 */
1789 CH_ERR_TL1_TRAPENTRY(SWTRAP_2);
1790 SET_SIZE(icache_parity_tl1_instr)
1791
1792 /*
1793 * Software trap 2 at TL>0.
1794 * tt1_swtrap2 is replaced by icache_parity_tl1_cont_instr in cpu_init_trap
1795 * of the various architecture-specific files. This is used as a continuation
1796 * of the icache parity handling where we've bought an extra TL level, so we
1797 * can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
1798 * and %g2. Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
1799 * there's a reserved hole from 3-7. We only use bits 0-1 and 8-9 (the low
1800 * order two bits from %g1 and %g2 respectively).
1801 * NB: Must be 8 instructions or less to fit in trap table and code must
1802 * be relocatable.
1803 */
1804 ENTRY_NP(icache_parity_tl1_cont_instr)
/*
 * The whole stub is the CH_ERR_TL1_SWTRAPENTRY macro, which is handed
 * the routine that does the real work, icache_parity_tl1_err.
 */
1805 CH_ERR_TL1_SWTRAPENTRY(icache_parity_tl1_err);
1806 SET_SIZE(icache_parity_tl1_cont_instr)
1807
1808
1809 /*
1810 * I$ parity error at TL>0 handler
1811 * We get here via trap 72 at TL>0->Software trap 2 at TL>0. We enter
1812 * this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
1813 */
1814
1815 ENTRY_NP(icache_parity_tl1_err)
/*
 * Outline: CH_ERR_TL1_ENTER(CH_ERR_IPE) saves state and records the
 * error, the optional TRAPTRACE section logs a trap trace entry, and
 * CH_ERR_TL1_EXIT restores the %g registers and retries.
 */
1816
1817 /*
1818 * This macro saves all the %g registers in the ch_err_tl1_data
1819 * structure, updates the ch_err_tl1_flags and saves the %tpc in
1820 * ch_err_tl1_tpc. At the end of this macro, %g1 will point to
1821 * the ch_err_tl1_data structure and %g2 will have the original
1822 * flags in the ch_err_tl1_data structure. All %g registers
1823 * except for %g1 and %g2 will be available.
1824 */
1825 CH_ERR_TL1_ENTER(CH_ERR_IPE);
1826
1827 #ifdef TRAPTRACE
1828 /*
1829 * Get current trap trace entry physical pointer.
1830 */
1831 CPU_INDEX(%g6, %g5)
1832 sll %g6, TRAPTR_SIZE_SHIFT, %g6 ! scale the cpu index by the per-cpu control size
1833 set trap_trace_ctl, %g5
1834 add %g6, %g5, %g6 ! %g6 = trap_trace_ctl + scaled index
1870 ld [%g6 + TRAPTR_LIMIT], %g4 ! %g4 = TRAPTR_LIMIT field
1871 st %g5, [%g6 + TRAPTR_LAST_OFFSET] ! presumably %g5 = offset of the entry just used
1872 add %g5, TRAP_ENT_SIZE, %g5 ! candidate offset of the next entry
1873 sub %g4, TRAP_ENT_SIZE, %g4 ! last offset at which a whole entry still fits
1874 cmp %g5, %g4
1875 movge %icc, 0, %g5 ! wrap to offset 0 once that point is reached
1876 st %g5, [%g6 + TRAPTR_OFFSET] ! publish the next offset
1877 ipe_tl1_skip_tt:
1878 #endif /* TRAPTRACE */
1879
1880 /*
1881 * We fall into this macro if we've successfully logged the error in
1882 * the ch_err_tl1_data structure and want the PIL15 softint to pick
1883 * it up and log it. %g1 must point to the ch_err_tl1_data structure.
1884 * Restores the %g registers and issues retry.
1885 */
1886 CH_ERR_TL1_EXIT;
1887
1888 SET_SIZE(icache_parity_tl1_err)
1889
1890 #endif /* CPU_IMP_L1_CACHE_PARITY */
1891
1892
1893 /*
1894 * The itlb_rd_entry and dtlb_rd_entry functions return the tag portion of the
1895 * tte, the virtual address, and the ctxnum of the specified tlb entry. They
1896 * should only be used in places where you have no choice but to look at the
1897 * tlb itself.
1898 *
1899 * Note: These two routines are required by the Estar "cpr" loadable module.
1900 */
1901
ENTRY_NP(itlb_rd_entry)
	!
	! Register usage:
	!   %o0 - ITLB entry number on entry, scaled in place to the
	!         8-byte index used for the ASI accesses
	!   %o1 - where the 64-bit entry data is stored
	!   %o2 - where the 64-bit tag (TAGREAD_CTX_MASK bits cleared)
	!         is stored
	!   %g1, %g2, %o4, %o5 - scratch
	!
	set	TAGREAD_CTX_MASK, %o4		! mask applied to the tag below
	sllx	%o0, 3, %o0			! entry number * 8
	ldxa	[%o0]ASI_ITLB_ACCESS, %g1	! %g1 = entry data
	stx	%g1, [%o1]
	ldxa	[%o0]ASI_ITLB_TAGREAD, %g2	! %g2 = entry tag
	andn	%g2, %o4, %o5			! clear the TAGREAD_CTX_MASK bits
	retl
	stx	%o5, [%o2]			! delay - store the masked tag
SET_SIZE(itlb_rd_entry)
1912
1913
ENTRY_NP(dtlb_rd_entry)
	!
	! Register usage:
	!   %o0 - DTLB entry number on entry, scaled in place to the
	!         8-byte index used for the ASI accesses
	!   %o1 - where the 64-bit entry data is stored
	!   %o2 - where the 64-bit tag (TAGREAD_CTX_MASK bits cleared)
	!         is stored
	!   %g1, %g2, %o4, %o5 - scratch
	!
	set	TAGREAD_CTX_MASK, %o4		! mask applied to the tag below
	sllx	%o0, 3, %o0			! entry number * 8
	ldxa	[%o0]ASI_DTLB_ACCESS, %g1	! %g1 = entry data
	stx	%g1, [%o1]
	ldxa	[%o0]ASI_DTLB_TAGREAD, %g2	! %g2 = entry tag
	andn	%g2, %o4, %o5			! clear the TAGREAD_CTX_MASK bits
	retl
	stx	%o5, [%o2]			! delay - store the masked tag
SET_SIZE(dtlb_rd_entry)
1924
1925
1926 #if !(defined(JALAPENO) || defined(SERRANO))
1927
1928 ENTRY(get_safari_config)
/*
 * Leaf routine: returns in %o0 the 64-bit value loaded from
 * ASI_SAFARI_CONFIG at address 0 (%g0).  No other register is written.
 */
1929 ldxa [%g0]ASI_SAFARI_CONFIG, %o0
1930 retl
1931 nop
1932 SET_SIZE(get_safari_config)
1933
1934
1935 ENTRY(set_safari_config)
/*
 * Leaf routine: stores the 64-bit value passed in %o0 to
 * ASI_SAFARI_CONFIG at address 0 (%g0).  The membar #Sync is issued
 * before the return, so the ASI store is followed by a sync while
 * still inside this routine.
 */
1936 stxa %o0, [%g0]ASI_SAFARI_CONFIG
1937 membar #Sync
1938 retl
1939 nop
1940 SET_SIZE(set_safari_config)
1941
1942 #endif /* !(JALAPENO || SERRANO) */
1943
1944
1945 /*
1946 * Clear the NPT (non-privileged trap) bit in the %tick/%stick
1947 * registers. In an effort to make the change in the
1948 * tick/stick counter as consistent as possible, we disable
1949 * all interrupts while we're changing the registers. We also
1950 * ensure that the read and write instructions are in the same
1951 * line in the instruction cache.
1952 */
1953 ENTRY_NP(cpu_clearticknpt)
/*
 * Register usage:
 *   %g1 - caller's %pstate, written back in the final delay slot
 *   %g2 - current %tick / STICK value
 *   %g3 - scratch, then the bit 63 (NPT) mask
 *   %g4 - return linkage: the routine leaves with "jmp %g4 + 4", not
 *         retl (NOTE(review): confirm how the callers set up %g4)
 *
 * wrpr and wr write the exclusive-or of their two source operands, so
 * writing (mask ^ value) clears bit 63 on the paths where the brgez
 * test has just shown that bit to be set.
 */
1954 rdpr %pstate, %g1 /* save processor state */
1955 andn %g1, PSTATE_IE, %g3 /* turn off */
1956 wrpr %g0, %g3, %pstate /* interrupts */
1957 rdpr %tick, %g2 /* get tick register */
1958 brgez,pn %g2, 1f /* if NPT bit off, skip the %tick update */
1959 mov 1, %g3 /* delay - create mask */
1960 sllx %g3, 63, %g3 /* for NPT bit */
1961 ba,a,pt %xcc, 2f
1962 .align 8 /* Ensure rd/wr in same i$ line */
1963 2:
1964 rdpr %tick, %g2 /* get tick register */
1965 wrpr %g3, %g2, %tick /* write tick register, */
1966 /* clearing NPT bit */
1967 1:
1968 rd STICK, %g2 /* get stick register */
1969 brgez,pn %g2, 3f /* if NPT bit off, we're done */
1970 mov 1, %g3 /* delay - create mask */
1971 sllx %g3, 63, %g3 /* for NPT bit */
1972 ba,a,pt %xcc, 4f
1973 .align 8 /* Ensure rd/wr in same i$ line */
1974 4:
1975 rd STICK, %g2 /* get stick register */
1976 wr %g3, %g2, STICK /* write stick register, */
1977 /* clearing NPT bit */
1978 3:
1979 jmp %g4 + 4 /* return through the linkage in %g4 */
1980 wrpr %g0, %g1, %pstate /* delay - restore processor state */
1981
1982 SET_SIZE(cpu_clearticknpt)
1983
1984
1985 #if defined(CPU_IMP_L1_CACHE_PARITY)
1986
1987 /*
1988 * correct_dcache_parity(size_t size, size_t linesize)
1989 *
1990 * Correct D$ data parity by zeroing the data and initializing microtag
1991 * for all indexes and all ways of the D$.
1992 *
1993 */
1994 ENTRY(correct_dcache_parity)
1995 /*
1996 * Register Usage:
1997 *
1998 * %o0 = input D$ size
1999 * %o1 = input D$ line size
2000 * %o2 = scratch
2001 * %o3 = scratch
2002 * %o4 = scratch
2003 */
2004
2005 sub %o0, %o1, %o0 ! init cache line address
2006
2007 /*
2008 * For Panther CPUs, we also need to clear the data parity bits
2009 * using DC_data_parity bit of the ASI_DCACHE_DATA register.
2010 */
2011 GET_CPU_IMPL(%o3)
2012 cmp %o3, PANTHER_IMPL
2013 bne 1f
2035 membar #Sync ! required after ASI_DC_DATA
2036 /*
2037 * We also clear the parity bits if this is a panther. For non-Panther
2038 * CPUs, we simply end up clearing the $data register twice.
2039 */
2040 stxa %g0, [%o4 + %o2]ASI_DC_DATA
2041 membar #Sync
2042
2043 subcc %o2, 8, %o2 ! step %o2 down by 8
2044 bge 2b ! inner loop runs while %o2 >= 0
2045 nop
2046
2047 subcc %o0, %o1, %o0 ! step the line address down by one line
2048 bge 1b ! outer loop runs while the address >= 0
2049 nop
2050
2051 retl
2052 nop
2053 SET_SIZE(correct_dcache_parity)
2054
2055 #endif /* CPU_IMP_L1_CACHE_PARITY */
2056
2057
2058 ENTRY_NP(stick_timestamp)
/*
 * Leaf routine: stores the current STICK value, with bit 63 (the NPT
 * bit) cleared, into the 64-bit location addressed by %o0.
 * Clobbers %g1.
 */
2059 rd STICK, %g1 ! read stick reg
2060 sllx %g1, 1, %g1 ! shift bit 63 out of the register ...
2061 srlx %g1, 1, %g1 ! ... and shift a zero back in: clear npt bit
2062
2063 retl
2064 stx %g1, [%o0] ! delay - store the timestamp
2065 SET_SIZE(stick_timestamp)
2066
2067
2068 ENTRY_NP(stick_adj)
/*
 * Leaf routine: adds the skew passed in %o0 to STICK.  Interrupts are
 * turned off around the read-modify-write and the caller's %pstate is
 * written back in the delay slot of the return.
 * Clobbers %g1, %g3, %g4 and %o1.
 */
2069 rdpr %pstate, %g1 ! save processor state
2070 andn %g1, PSTATE_IE, %g3
2071 ba 1f ! cache align stick adj
2072 wrpr %g0, %g3, %pstate ! delay - turn off interrupts
2073
2074 .align 16
2075 1: nop
2076
2077 rd STICK, %g4 ! read stick reg
2078 add %g4, %o0, %o1 ! adjust stick with skew
2079 wr %o1, %g0, STICK ! write stick reg
2080
2081 retl
2082 wrpr %g1, %pstate ! delay - restore processor state
2083 SET_SIZE(stick_adj)
2084
ENTRY_NP(kdi_get_stick)
	!
	! Stores the raw STICK value (NPT bit included) in the 64-bit
	! location addressed by %o0 and returns 0.  Clobbers %g1.
	!
	rd	STICK, %g1		! %g1 = STICK
	stx	%g1, [%o0]		! hand it back through the pointer
	retl
	clr	%o0			! delay - return value is 0
SET_SIZE(kdi_get_stick)
2091
2092 /*
2093 * Invalidate the specified line from the D$.
2094 *
2095 * Register usage:
2096 * %o0 - index for the invalidation, specifies DC_way and DC_addr
2097 *
2098 * ASI_DC_TAG, 0x47, is used in the following manner. A 64-bit value is
2099 * stored to a particular DC_way and DC_addr in ASI_DC_TAG.
2100 *
2101 * The format of the stored 64-bit value is:
2102 *
2103 * +----------+--------+----------+
2104 * | Reserved | DC_tag | DC_valid |
2105 * +----------+--------+----------+
2106 * 63 31 30 1 0
2107 *
2108 * DC_tag is the 30-bit physical tag of the associated line.
2109 * DC_valid is the 1-bit valid field for both the physical and snoop tags.
2110 *
2111 * The format of the 64-bit DC_way and DC_addr into ASI_DC_TAG is:
2112 *
2113 * +----------+--------+----------+----------+
2114 * | Reserved | DC_way | DC_addr | Reserved |
2115 * +----------+--------+----------+----------+
2116 * 63 16 15 14 13 5 4 0
2117 *
2118 * DC_way is a 2-bit index that selects one of the 4 ways.
2119 * DC_addr is a 9-bit index that selects one of 512 tag/valid fields.
2120 *
2121 * Setting the DC_valid bit to zero for the specified DC_way and
2122 * DC_addr index into the D$ results in an invalidation of a D$ line.
2123 */
2124 ENTRY(dcache_inval_line)
/*
 * Leaf routine.  %o0 is overwritten with the shifted index; the sll is
 * a 32-bit shift, so the upper 32 bits of the result are zero.
 */
2125 sll %o0, 5, %o0 ! shift index into DC_way and DC_addr
2126 stxa %g0, [%o0]ASI_DC_TAG ! zero the DC_valid and DC_tag bits
2127 membar #Sync ! sync after the ASI store, before returning
2128 retl
2129 nop
2130 SET_SIZE(dcache_inval_line)
2131
2132 /*
2133 * Invalidate the entire I$
2134 *
2135 * Register usage:
2136 * %o0 - specifies IC_way, IC_addr, IC_tag
2137 * %o1 - scratch
2138 * %o2 - used to save and restore DCU value
2139 * %o3 - scratch
2140 * %o5 - used to save and restore PSTATE
2141 *
2142 * Due to the behavior of the I$ control logic when accessing ASI_IC_TAG,
2143 * the I$ should be turned off. Accesses to ASI_IC_TAG may collide and
2144 * block out snoops and invalidates to the I$, causing I$ consistency
2145 * to be broken. Before turning on the I$, all I$ lines must be invalidated.
2146 *
2147 * ASI_IC_TAG, 0x67, is used in the following manner. A 64-bit value is
2148 * stored to a particular IC_way, IC_addr, IC_tag in ASI_IC_TAG. The
2149 * info below describes store (write) use of ASI_IC_TAG. Note that read
2150 * use of ASI_IC_TAG behaves differently.
2151 *
2159 * Valid is the 1-bit valid field for both the physical and snoop tags.
2160 * IC_vpred is the 8-bit LPB bits for 8 instructions starting at
2161 * the 32-byte boundary aligned address specified by IC_addr.
2162 *
2163 * The format of the 64-bit IC_way, IC_addr, IC_tag into ASI_IC_TAG is:
2164 *
2165 * +----------+--------+---------+--------+---------+
2166 * | Reserved | IC_way | IC_addr | IC_tag |Reserved |
2167 * +----------+--------+---------+--------+---------+
2168 * 63 16 15 14 13 5 4 3 2 0
2169 *
2170 * IC_way is a 2-bit index that selects one of the 4 ways.
2171 * IC_addr[13:6] is an 8-bit index that selects one of 256 valid fields.
2172 * IC_addr[5] is a "don't care" for a store.
2173 * IC_tag set to 2 specifies that the stored value is to be interpreted
2174 * as containing Valid and IC_vpred as described above.
2175 *
2176 * Setting the Valid bit to zero for the specified IC_way and
2177 * IC_addr index into the I$ results in an invalidation of an I$ line.
2178 */
2179 ENTRY(icache_inval_all)
2180 rdpr %pstate, %o5 ! %o5 = caller pstate, written back on exit
2181 andn %o5, PSTATE_IE, %o3
2182 wrpr %g0, %o3, %pstate ! clear IE bit
2183
/*
 * Pick up the I$ size (%o0) and line size (%o1) for the flush macro.
 * NOTE(review): GET_CPU_PRIVATE_PTR is defined elsewhere; from its use
 * here it appears to leave the cpu private pointer in %o0 and to branch
 * to icache_inval_all_1 when there is none - confirm against the macro.
 */
2184 GET_CPU_PRIVATE_PTR(%g0, %o0, %o2, icache_inval_all_1);
2185 ld [%o0 + CHPR_ICACHE_LINESIZE], %o1 ! line size from the private area
2186 ba,pt %icc, 2f
2187 ld [%o0 + CHPR_ICACHE_SIZE], %o0 ! delay - size from the private area
2188 icache_inval_all_1:
2189 ASM_LD(%o0, icache_size) ! otherwise use the icache_size variable
2190 ASM_LD(%o1, icache_linesize) ! and the icache_linesize variable
2191 2:
2192 CH_ICACHE_FLUSHALL(%o0, %o1, %o2, %o4)
2193
2194 retl
2195 wrpr %g0, %o5, %pstate ! restore earlier pstate
2196 SET_SIZE(icache_inval_all)
2197
2198
2199 /*
2200 * cache_scrubreq_tl1 is the crosstrap handler called on offlined cpus via a
2201 * crosstrap. It atomically increments the outstanding request counter and,
2202 * if there was not already an outstanding request, branches to setsoftint_tl1
2203 * to enqueue an intr_vec for the given inum.
2204 */
2205
2206 ! Register usage:
2207 !
2208 ! Arguments:
2209 ! %g1 - inum
2210 ! %g2 - index into chsm_outstanding array
2211 !
2212 ! Internal:
2213 ! %g2, %g3, %g5 - scratch
2214 ! %g4 - ptr. to scrub_misc chsm_outstanding[index].
2215 ! %g6 - setsoftint_tl1 address
2216
2217 ENTRY_NP(cache_scrubreq_tl1)
2218 mulx %g2, CHSM_OUTSTANDING_INCR, %g2 ! index -> byte offset within the array
2219 set CHPR_SCRUB_MISC + CHSM_OUTSTANDING, %g3 ! offset of the array in the private area
2220 add %g2, %g3, %g2 ! %g2 = offset of chsm_outstanding[index]
	!
	! NOTE(review): GET_CPU_PRIVATE_PTR is defined elsewhere; as used
	! here it appears to yield in %g4 the private pointer plus the
	! offset in %g2, branching to 1f if there is no private area.
	!
2221 GET_CPU_PRIVATE_PTR(%g2, %g4, %g5, 1f);
2222 ld [%g4], %g2 ! cpu's chsm_outstanding[index]
2223 !
2224 ! no need to use atomic instructions for the following
2225 ! increment - we're at tl1
2226 !
2227 add %g2, 0x1, %g3
2228 brnz,pn %g2, 1f ! no need to enqueue more intr_vec
2229 st %g3, [%g4] ! delay - store incremented counter
	!
	! The delay slot above is not annulled, so the counter is
	! incremented on both paths; only a previous count of zero
	! goes on to setsoftint_tl1.
	!
2230 ASM_JMP(%g6, setsoftint_tl1)
2231 ! not reached
2232 1:
2233 retry
2234 SET_SIZE(cache_scrubreq_tl1)
2235
2236
2237 /*
2238 * Get the error state for the processor.
2239 * Note that this must not be used at TL>0
2240 */
2241 ENTRY(get_cpu_error_state)
/*
 * Leaf routine.  %o0 points at the save area that is filled in through
 * the CH_CPU_ERRORS_* offsets; %o1, %o2 and %o3 are scratch.  %o0 itself
 * is not modified by the code shown here.
 */
2242 #if defined(CHEETAH_PLUS)
2243 set ASI_SHADOW_REG_VA, %o2
2244 ldxa [%o2]ASI_AFSR, %o1 ! shadow afsr reg
2245 stx %o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR]
2246 ldxa [%o2]ASI_AFAR, %o1 ! shadow afar reg
2247 stx %o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFAR]
2248 GET_CPU_IMPL(%o3) ! Only panther has AFSR_EXT registers
2249 cmp %o3, PANTHER_IMPL
2250 bne,a 1f ! annulled: the delay slot runs only if taken
2251 stx %g0, [%o0 + CH_CPU_ERRORS_AFSR_EXT] ! zero for non-PN
2252 set ASI_AFSR_EXT_VA, %o2
2253 ldxa [%o2]ASI_AFSR, %o1 ! afsr_ext reg
2254 stx %o1, [%o0 + CH_CPU_ERRORS_AFSR_EXT]
2255 set ASI_SHADOW_AFSR_EXT_VA, %o2
2256 ldxa [%o2]ASI_AFSR, %o1 ! shadow afsr_ext reg
2264 stx %g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR]
2265 stx %g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFAR]
2266 stx %g0, [%o0 + CH_CPU_ERRORS_AFSR_EXT]
2267 stx %g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT]
2268 #endif /* CHEETAH_PLUS */
2269 #if defined(SERRANO)
2270 /*
2271 * Serrano has an afar2 which captures the address on FRC/FRU errors.
2272 * We save this in the afar2 of the register save area.
2273 */
2274 set ASI_MCU_AFAR2_VA, %o2
2275 ldxa [%o2]ASI_MCU_CTRL, %o1
2276 stx %o1, [%o0 + CH_CPU_ERRORS_AFAR2]
2277 #endif /* SERRANO */
2278 ldxa [%g0]ASI_AFSR, %o1 ! primary afsr reg
2279 stx %o1, [%o0 + CH_CPU_ERRORS_AFSR]
2280 ldxa [%g0]ASI_AFAR, %o1 ! primary afar reg
2281 retl
2282 stx %o1, [%o0 + CH_CPU_ERRORS_AFAR] ! delay - store primary afar
2283 SET_SIZE(get_cpu_error_state)
2284
/*
 * Check a page of memory for errors.
 *
 * Load each 64 byte block from physical memory.
 * Check AFSR after each load to see if an error
 * was caused.  If so, log/scrub that error.
 *
 * Used to determine if a page contains
 * CEs when CEEN is disabled.
 *
 * Register usage:
 *	%i0 - address of the block being loaded, advanced by 64 per pass
 *	%i1 - size in bytes (psz); only the low 32 bits are used
 *	%l2 - caller's %fprs, restored before returning
 *	%l3 - AFSR value read after each block load
 *	%l4 - number of 64-byte blocks still to check
 *
 * A size of less than one block (psz / 64 == 0) returns immediately
 * without touching memory or %fprs.  Without this guard the block count
 * would be decremented past zero and the loop would keep loading blocks
 * far beyond the requested range.
 */
ENTRY(cpu_check_block)
	!
	! get a new window with room for the error regs
	!
	save	%sp, -SA(MINFRAME + CH_CPU_ERROR_SIZE), %sp
	srl	%i1, 6, %l4		! clear top bits of psz
					! and divide by 64
	brz,pn	%l4, 3f			! no whole block: nothing to check
	rd	%fprs, %l2		! delay - store FP (harmless if taken)
	wr	%g0, FPRS_FEF, %fprs	! enable FP
1:
	ldda	[%i0]ASI_BLK_P, %d0	! load a block
	membar	#Sync
	ldxa	[%g0]ASI_AFSR, %l3	! read afsr reg
	brz,a,pt %l3, 2f		! check for error
	nop

	!
	! if error, read the error regs and log it
	!
	call	get_cpu_error_state
	add	%fp, STACK_BIAS - CH_CPU_ERROR_SIZE, %o0

	!
	! cpu_ce_detected(ch_cpu_errors_t *, flag)
	!
	call	cpu_ce_detected		! log the error
	mov	CE_CEEN_TIMEOUT, %o1
2:
	dec	%l4			! next 64-byte block
	brnz,a,pt %l4, 1b
	add	%i0, 64, %i0		! increment block addr

	wr	%l2, %g0, %fprs		! restore FP
3:
	ret
	restore

SET_SIZE(cpu_check_block)
2332
2333 /*
2334 * Perform a cpu logout called from C. This is used where we did not trap
2335 * for the error but still want to gather "what we can". Caller must make
2336 * sure cpu private area exists and that the indicated logout area is free
2337 * for use, and that we are unable to migrate cpus.
2338 */
2339 ENTRY(cpu_delayed_logout)
/*
 * %o0 and %o1 are the incoming arguments; %o1 is advanced by
 * CH_CLO_DATA + CH_CHD_EC_DATA before both are handed to
 * GET_ECACHE_DTAGS.  %o2-%o5 and %g1 are scratch.
 *
 * NOTE(review): the exit path sets PSTATE_IE unconditionally instead of
 * restoring the value read on entry, so interrupts are enabled on return
 * even if the caller had them disabled - confirm this is intended.
 */
2340 rdpr %pstate, %o2
2341 andn %o2, PSTATE_IE, %o2
2342 wrpr %g0, %o2, %pstate ! disable interrupts
2343 PARK_SIBLING_CORE(%o2, %o3, %o4) ! %o2 has DCU value
2344 add %o1, CH_CLO_DATA + CH_CHD_EC_DATA, %o1
2345 rd %asi, %g1 ! save the caller's %asi
2346 wr %g0, ASI_P, %asi ! run the macro below with %asi = ASI_P
2347 GET_ECACHE_DTAGS(%o0, %o1, %o3, %o4, %o5)
2348 wr %g1, %asi ! put the caller's %asi back
2349 UNPARK_SIBLING_CORE(%o2, %o3, %o4) ! can use %o2 again
2350 rdpr %pstate, %o2
2351 or %o2, PSTATE_IE, %o2
2352 wrpr %g0, %o2, %pstate ! enable interrupts
2353 retl
2354 nop
2355 SET_SIZE(cpu_delayed_logout)
2356
2357 ENTRY(dtrace_blksuword32)
2358 save %sp, -SA(MINFRAME + 4), %sp ! 4 extra bytes: save slot for %f0
2359
2360 rdpr %pstate, %l1 ! %l1 = entry pstate, restored further down
2361 andn %l1, PSTATE_IE, %l2 ! disable interrupts to
2362 wrpr %g0, %l2, %pstate ! protect our FPU diddling
2363
2364 rd %fprs, %l0 ! %l0 = entry %fprs
2365 andcc %l0, FPRS_FEF, %g0
2366 bz,a,pt %xcc, 1f ! if the fpu is disabled
2367 wr %g0, FPRS_FEF, %fprs ! ... enable the fpu
2368
2369 st %f0, [%fp + STACK_BIAS - 4] ! save %f0 to the stack
2370 1:
2371 set 0f, %l5
2372 /*
2373 * We're about to write a block full of either total garbage
2374 * (not kernel data, don't worry) or user floating-point data
2375 * (so it only _looks_ like garbage).
2376 */
2401
2402 ld [%fp + STACK_BIAS - 4], %f0 ! restore %f0
2403 1:
2404
2405 wrpr %g0, %l1, %pstate ! restore interrupts
2406
2407 /*
2408 * If tryagain is set (%i2) we tail-call dtrace_blksuword32_err()
2409 * which deals with watchpoints. Otherwise, just return -1.
2410 */
2411 brnz,pt %i2, 1f
2412 nop
2413 ret
2414 restore %g0, -1, %o0 ! delay - pop the window, return value -1
2415 1:
2416 call dtrace_blksuword32_err
2417 restore ! delay - pop the window first, making this a tail call
2418
2419 SET_SIZE(dtrace_blksuword32)
2420
2421 #ifdef CHEETAHPLUS_ERRATUM_25
2422
2423 ENTRY(claimlines)
/*
 * Leaf routine that walks a range from the top down, issuing one casxa
 * per step.  From the arithmetic: %o0 is a base address, %o1 a byte
 * count and %o2 the step (NOTE(review): confirm the argument meanings
 * against the C prototype).  %o3 is scratch.
 *
 * The casxa compares the addressed doubleword with %g0 and swaps in
 * %g0, with the result discarded into %g0, so memory contents are left
 * unchanged: a zero is rewritten as zero and anything else fails the
 * compare.
 */
2424 1:
2425 subcc %o1, %o2, %o1 ! step the offset down
2426 add %o0, %o1, %o3 ! %o3 = base + offset
2427 bgeu,a,pt %xcc, 1b ! loop until the subtraction borrows
2428 casxa [%o3]ASI_MEM, %g0, %g0 ! delay - annulled when the loop exits
2429 membar #Sync
2430 retl
2431 nop
2432 SET_SIZE(claimlines)
2433
2434 ENTRY(cpu_feature_init)
/*
 * Takes no arguments.  Runs in its own register window so that the
 * calls made under CHEETAHPLUS_ERRATUM_34 are free to use the %o
 * registers.
 */
2435 save %sp, -SA(MINFRAME), %sp
2436 sethi %hi(cheetah_bpe_off), %o0
2437 ld [%o0 + %lo(cheetah_bpe_off)], %o0 ! %o0 = cheetah_bpe_off
2438 brz %o0, 1f ! zero: leave dispatch control alone
2439 nop
2440 rd ASR_DISPATCH_CONTROL, %o0
2441 andn %o0, ASR_DISPATCH_CONTROL_BPE, %o0 ! clear the BPE bit
2442 wr %o0, 0, ASR_DISPATCH_CONTROL
2443 1:
2444 !
2445 ! get the device_id and store the device_id
2446 ! in the appropriate cpunodes structure
2447 ! given the cpus index
2448 !
2449 CPU_INDEX(%o0, %o1)
2450 mulx %o0, CPU_NODE_SIZE, %o0 ! %o0 = byte offset of this cpu's node
2451 set cpunodes + DEVICE_ID, %o1 ! %o1 = address of the first node's DEVICE_ID
2452 ldxa [%g0] ASI_DEVICE_SERIAL_ID, %o2
2453 stx %o2, [%o0 + %o1]
2454 #ifdef CHEETAHPLUS_ERRATUM_34
2455 !
2456 ! apply Cheetah+ erratum 34 workaround
2457 !
2458 call itlb_erratum34_fixup
2459 nop
2460 call dtlb_erratum34_fixup
2461 nop
2462 #endif /* CHEETAHPLUS_ERRATUM_34 */
2463 ret
2464 restore
2465 SET_SIZE(cpu_feature_init)
2466
2467 /*
2468 * Copy a tsb entry atomically, from src to dest.
2469 * src must be 128 bit aligned.
2470 */
2471 ENTRY(copy_tsb_entry)
/*
 * %o0 = src, %o1 = dest.  A single ldda reads both 64-bit halves of the
 * entry into the %o2/%o3 register pair; they are then written to dest
 * with two separate 64-bit stores.  Clobbers %o2 and %o3.
 */
2472 ldda [%o0]ASI_NQUAD_LD, %o2 ! %o2 = tag, %o3 = data
2473 stx %o2, [%o1] ! dest tag
2474 stx %o3, [%o1 + 8 ] ! dest data
2475 retl
2476 nop
2477 SET_SIZE(copy_tsb_entry)
2478
2479 #endif /* CHEETAHPLUS_ERRATUM_25 */
2480
2481 #ifdef CHEETAHPLUS_ERRATUM_34
2482
2483 !
2484 ! In Cheetah+ erratum 34, under certain conditions an ITLB locked
2485 ! index 0 TTE will erroneously be displaced when a new TTE is
2486 ! loaded via ASI_ITLB_IN. In order to avoid cheetah+ erratum 34,
2487 ! locked index 0 TTEs must be relocated.
2488 !
2489 ! NOTE: Care must be taken to avoid an ITLB miss in this routine.
2490 !
2491 ENTRY_NP(itlb_erratum34_fixup)
2492 rdpr %pstate, %o3 ! %o3 = entry pstate, written back on exit
2493 #ifdef DEBUG
2494 PANIC_IF_INTR_DISABLED_PSTR(%o3, u3_di_label1, %g1)
2495 #endif /* DEBUG */
	!
	! wrpr writes the exclusive-or of its operands, so this clears IE
	! provided it was set on entry, which the DEBUG check above asserts.
	!
2496 wrpr %o3, PSTATE_IE, %pstate ! Disable interrupts
2497 ldxa [%g0]ASI_ITLB_ACCESS, %o1 ! %o1 = entry 0 data
2498 ldxa [%g0]ASI_ITLB_TAGREAD, %o2 ! %o2 = entry 0 tag
2499
2500 cmp %o1, %g0 ! Is this entry valid?
2501 bge %xcc, 1f ! Non-negative data means not valid
2502 andcc %o1, TTE_LCK_INT, %g0 ! Is this entry locked?
2527 !
2528 cmp %o4, %g0 ! TTE is >= 0 iff not valid
2529 bge %xcc, 4f ! If invalid, go displace
2530 andcc %o4, TTE_LCK_INT, %g0 ! Check for lock bit
2531 bnz,a %icc, 3b ! If locked, look at next
2532 add %g3, (1 << 3), %g3 ! entry
2533 4:
2534 !
2535 ! We found an unlocked or invalid entry; we'll explicitly load
2536 ! the former index 0 entry here.
2537 !
2538 sethi %hi(FLUSH_ADDR), %o4
2539 set MMU_TAG_ACCESS, %g4
2540 stxa %o2, [%g4]ASI_IMMU ! tag of the former entry 0
2541 stxa %o1, [%g3]ASI_ITLB_ACCESS ! data, written at the index found above
2542 flush %o4 ! Flush required for I-MMU
2543 retl
2544 wrpr %g0, %o3, %pstate ! Enable interrupts (restore entry pstate)
2545 SET_SIZE(itlb_erratum34_fixup)
2546
2547 !
2548 ! In Cheetah+ erratum 34, under certain conditions a DTLB locked
2549 ! index 0 TTE will erroneously be displaced when a new TTE is
2550 ! loaded. In order to avoid cheetah+ erratum 34, locked index 0
2551 ! TTEs must be relocated.
2552 !
2553 ENTRY_NP(dtlb_erratum34_fixup)
2554 rdpr %pstate, %o3 ! %o3 = entry pstate, written back on exit
2555 #ifdef DEBUG
2556 PANIC_IF_INTR_DISABLED_PSTR(%o3, u3_di_label2, %g1)
2557 #endif /* DEBUG */
	!
	! wrpr writes the exclusive-or of its operands, so this clears IE
	! provided it was set on entry, which the DEBUG check above asserts.
	!
2558 wrpr %o3, PSTATE_IE, %pstate ! Disable interrupts
2559 ldxa [%g0]ASI_DTLB_ACCESS, %o1 ! %o1 = entry 0 data
2560 ldxa [%g0]ASI_DTLB_TAGREAD, %o2 ! %o2 = entry 0 tag
2561
2562 cmp %o1, %g0 ! Is this entry valid?
2563 bge %xcc, 1f ! Non-negative data means not valid
2564 andcc %o1, TTE_LCK_INT, %g0 ! Is this entry locked?
2565 bnz %icc, 2f
2566 nop
2585 ! of the lock bit).
2586 !
2587 cmp %o4, %g0 ! TTE is >= 0 iff not valid
2588 bge %xcc, 4f ! If invalid, go displace
2589 andcc %o4, TTE_LCK_INT, %g0 ! Check for lock bit
2590 bnz,a %icc, 3b ! If locked, look at next
2591 add %g3, (1 << 3), %g3 ! entry
2592 4:
2593 !
2594 ! We found an unlocked or invalid entry; we'll explicitly load
2595 ! the former index 0 entry here.
2596 !
2597 set MMU_TAG_ACCESS, %g4
2598 stxa %o2, [%g4]ASI_DMMU ! tag of the former entry 0
2599 stxa %o1, [%g3]ASI_DTLB_ACCESS ! data, written at the index found above
2600 membar #Sync
2601 retl
2602 wrpr %g0, %o3, %pstate ! Enable interrupts (restore entry pstate)
2603 SET_SIZE(dtlb_erratum34_fixup)
2604
2605 #endif /* CHEETAHPLUS_ERRATUM_34 */
2606
|