1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  *
  25  * Assembly code support for Cheetah/Cheetah+ modules
  26  */
  27 
  28 #include "assym.h"
  29 
  30 #include <sys/asm_linkage.h>
  31 #include <sys/mmu.h>
  32 #include <vm/hat_sfmmu.h>
  33 #include <sys/machparam.h>
  34 #include <sys/machcpuvar.h>
  35 #include <sys/machthread.h>
  36 #include <sys/machtrap.h>
  37 #include <sys/privregs.h>
  38 #include <sys/trap.h>
  39 #include <sys/cheetahregs.h>
  40 #include <sys/us3_module.h>
  41 #include <sys/xc_impl.h>
  42 #include <sys/intreg.h>
  43 #include <sys/async.h>
  44 #include <sys/clock.h>
  45 #include <sys/cheetahasm.h>
  46 #include <sys/cmpregs.h>
  47 
  48 #ifdef TRAPTRACE
  49 #include <sys/traptrace.h>
  50 #endif /* TRAPTRACE */
  51 
  52 /* BEGIN CSTYLED */
  53 
  54 #define DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3)                  \
  55         ldxa    [%g0]ASI_DCU, tmp1                                      ;\
  56         btst    DCU_DC, tmp1            /* is dcache enabled? */        ;\
  57         bz,pn   %icc, 1f                                                ;\
  58         ASM_LD(tmp1, dcache_linesize)                                   ;\
  59         ASM_LD(tmp2, dflush_type)                                       ;\
  60         cmp     tmp2, FLUSHPAGE_TYPE                                    ;\
  61         be,pt   %icc, 2f                                                ;\
  62         nop                                                             ;\
  63         sllx    arg1, CHEETAH_DC_VBIT_SHIFT, arg1/* tag to compare */   ;\
  64         ASM_LD(tmp3, dcache_size)                                       ;\
  65         cmp     tmp2, FLUSHMATCH_TYPE                                   ;\
  66         be,pt   %icc, 3f                                                ;\
  67         nop                                                             ;\
  68         /*                                                              \
  69          * flushtype = FLUSHALL_TYPE, flush the whole thing             \
  70          * tmp3 = cache size                                            \
  71          * tmp1 = cache line size                                       \
  72          */                                                             \
  73         sub     tmp3, tmp1, tmp2                                        ;\
  74 4:                                                                      \
  75         stxa    %g0, [tmp2]ASI_DC_TAG                                   ;\
  76         membar  #Sync                                                   ;\
  77         cmp     %g0, tmp2                                               ;\
  78         bne,pt  %icc, 4b                                                ;\
  79         sub     tmp2, tmp1, tmp2                                        ;\
  80         ba,pt   %icc, 1f                                                ;\
  81         nop                                                             ;\
  82         /*                                                              \
  83          * flushtype = FLUSHPAGE_TYPE                                   \
  84          * arg1 = pfn                                                   \
  85          * arg2 = virtual color                                         \
  86          * tmp1 = cache line size                                       \
  87          * tmp2 = tag from cache                                        \
  88          * tmp3 = counter                                               \
  89          */                                                             \
  90 2:                                                                      \
  91         set     MMU_PAGESIZE, tmp3                                      ;\
  92         sllx    arg1, MMU_PAGESHIFT, arg1  /* pfn to 43 bit PA     */   ;\
  93         sub     tmp3, tmp1, tmp3                                        ;\
  94 4:                                                                      \
  95         stxa    %g0, [arg1 + tmp3]ASI_DC_INVAL                          ;\
  96         membar  #Sync                                                   ;\
  97 5:                                                                      \
  98         cmp     %g0, tmp3                                               ;\
  99         bnz,pt  %icc, 4b                /* branch if not done */        ;\
 100         sub     tmp3, tmp1, tmp3                                        ;\
 101         ba,pt   %icc, 1f                                                ;\
 102         nop                                                             ;\
 103         /*                                                              \
 104          * flushtype = FLUSHMATCH_TYPE                                  \
 105          * arg1 = tag to compare against                                \
 106          * tmp1 = cache line size                                       \
 107          * tmp3 = cache size                                            \
 108          * arg2 = counter                                               \
 109          * tmp2 = cache tag                                             \
 110          */                                                             \
 111 3:                                                                      \
 112         sub     tmp3, tmp1, arg2                                        ;\
 113 4:                                                                      \
 114         ldxa    [arg2]ASI_DC_TAG, tmp2          /* read tag */          ;\
 115         btst    CHEETAH_DC_VBIT_MASK, tmp2                              ;\
 116         bz,pn   %icc, 5f                /* br if no valid sub-blocks */ ;\
 117         andn    tmp2, CHEETAH_DC_VBIT_MASK, tmp2 /* clear out v bits */ ;\
 118         cmp     tmp2, arg1                                              ;\
 119         bne,pn  %icc, 5f                /* branch if tag miss */        ;\
 120         nop                                                             ;\
 121         stxa    %g0, [arg2]ASI_DC_TAG                                   ;\
 122         membar  #Sync                                                   ;\
 123 5:                                                                      \
 124         cmp     %g0, arg2                                               ;\
 125         bne,pt  %icc, 4b                /* branch if not done */        ;\
 126         sub     arg2, tmp1, arg2                                        ;\
 127 1:
 128 
 129 /*
 130  * macro that flushes the entire dcache color
 131  * dcache size = 64K, one way 16K
 132  *
 133  * In:
 134  *    arg = virtual color register (not clobbered)
 135  *    way = way#, can either be a constant or a register (not clobbered)
 136  *    tmp1, tmp2, tmp3 = scratch registers
 137  *
 138  */
 139 #define DCACHE_FLUSHCOLOR(arg, way, tmp1, tmp2, tmp3)                   \
 140         ldxa    [%g0]ASI_DCU, tmp1;                                     \
 141         btst    DCU_DC, tmp1;           /* is dcache enabled? */        \
 142         bz,pn   %icc, 1f;                                               \
 143         ASM_LD(tmp1, dcache_linesize)                                   \
 144         /*                                                              \
 145          * arg = virtual color                                          \
 146          * tmp1 = cache line size                                       \
 147          */                                                             \
 148         sllx    arg, MMU_PAGESHIFT, tmp2; /* color to dcache page */    \
 149         mov     way, tmp3;                                              \
 150         sllx    tmp3, 14, tmp3;           /* One way 16K */             \
 151         or      tmp2, tmp3, tmp3;                                       \
 152         set     MMU_PAGESIZE, tmp2;                                     \
 153         /*                                                              \
 154          * tmp2 = page size                                             \
 155          * tmp3 =  cached page in dcache                                \
 156          */                                                             \
 157         sub     tmp2, tmp1, tmp2;                                       \
 158 2:                                                                      \
 159         stxa    %g0, [tmp3 + tmp2]ASI_DC_TAG;                           \
 160         membar  #Sync;                                                  \
 161         cmp     %g0, tmp2;                                              \
 162         bne,pt  %icc, 2b;                                               \
 163         sub     tmp2, tmp1, tmp2;                                       \
 164 1:
 165 
 166 /* END CSTYLED */
 167 
 168 /*
 169  * Cheetah MMU and Cache operations.
 170  */
 171 
 172         ENTRY_NP(vtag_flushpage)
 173         /*
 174          * flush page from the tlb
 175          *
 176          * %o0 = vaddr
 177          * %o1 = sfmmup
 178          */
 179         rdpr    %pstate, %o5
 180 #ifdef DEBUG
 181         PANIC_IF_INTR_DISABLED_PSTR(%o5, u3_di_label0, %g1)
 182 #endif /* DEBUG */
 183         /*
 184          * disable ints
 185          */
 186         andn    %o5, PSTATE_IE, %o4
 187         wrpr    %o4, 0, %pstate
 188 
 189         /*
 190          * Then, blow out the tlb
 191          * Interrupts are disabled to prevent the primary ctx register
 192          * from changing underneath us.
 193          */
 194         sethi   %hi(ksfmmup), %o3
 195         ldx     [%o3 + %lo(ksfmmup)], %o3
 196         cmp     %o3, %o1
 197         bne,pt   %xcc, 1f                       ! if not kernel as, go to 1
 198           sethi %hi(FLUSH_ADDR), %o3
 199         /*
 200          * For Kernel demaps use primary. type = page implicitly
 201          */
 202         stxa    %g0, [%o0]ASI_DTLB_DEMAP        /* dmmu flush for KCONTEXT */
 203         stxa    %g0, [%o0]ASI_ITLB_DEMAP        /* immu flush for KCONTEXT */
 204         flush   %o3
 205         retl
 206           wrpr  %g0, %o5, %pstate               /* enable interrupts */
 207 1:
 208         /*
 209          * User demap.  We need to set the primary context properly.
 210          * Secondary context cannot be used for Cheetah IMMU.
 211          * %o0 = vaddr
 212          * %o1 = sfmmup
 213          * %o3 = FLUSH_ADDR
 214          */
 215         SFMMU_CPU_CNUM(%o1, %g1, %g2)           ! %g1 = sfmmu cnum on this CPU
 216         
 217         ldub    [%o1 + SFMMU_CEXT], %o4         ! %o4 = sfmmup->sfmmu_cext
 218         sll     %o4, CTXREG_EXT_SHIFT, %o4
 219         or      %g1, %o4, %g1                   ! %g1 = primary pgsz | cnum
 220 
 221         wrpr    %g0, 1, %tl
 222         set     MMU_PCONTEXT, %o4
 223         or      DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %o0, %o0
 224         ldxa    [%o4]ASI_DMMU, %o2              ! %o2 = save old ctxnum
 225         srlx    %o2, CTXREG_NEXT_SHIFT, %o1     ! need to preserve nucleus pgsz
 226         sllx    %o1, CTXREG_NEXT_SHIFT, %o1     ! %o1 = nucleus pgsz
 227         or      %g1, %o1, %g1                   ! %g1 = nucleus pgsz | primary pgsz | cnum
 228         stxa    %g1, [%o4]ASI_DMMU              ! wr new ctxum 
 229 
 230         stxa    %g0, [%o0]ASI_DTLB_DEMAP
 231         stxa    %g0, [%o0]ASI_ITLB_DEMAP
 232         stxa    %o2, [%o4]ASI_DMMU              /* restore old ctxnum */
 233         flush   %o3
 234         wrpr    %g0, 0, %tl
 235 
 236         retl
 237         wrpr    %g0, %o5, %pstate               /* enable interrupts */
 238         SET_SIZE(vtag_flushpage)
 239 
 240         ENTRY_NP2(vtag_flushall, demap_all)
 241         /*
 242          * flush the tlb
 243          */
 244         sethi   %hi(FLUSH_ADDR), %o3
 245         set     DEMAP_ALL_TYPE, %g1
 246         stxa    %g0, [%g1]ASI_DTLB_DEMAP
 247         stxa    %g0, [%g1]ASI_ITLB_DEMAP
 248         flush   %o3
 249         retl
 250         nop
 251         SET_SIZE(demap_all)
 252         SET_SIZE(vtag_flushall)
 253 
 254 
 255         ENTRY_NP(vtag_flushpage_tl1)
 256         /*
 257          * x-trap to flush page from tlb and tsb
 258          *
 259          * %g1 = vaddr, zero-extended on 32-bit kernel
 260          * %g2 = sfmmup
 261          *
 262          * assumes TSBE_TAG = 0
 263          */
 264         srln    %g1, MMU_PAGESHIFT, %g1
 265                 
 266         sethi   %hi(ksfmmup), %g3
 267         ldx     [%g3 + %lo(ksfmmup)], %g3
 268         cmp     %g3, %g2
 269         bne,pt  %xcc, 1f                        ! if not kernel as, go to 1
 270           slln  %g1, MMU_PAGESHIFT, %g1         /* g1 = vaddr */
 271 
 272         /* We need to demap in the kernel context */
 273         or      DEMAP_NUCLEUS | DEMAP_PAGE_TYPE, %g1, %g1
 274         stxa    %g0, [%g1]ASI_DTLB_DEMAP
 275         stxa    %g0, [%g1]ASI_ITLB_DEMAP
 276         retry
 277 1:
 278         /* We need to demap in a user context */
 279         or      DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %g1, %g1
 280 
 281         SFMMU_CPU_CNUM(%g2, %g6, %g3)   ! %g6 = sfmmu cnum on this CPU
 282         
 283         ldub    [%g2 + SFMMU_CEXT], %g4         ! %g4 = sfmmup->cext
 284         sll     %g4, CTXREG_EXT_SHIFT, %g4
 285         or      %g6, %g4, %g6                   ! %g6 = pgsz | cnum
 286 
 287         set     MMU_PCONTEXT, %g4
 288         ldxa    [%g4]ASI_DMMU, %g5              /* rd old ctxnum */
 289         srlx    %g5, CTXREG_NEXT_SHIFT, %g2     /* %g2 = nucleus pgsz */
 290         sllx    %g2, CTXREG_NEXT_SHIFT, %g2     /* preserve nucleus pgsz */
 291         or      %g6, %g2, %g6                   /* %g6 = nucleus pgsz | primary pgsz | cnum */
 292         stxa    %g6, [%g4]ASI_DMMU              /* wr new ctxum */
 293         stxa    %g0, [%g1]ASI_DTLB_DEMAP
 294         stxa    %g0, [%g1]ASI_ITLB_DEMAP
 295         stxa    %g5, [%g4]ASI_DMMU              /* restore old ctxnum */
 296         retry
 297         SET_SIZE(vtag_flushpage_tl1)
 298 
 299 
 300         ENTRY_NP(vtag_flush_pgcnt_tl1)
 301         /*
 302          * x-trap to flush pgcnt MMU_PAGESIZE pages from tlb
 303          *
 304          * %g1 = vaddr, zero-extended on 32-bit kernel
 305          * %g2 = <sfmmup58|pgcnt6>, (pgcnt - 1) is pass'ed in via pgcnt6 bits.
 306          *
 307          * NOTE: this handler relies on the fact that no
 308          *      interrupts or traps can occur during the loop
 309          *      issuing the TLB_DEMAP operations. It is assumed
 310          *      that interrupts are disabled and this code is
 311          *      fetching from the kernel locked text address.
 312          *
 313          * assumes TSBE_TAG = 0
 314          */
 315         set     SFMMU_PGCNT_MASK, %g4
 316         and     %g4, %g2, %g3                   /* g3 = pgcnt - 1 */
 317         add     %g3, 1, %g3                     /* g3 = pgcnt */
 318 
 319         andn    %g2, SFMMU_PGCNT_MASK, %g2      /* g2 = sfmmup */
 320         srln    %g1, MMU_PAGESHIFT, %g1
 321 
 322         sethi   %hi(ksfmmup), %g4
 323         ldx     [%g4 + %lo(ksfmmup)], %g4
 324         cmp     %g4, %g2
 325         bne,pn   %xcc, 1f                       /* if not kernel as, go to 1 */
 326           slln  %g1, MMU_PAGESHIFT, %g1         /* g1 = vaddr */
 327 
 328         /* We need to demap in the kernel context */
 329         or      DEMAP_NUCLEUS | DEMAP_PAGE_TYPE, %g1, %g1
 330         set     MMU_PAGESIZE, %g2               /* g2 = pgsize */
 331         sethi   %hi(FLUSH_ADDR), %g5
 332 4:
 333         stxa    %g0, [%g1]ASI_DTLB_DEMAP
 334         stxa    %g0, [%g1]ASI_ITLB_DEMAP
 335         flush   %g5                             ! flush required by immu
 336 
 337         deccc   %g3                             /* decr pgcnt */
 338         bnz,pt  %icc,4b
 339           add   %g1, %g2, %g1                   /* next page */
 340         retry
 341 1:
 342         /*
 343          * We need to demap in a user context
 344          *
 345          * g2 = sfmmup
 346          * g3 = pgcnt
 347          */
 348         SFMMU_CPU_CNUM(%g2, %g5, %g6)           ! %g5 = sfmmu cnum on this CPU
 349                 
 350         or      DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %g1, %g1
 351 
 352         ldub    [%g2 + SFMMU_CEXT], %g4         ! %g4 = sfmmup->cext
 353         sll     %g4, CTXREG_EXT_SHIFT, %g4
 354         or      %g5, %g4, %g5
 355 
 356         set     MMU_PCONTEXT, %g4
 357         ldxa    [%g4]ASI_DMMU, %g6              /* rd old ctxnum */
 358         srlx    %g6, CTXREG_NEXT_SHIFT, %g2     /* %g2 = nucleus pgsz */
 359         sllx    %g2, CTXREG_NEXT_SHIFT, %g2     /* preserve nucleus pgsz */
 360         or      %g5, %g2, %g5                   /* %g5 = nucleus pgsz | primary pgsz | cnum */
 361         stxa    %g5, [%g4]ASI_DMMU              /* wr new ctxum */
 362 
 363         set     MMU_PAGESIZE, %g2               /* g2 = pgsize */
 364         sethi   %hi(FLUSH_ADDR), %g5
 365 3:
 366         stxa    %g0, [%g1]ASI_DTLB_DEMAP
 367         stxa    %g0, [%g1]ASI_ITLB_DEMAP
 368         flush   %g5                             ! flush required by immu
 369 
 370         deccc   %g3                             /* decr pgcnt */
 371         bnz,pt  %icc,3b
 372           add   %g1, %g2, %g1                   /* next page */
 373 
 374         stxa    %g6, [%g4]ASI_DMMU              /* restore old ctxnum */
 375         retry
 376         SET_SIZE(vtag_flush_pgcnt_tl1)
 377 
 378         ENTRY_NP(vtag_flushall_tl1)
 379         /*
 380          * x-trap to flush tlb
 381          */
 382         set     DEMAP_ALL_TYPE, %g4
 383         stxa    %g0, [%g4]ASI_DTLB_DEMAP
 384         stxa    %g0, [%g4]ASI_ITLB_DEMAP
 385         retry
 386         SET_SIZE(vtag_flushall_tl1)
 387 
 388 
 389 /*
 390  * vac_flushpage(pfnum, color)
 391  *      Flush 1 8k page of the D-$ with physical page = pfnum
 392  *      Algorithm:
 393  *              The cheetah dcache is a 64k psuedo 4 way accaociative cache.
 394  *              It is virtual indexed, physically tagged cache.
 395  */
 396         .seg    ".data"
 397         .align  8
 398         .global dflush_type
 399 dflush_type:
 400         .word   FLUSHPAGE_TYPE
 401 
 402         ENTRY(vac_flushpage)
 403         /*
 404          * flush page from the d$
 405          *
 406          * %o0 = pfnum, %o1 = color
 407          */
 408         DCACHE_FLUSHPAGE(%o0, %o1, %o2, %o3, %o4)
 409         retl
 410           nop
 411         SET_SIZE(vac_flushpage)
 412 
 413 
 414         ENTRY_NP(vac_flushpage_tl1)
 415         /*
 416          * x-trap to flush page from the d$
 417          *
 418          * %g1 = pfnum, %g2 = color
 419          */
 420         DCACHE_FLUSHPAGE(%g1, %g2, %g3, %g4, %g5)
 421         retry
 422         SET_SIZE(vac_flushpage_tl1)
 423 
 424 
 425         ENTRY(vac_flushcolor)
 426         /*
 427          * %o0 = vcolor
 428          */
 429         DCACHE_FLUSHCOLOR(%o0, 0, %o1, %o2, %o3)
 430         DCACHE_FLUSHCOLOR(%o0, 1, %o1, %o2, %o3)
 431         DCACHE_FLUSHCOLOR(%o0, 2, %o1, %o2, %o3)
 432         DCACHE_FLUSHCOLOR(%o0, 3, %o1, %o2, %o3)
 433         retl
 434           nop
 435         SET_SIZE(vac_flushcolor)
 436 
 437 
 438         ENTRY(vac_flushcolor_tl1)
 439         /*
 440          * %g1 = vcolor
 441          */
 442         DCACHE_FLUSHCOLOR(%g1, 0, %g2, %g3, %g4)
 443         DCACHE_FLUSHCOLOR(%g1, 1, %g2, %g3, %g4)
 444         DCACHE_FLUSHCOLOR(%g1, 2, %g2, %g3, %g4)
 445         DCACHE_FLUSHCOLOR(%g1, 3, %g2, %g3, %g4)
 446         retry
 447         SET_SIZE(vac_flushcolor_tl1)
 448 
 449 /*
 450  * Determine whether or not the IDSR is busy.
 451  * Entry: no arguments
 452  * Returns: 1 if busy, 0 otherwise
 453  */
 454         ENTRY(idsr_busy)
 455         ldxa    [%g0]ASI_INTR_DISPATCH_STATUS, %g1
 456         clr     %o0
 457         btst    IDSR_BUSY, %g1
 458         bz,a,pt %xcc, 1f
 459         mov     1, %o0
 460 1:
 461         retl
 462         nop
 463         SET_SIZE(idsr_busy)
 464 
 465         .global _dispatch_status_busy
 466 _dispatch_status_busy:
 467         .asciz  "ASI_INTR_DISPATCH_STATUS error: busy"
 468         .align  4
 469 
 470 /*
 471  * Setup interrupt dispatch data registers
 472  * Entry:
 473  *      %o0 - function or inumber to call
 474  *      %o1, %o2 - arguments (2 uint64_t's)
 475  */
 476         .seg "text"
 477 
 478         ENTRY(init_mondo)
 479 #ifdef DEBUG
 480         !
 481         ! IDSR should not be busy at the moment
 482         !
 483         ldxa    [%g0]ASI_INTR_DISPATCH_STATUS, %g1
 484         btst    IDSR_BUSY, %g1
 485         bz,pt   %xcc, 1f
 486         nop
 487         sethi   %hi(_dispatch_status_busy), %o0
 488         call    panic
 489         or      %o0, %lo(_dispatch_status_busy), %o0
 490 #endif /* DEBUG */
 491 
 492         ALTENTRY(init_mondo_nocheck)
 493         !
 494         ! interrupt vector dispatch data reg 0
 495         !
 496 1:
 497         mov     IDDR_0, %g1
 498         mov     IDDR_1, %g2
 499         mov     IDDR_2, %g3
 500         stxa    %o0, [%g1]ASI_INTR_DISPATCH
 501 
 502         !
 503         ! interrupt vector dispatch data reg 1
 504         !
 505         stxa    %o1, [%g2]ASI_INTR_DISPATCH
 506 
 507         !
 508         ! interrupt vector dispatch data reg 2
 509         !
 510         stxa    %o2, [%g3]ASI_INTR_DISPATCH
 511 
 512         membar  #Sync
 513         retl
 514         nop
 515         SET_SIZE(init_mondo_nocheck)
 516         SET_SIZE(init_mondo)
 517 
 518 
 519 #if !(defined(JALAPENO) || defined(SERRANO))
 520 
 521 /*
 522  * Ship mondo to aid using busy/nack pair bn
 523  */
 524         ENTRY_NP(shipit)
 525         sll     %o0, IDCR_PID_SHIFT, %g1        ! IDCR<18:14> = agent id
 526         sll     %o1, IDCR_BN_SHIFT, %g2         ! IDCR<28:24> = b/n pair
 527         or      %g1, IDCR_OFFSET, %g1           ! IDCR<13:0> = 0x70
 528         or      %g1, %g2, %g1
 529         stxa    %g0, [%g1]ASI_INTR_DISPATCH     ! interrupt vector dispatch
 530         membar  #Sync
 531         retl
 532         nop
 533         SET_SIZE(shipit)
 534 
 535 #endif  /* !(JALAPENO || SERRANO) */
 536 
 537 
 538 /*
 539  * flush_instr_mem:
 540  *      Flush 1 page of the I-$ starting at vaddr
 541  *      %o0 vaddr
 542  *      %o1 bytes to be flushed
 543  * UltraSPARC-III maintains consistency of the on-chip Instruction Cache with
 544  * the stores from all processors so that a FLUSH instruction is only needed
 545  * to ensure pipeline is consistent. This means a single flush is sufficient at
 546  * the end of a sequence of stores that updates the instruction stream to
 547  * ensure correct operation.
 548  */
 549 
 550         ENTRY(flush_instr_mem)
 551         flush   %o0                     ! address irrelevant
 552         retl
 553         nop
 554         SET_SIZE(flush_instr_mem)
 555 
 556 
 557 #if defined(CPU_IMP_ECACHE_ASSOC)
 558 
 559         ENTRY(get_ecache_ctrl)
 560         GET_CPU_IMPL(%o0)
 561         cmp     %o0, JAGUAR_IMPL
 562         !
 563         ! Putting an ASI access in the delay slot may
 564         ! cause it to be accessed, even when annulled.
 565         !
 566         bne     1f
 567           nop
 568         ldxa    [%g0]ASI_EC_CFG_TIMING, %o0     ! read Jaguar shared E$ ctrl reg
 569         b       2f
 570           nop
 571 1:      
 572         ldxa    [%g0]ASI_EC_CTRL, %o0           ! read Ch/Ch+ E$ control reg
 573 2:
 574         retl
 575           nop
 576         SET_SIZE(get_ecache_ctrl)
 577 
 578 #endif  /* CPU_IMP_ECACHE_ASSOC */
 579 
 580 
 581 #if !(defined(JALAPENO) || defined(SERRANO))
 582 
 583 /*
 584  * flush_ecache:
 585  *      %o0 - 64 bit physical address
 586  *      %o1 - ecache size
 587  *      %o2 - ecache linesize
 588  */
 589 
 590         ENTRY(flush_ecache)
 591 
 592         /*
 593          * For certain CPU implementations, we have to flush the L2 cache
 594          * before flushing the ecache.
 595          */
 596         PN_L2_FLUSHALL(%g3, %g4, %g5)
 597 
 598         /*
 599          * Flush the entire Ecache using displacement flush.
 600          */
 601         ECACHE_FLUSHALL(%o1, %o2, %o0, %o4)
 602 
 603         retl
 604         nop
 605         SET_SIZE(flush_ecache)
 606 
 607 #endif  /* !(JALAPENO || SERRANO) */
 608 
 609 
 610         ENTRY(flush_dcache)
 611         ASM_LD(%o0, dcache_size)
 612         ASM_LD(%o1, dcache_linesize)
 613         CH_DCACHE_FLUSHALL(%o0, %o1, %o2)
 614         retl
 615         nop
 616         SET_SIZE(flush_dcache)
 617 
 618 
 619         ENTRY(flush_icache)
 620         GET_CPU_PRIVATE_PTR(%g0, %o0, %o2, flush_icache_1);
 621         ld      [%o0 + CHPR_ICACHE_LINESIZE], %o1
 622         ba,pt   %icc, 2f
 623           ld    [%o0 + CHPR_ICACHE_SIZE], %o0
 624 flush_icache_1:
 625         ASM_LD(%o0, icache_size)
 626         ASM_LD(%o1, icache_linesize)
 627 2:
 628         CH_ICACHE_FLUSHALL(%o0, %o1, %o2, %o4)
 629         retl
 630         nop
 631         SET_SIZE(flush_icache)
 632 
 633         ENTRY(kdi_flush_idcache)
 634         CH_DCACHE_FLUSHALL(%o0, %o1, %g1)
 635         CH_ICACHE_FLUSHALL(%o2, %o3, %g1, %g2)
 636         membar  #Sync
 637         retl
 638         nop
 639         SET_SIZE(kdi_flush_idcache)
 640 
 641         ENTRY(flush_pcache)
 642         PCACHE_FLUSHALL(%o0, %o1, %o2)
 643         retl
 644         nop
 645         SET_SIZE(flush_pcache)
 646 
 647 
 648 #if defined(CPU_IMP_L1_CACHE_PARITY)
 649 
 650 /*
 651  * Get dcache data and tag.  The Dcache data is a pointer to a ch_dc_data_t
 652  * structure (see cheetahregs.h):
 653  * The Dcache *should* be turned off when this code is executed.
 654  */
 655         .align  128
 656         ENTRY(get_dcache_dtag)
 657         rdpr    %pstate, %o5
 658         andn    %o5, PSTATE_IE | PSTATE_AM, %o3
 659         wrpr    %g0, %o3, %pstate
 660         b       1f
 661           stx   %o0, [%o1 + CH_DC_IDX]
 662 
 663         .align  128
 664 1:
 665         ldxa    [%o0]ASI_DC_TAG, %o2
 666         stx     %o2, [%o1 + CH_DC_TAG]
 667         membar  #Sync
 668         ldxa    [%o0]ASI_DC_UTAG, %o2
 669         membar  #Sync
 670         stx     %o2, [%o1 + CH_DC_UTAG]
 671         ldxa    [%o0]ASI_DC_SNP_TAG, %o2
 672         stx     %o2, [%o1 + CH_DC_SNTAG]
 673         add     %o1, CH_DC_DATA, %o1
 674         clr     %o3
 675 2:
 676         membar  #Sync                           ! required before ASI_DC_DATA
 677         ldxa    [%o0 + %o3]ASI_DC_DATA, %o2
 678         membar  #Sync                           ! required after ASI_DC_DATA
 679         stx     %o2, [%o1 + %o3]
 680         cmp     %o3, CH_DC_DATA_REG_SIZE - 8
 681         blt     2b
 682           add   %o3, 8, %o3
 683 
 684         /*
 685          * Unlike other CPUs in the family, D$ data parity bits for Panther
 686          * do not reside in the microtag. Instead, we have to read them
 687          * using the DC_data_parity bit of ASI_DCACHE_DATA. Also, instead
 688          * of just having 8 parity bits to protect all 32 bytes of data
 689          * per line, we now have 32 bits of parity.
 690          */
 691         GET_CPU_IMPL(%o3)
 692         cmp     %o3, PANTHER_IMPL
 693         bne     4f
 694           clr   %o3
 695 
 696         /*
 697          * move our pointer to the next field where we store parity bits
 698          * and add the offset of the last parity byte since we will be
 699          * storing all 4 parity bytes within one 64 bit field like this:
 700          *
 701          * +------+------------+------------+------------+------------+
 702          * |  -   | DC_parity  | DC_parity  | DC_parity  | DC_parity  |
 703          * |  -   | for word 3 | for word 2 | for word 1 | for word 0 |
 704          * +------+------------+------------+------------+------------+
 705          *  63:32     31:24        23:16         15:8          7:0     
 706          */
 707         add     %o1, CH_DC_PN_DATA_PARITY - CH_DC_DATA + 7, %o1
 708 
 709         /* add the DC_data_parity bit into our working index */
 710         mov     1, %o2
 711         sll     %o2, PN_DC_DATA_PARITY_BIT_SHIFT, %o2
 712         or      %o0, %o2, %o0
 713 3:
 714         membar  #Sync                           ! required before ASI_DC_DATA
 715         ldxa    [%o0 + %o3]ASI_DC_DATA, %o2
 716         membar  #Sync                           ! required after ASI_DC_DATA
 717         stb     %o2, [%o1]
 718         dec     %o1
 719         cmp     %o3, CH_DC_DATA_REG_SIZE - 8
 720         blt     3b
 721           add   %o3, 8, %o3
 722 4:
 723         retl
 724           wrpr  %g0, %o5, %pstate       
 725         SET_SIZE(get_dcache_dtag)
 726 
 727 
 728 /*
 729  * Get icache data and tag.  The data argument is a pointer to a ch_ic_data_t
 730  * structure (see cheetahregs.h):
 731  * The Icache *Must* be turned off when this function is called.
 732  * This is because diagnostic accesses to the Icache interfere with cache
 733  * consistency.
 734  */
 735         .align  128
 736         ENTRY(get_icache_dtag)
 737         rdpr    %pstate, %o5
 738         andn    %o5, PSTATE_IE | PSTATE_AM, %o3
 739         wrpr    %g0, %o3, %pstate
 740 
 741         stx     %o0, [%o1 + CH_IC_IDX]
 742         ldxa    [%o0]ASI_IC_TAG, %o2
 743         stx     %o2, [%o1 + CH_IC_PATAG]
 744         add     %o0, CH_ICTAG_UTAG, %o0
 745         ldxa    [%o0]ASI_IC_TAG, %o2
 746         add     %o0, (CH_ICTAG_UPPER - CH_ICTAG_UTAG), %o0
 747         stx     %o2, [%o1 + CH_IC_UTAG]
 748         ldxa    [%o0]ASI_IC_TAG, %o2
 749         add     %o0, (CH_ICTAG_LOWER - CH_ICTAG_UPPER), %o0
 750         stx     %o2, [%o1 + CH_IC_UPPER]
 751         ldxa    [%o0]ASI_IC_TAG, %o2
 752         andn    %o0, CH_ICTAG_TMASK, %o0
 753         stx     %o2, [%o1 + CH_IC_LOWER]
 754         ldxa    [%o0]ASI_IC_SNP_TAG, %o2
 755         stx     %o2, [%o1 + CH_IC_SNTAG]
 756         add     %o1, CH_IC_DATA, %o1
 757         clr     %o3
 758 2:
 759         ldxa    [%o0 + %o3]ASI_IC_DATA, %o2
 760         stx     %o2, [%o1 + %o3]
 761         cmp     %o3, PN_IC_DATA_REG_SIZE - 8
 762         blt     2b
 763           add   %o3, 8, %o3
 764 
 765         retl
 766           wrpr  %g0, %o5, %pstate       
 767         SET_SIZE(get_icache_dtag)
 768 
 769 /*
 770  * Get pcache data and tags.
 771  * inputs:
 772  *   pcache_idx - fully constructed VA for for accessing P$ diagnostic
 773  *                registers. Contains PC_way and PC_addr shifted into
 774  *                the correct bit positions. See the PRM for more details.
 775  *   data       - pointer to a ch_pc_data_t
 776  * structure (see cheetahregs.h):
 777  */
        .align  128
        ENTRY(get_pcache_dtag)
        ! %o0 = pcache_idx: fully constructed diagnostic VA (PC_way/PC_addr)
        ! %o1 = pointer to the ch_pc_data_t output structure
        rdpr    %pstate, %o5
        andn    %o5, PSTATE_IE | PSTATE_AM, %o3
        wrpr    %g0, %o3, %pstate       ! disable interrupts during diag access

        stx     %o0, [%o1 + CH_PC_IDX]  ! record the index used
        ldxa    [%o0]ASI_PC_STATUS_DATA, %o2
        stx     %o2, [%o1 + CH_PC_STATUS]
        ldxa    [%o0]ASI_PC_TAG, %o2
        stx     %o2, [%o1 + CH_PC_TAG]
        ldxa    [%o0]ASI_PC_SNP_TAG, %o2        ! snoop tag
        stx     %o2, [%o1 + CH_PC_SNTAG]
        add     %o1, CH_PC_DATA, %o1
        clr     %o3
2:
        ! Copy the P$ line data, 8 bytes per iteration, %o3 = byte offset.
        ldxa    [%o0 + %o3]ASI_PC_DATA, %o2
        stx     %o2, [%o1 + %o3]
        cmp     %o3, CH_PC_DATA_REG_SIZE - 8
        blt     2b
          add   %o3, 8, %o3             ! delay slot: advance offset

        retl
          wrpr  %g0, %o5, %pstate       ! delay slot: restore saved pstate
        SET_SIZE(get_pcache_dtag)
 803 
 804 #endif  /* CPU_IMP_L1_CACHE_PARITY */
 805 
 806 /*
 807  * re-enable the i$, d$, w$, and p$ according to bootup cache state.
 808  * Turn on WE, HPE, SPE, PE, IC, and DC bits defined as DCU_CACHE.
 809  *   %o0 - 64 bit constant
 810  */
        ENTRY(set_dcu)
        ! %o0 = new 64-bit DCU control register value (DCU_CACHE bits etc.)
        stxa    %o0, [%g0]ASI_DCU       ! Store to DCU
        flush   %g0     /* flush required after changing the IC bit */
        retl
        nop
        SET_SIZE(set_dcu)
 817 
 818 
 819 /*
 820  * Return DCU register.
 821  */
        ENTRY(get_dcu)
        ! Return the current DCU control register value in %o0.
        ldxa    [%g0]ASI_DCU, %o0               /* DCU control register */
        retl
        nop
        SET_SIZE(get_dcu)
 827 
 828 /*
 829  * Cheetah/Cheetah+ level 15 interrupt handler trap table entry.
 830  *
 831  * This handler is used to check for softints generated by error trap
 832  * handlers to report errors.  On Cheetah, this mechanism is used by the
 833  * Fast ECC at TL>0 error trap handler and, on Cheetah+, by both the Fast
 834  * ECC at TL>0 error and the I$/D$ parity error at TL>0 trap handlers.
 835  * NB: Must be 8 instructions or less to fit in trap table and code must
 836  *     be relocatable.
 837  */
 838 
        ENTRY_NP(ch_pil15_interrupt_instr)
        ! Relocatable trap-table stub: jump to the real PIL 15 handler
        ! (uses %g1 as scratch for the jump target).
        ASM_JMP(%g1, ch_pil15_interrupt)
        SET_SIZE(ch_pil15_interrupt_instr)
 842 
 843 
        ENTRY_NP(ch_pil15_interrupt)

        /*
         * Since pil_interrupt is hacked to assume that every level 15
         * interrupt is generated by the CPU to indicate a performance
         * counter overflow this gets ugly.  Before calling pil_interrupt
         * the Error at TL>0 pending status is inspected.  If it is
         * non-zero, then an error has occurred and it is handled.
         * Otherwise control is transferred to pil_interrupt.  Note that if
         * an error is detected pil_interrupt will not be called and
         * overflow interrupts may be lost causing erroneous performance
         * measurements.  However, error-recovery will have a detrimental
         * effect on performance anyway.
         */
        CPU_INDEX(%g1, %g4)             ! %g1 = CPU index (%g4 is scratch)
        set     ch_err_tl1_pending, %g4
        ldub    [%g1 + %g4], %g2        ! %g2 = ch_err_tl1_pending[CPU]
        brz     %g2, 1f                 ! no TL>0 error pending?
          nop

        /*
         * We have a pending TL>0 error, clear the TL>0 pending status.
         */
        stb     %g0, [%g1 + %g4]

        /*
         * Clear the softint.
         */
        mov     1, %g5
        sll     %g5, PIL_15, %g5
        wr      %g5, CLEAR_SOFTINT

        /*
         * For Cheetah*, call cpu_tl1_error via systrap at PIL 15
         * to process the Fast ECC/Cache Parity at TL>0 error.  Clear
         * panic flag (%g2).
         */
        set     cpu_tl1_error, %g1
        clr     %g2
        ba      sys_trap
          mov   PIL_15, %g4             ! delay slot: PIL for sys_trap

1:
        /*
         * The logout is invalid.
         *
         * Call the default interrupt handler.
         */
        sethi   %hi(pil_interrupt), %g1
        jmp     %g1 + %lo(pil_interrupt)
          mov   PIL_15, %g4             ! delay slot: PIL argument

        SET_SIZE(ch_pil15_interrupt)
 897 
 898 
 899 /*
 900  * Error Handling
 901  *
 902  * Cheetah provides error checking for all memory access paths between
 903  * the CPU, External Cache, Cheetah Data Switch and system bus. Error
 904  * information is logged in the AFSR, (also AFSR_EXT for Panther) and
 905  * AFAR and one of the following traps is generated (provided that it
 906  * is enabled in External Cache Error Enable Register) to handle that
 907  * error:
 908  * 1. trap 0x70: Precise trap 
 909  *    tt0_fecc for errors at trap level(TL)>=0
 910  * 2. trap 0x0A and 0x32: Deferred trap
 911  *    async_err for errors at TL>=0
 912  * 3. trap 0x63: Disrupting trap
 913  *    ce_err for errors at TL=0
 914  *    (Note that trap 0x63 cannot happen at trap level > 0)
 915  *
 916  * Trap level one handlers panic the system except for the fast ecc
 917  * error handler which tries to recover from certain errors.
 918  */
 919 
 920 /*
 921  * FAST ECC TRAP STRATEGY:
 922  *
 923  * Software must handle single and multi bit errors which occur due to data
 924  * or instruction cache reads from the external cache. A single or multi bit
 * error occurring in one of these situations results in a precise trap.
 926  *
 927  * The basic flow of this trap handler is as follows:
 928  *
 929  * 1) Record the state and then turn off the Dcache and Icache.  The Dcache
 930  *    is disabled because bad data could have been installed.  The Icache is
 931  *    turned off because we want to capture the Icache line related to the
 932  *    AFAR.
 933  * 2) Disable trapping on CEEN/NCCEN errors during TL=0 processing.
 * 3) Park sibling core if caches are shared (to avoid race condition while
 *    accessing shared resources such as L3 data staging register during
 *    CPU logout).
 937  * 4) Read the AFAR and AFSR.
 938  * 5) If CPU logout structure is not being used, then:
 939  *    6) Clear all errors from the AFSR.
 940  *    7) Capture Ecache, Dcache and Icache lines in "CPU log out" structure.
 941  *    8) Flush Ecache then Flush Dcache and Icache and restore to previous
 942  *       state.
 943  *    9) Unpark sibling core if we parked it earlier.
 944  *    10) call cpu_fast_ecc_error via systrap at PIL 14 unless we're already
 945  *        running at PIL 15.
 946  * 6) Otherwise, if CPU logout structure is being used:
 *    7) Increment the "logout busy count".
 948  *    8) Flush Ecache then Flush Dcache and Icache and restore to previous
 949  *       state.
 950  *    9) Unpark sibling core if we parked it earlier.
 951  *    10) Issue a retry since the other CPU error logging code will end up
 952  *       finding this error bit and logging information about it later.
 953  * 7) Alternatively (to 5 and 6 above), if the cpu_private struct is not
 954  *    yet initialized such that we can't even check the logout struct, then
 955  *    we place the clo_flags data into %g2 (sys_trap->have_win arg #1) and
 956  *    call cpu_fast_ecc_error via systrap. The clo_flags parameter is used
 957  *    to determine information such as TL, TT, CEEN and NCEEN settings, etc
 958  *    in the high level trap handler since we don't have access to detailed
 959  *    logout information in cases where the cpu_private struct is not yet
 960  *    initialized.
 961  *
 962  * We flush the E$ and D$ here on TL=1 code to prevent getting nested
 963  * Fast ECC traps in the TL=0 code.  If we get a Fast ECC event here in
 964  * the TL=1 code, we will go to the Fast ECC at TL>0 handler which,
 * since it uses different code/data from this handler, has a better
 966  * chance of fixing things up than simply recursing through this code
 967  * again (this would probably cause an eventual kernel stack overflow).
 968  * If the Fast ECC at TL>0 handler encounters a Fast ECC error before it
 969  * can flush the E$ (or the error is a stuck-at bit), we will recurse in
 970  * the Fast ECC at TL>0 handler and eventually Red Mode.
 971  *
 972  * Note that for Cheetah (and only Cheetah), we use alias addresses for
 973  * flushing rather than ASI accesses (which don't exist on Cheetah).
 974  * Should we encounter a Fast ECC error within this handler on Cheetah,
 975  * there's a good chance it's within the ecache_flushaddr buffer (since
 976  * it's the largest piece of memory we touch in the handler and it is
 977  * usually kernel text/data).  For that reason the Fast ECC at TL>0
 978  * handler for Cheetah uses an alternate buffer: ecache_tl1_flushaddr.
 979  */
 980 
 981 /*
 982  * Cheetah ecc-protected E$ trap (Trap 70) at TL=0
 983  * tt0_fecc is replaced by fecc_err_instr in cpu_init_trap of the various
 984  * architecture-specific files.  
 985  * NB: Must be 8 instructions or less to fit in trap table and code must
 986  *     be relocatable.
 987  */
 988 
        ENTRY_NP(fecc_err_instr)
        membar  #Sync                   ! Cheetah requires membar #Sync

        /*
         * Save current DCU state.  Turn off the Dcache and Icache.
         * The Dcache is disabled because bad data may have been installed;
         * the Icache is disabled so the line related to the AFAR can still
         * be captured by the logout code.  %g1 carries the saved DCU value
         * into fast_ecc_err.
         */
        ldxa    [%g0]ASI_DCU, %g1       ! save DCU in %g1
        andn    %g1, DCU_DC + DCU_IC, %g4
        stxa    %g4, [%g0]ASI_DCU
        flush   %g0     /* flush required after changing the IC bit */

        ASM_JMP(%g4, fast_ecc_err)
        SET_SIZE(fecc_err_instr)
1002 
1003 
1004 #if !(defined(JALAPENO) || defined(SERRANO))
1005 
        .section ".text"
        .align  64
        ENTRY_NP(fast_ecc_err)
        ! TL=0 Fast ECC handler, entered from fecc_err_instr.
        ! On entry: %g1 = saved DCU value (D$/I$ already turned off),
        ! %g2 is described per-step below.

        /*
         * Turn off CEEN and NCEEN.
         */
        ldxa    [%g0]ASI_ESTATE_ERR, %g3
        andn    %g3, EN_REG_NCEEN + EN_REG_CEEN, %g4
        stxa    %g4, [%g0]ASI_ESTATE_ERR
        membar  #Sync                   ! membar sync required

        /*
         * Check to see whether we need to park our sibling core
         * before recording diagnostic information from caches
         * which may be shared by both cores.
         * We use %g1 to store information about whether or not
         * we had to park the core (%g1 holds our DCUCR value and
         * we only use bits from that register which are "reserved"
         * to keep track of core parking) so that we know whether
         * or not to unpark later. %g5 and %g4 are scratch registers.
         */
        PARK_SIBLING_CORE(%g1, %g5, %g4)

        /*
         * Do the CPU log out capture.
         *   %g3 = "failed?" return value.
         *   %g2 = Input = AFAR. Output the clo_flags info which is passed
         *         into this macro via %g4. Output only valid if cpu_private
         *         struct has not been initialized.
         *   CHPR_FECCTL0_LOGOUT = cpu logout structure offset input
         *   %g4 = Trap information stored in the cpu logout flags field
         *   %g5 = scr1
         *   %g6 = scr2
         *   %g3 = scr3
         *   %g4 = scr4
         */
         /* store the CEEN and NCEEN values, TL=0 */
        and     %g3, EN_REG_CEEN + EN_REG_NCEEN, %g4
        set     CHPR_FECCTL0_LOGOUT, %g6
        DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)

        /*
         * Flush the Ecache (and L2 cache for Panther) to get the error out
         * of the Ecache.  If the UCC or UCU is on a dirty line, then the
         * following flush will turn that into a WDC or WDU, respectively.
         */
        PN_L2_FLUSHALL(%g4, %g5, %g6)

        ! Look up this CPU's cpunode to get E$ size and linesize for the flush.
        CPU_INDEX(%g4, %g5)
        mulx    %g4, CPU_NODE_SIZE, %g4
        set     cpunodes, %g5
        add     %g4, %g5, %g4
        ld      [%g4 + ECACHE_LINESIZE], %g5
        ld      [%g4 + ECACHE_SIZE], %g4

        ASM_LDX(%g6, ecache_flushaddr)
        ECACHE_FLUSHALL(%g4, %g5, %g6, %g7)

        /*
         * Flush the Dcache.  Since bad data could have been installed in
         * the Dcache we must flush it before re-enabling it.
         */
        ASM_LD(%g5, dcache_size)
        ASM_LD(%g6, dcache_linesize)
        CH_DCACHE_FLUSHALL(%g5, %g6, %g7)

        /*
         * Flush the Icache.  Since we turned off the Icache to capture the
         * Icache line it is now stale or corrupted and we must flush it
         * before re-enabling it.
         * If cpu_private is initialized, use the sizes cached there;
         * otherwise (branch to fast_ecc_err_5) fall back to the globals.
         */
        GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, fast_ecc_err_5);
        ld      [%g5 + CHPR_ICACHE_LINESIZE], %g6
        ba,pt   %icc, 6f
          ld    [%g5 + CHPR_ICACHE_SIZE], %g5
fast_ecc_err_5:
        ASM_LD(%g5, icache_size)
        ASM_LD(%g6, icache_linesize)
6:
        CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)

        /*
         * check to see whether we parked our sibling core at the start
         * of this handler. If so, we need to unpark it here.
         * We use DCUCR reserved bits (stored in %g1) to keep track of
         * whether or not we need to unpark. %g5 and %g4 are scratch registers.
         */
        UNPARK_SIBLING_CORE(%g1, %g5, %g4)

        /*
         * Restore the Dcache and Icache to the previous state.
         */
        stxa    %g1, [%g0]ASI_DCU
        flush   %g0     /* flush required after changing the IC bit */

        /*
         * Make sure our CPU logout operation was successful.
         */
        cmp     %g3, %g0
        be      8f                      ! %g3 == 0: logout succeeded
          nop

        /*
         * If the logout structure had been busy, how many times have
         * we tried to use it and failed (nesting count)? If we have
         * already recursed a substantial number of times, then we can
         * assume things are not going to get better by themselves and
         * so it would be best to panic.
         */
        cmp     %g3, CLO_NESTING_MAX
        blt     7f
          nop

        call ptl1_panic
          mov   PTL1_BAD_ECC, %g1       ! delay slot: panic reason

7:      
        /*
         * Otherwise, if the logout structure was busy but we have not
         * nested more times than our maximum value, then we simply
         * issue a retry. Our TL=0 trap handler code will check and
         * clear the AFSR after it is done logging what is currently
         * in the logout struct and handle this event at that time.
         */
        retry
8:
        /*
         * Call cpu_fast_ecc_error via systrap at PIL 14 unless we're
         * already at PIL 15.
         */
        set     cpu_fast_ecc_error, %g1
        rdpr    %pil, %g4
        cmp     %g4, PIL_14
        ba      sys_trap
          movl  %icc, PIL_14, %g4       ! delay: raise to PIL_14 if below it

        SET_SIZE(fast_ecc_err)
1144 
1145 #endif  /* !(JALAPENO || SERRANO) */
1146 
1147 
1148 /*
1149  * Cheetah/Cheetah+ Fast ECC at TL>0 trap strategy:
1150  *
1151  * The basic flow of this trap handler is as follows:
1152  *
1153  * 1) In the "trap 70" trap table code (fecc_err_tl1_instr), generate a
1154  *    software trap 0 ("ta 0") to buy an extra set of %tpc, etc. which we
1155  *    will use to save %g1 and %g2.
1156  * 2) At the software trap 0 at TL>0 trap table code (fecc_err_tl1_cont_instr),
1157  *    we save %g1+%g2 using %tpc, %tnpc + %tstate and jump to the fast ecc
1158  *    handler (using the just saved %g1).
1159  * 3) Turn off the Dcache if it was on and save the state of the Dcache
1160  *    (whether on or off) in Bit2 (CH_ERR_TSTATE_DC_ON) of %tstate.
1161  *    NB: we don't turn off the Icache because bad data is not installed nor
1162  *        will we be doing any diagnostic accesses.
1163  * 4) compute physical address of the per-cpu/per-tl save area using %g1+%g2
1164  * 5) Save %g1-%g7 into the per-cpu/per-tl save area (%g1 + %g2 from the
1165  *    %tpc, %tnpc, %tstate values previously saved).
1166  * 6) set %tl to %tl - 1.
1167  * 7) Save the appropriate flags and TPC in the ch_err_tl1_data structure.
1168  * 8) Save the value of CH_ERR_TSTATE_DC_ON in the ch_err_tl1_tmp field.
1169  * 9) For Cheetah and Jalapeno, read the AFAR and AFSR and clear.  For
1170  *    Cheetah+ (and later), read the shadow AFAR and AFSR but don't clear.
1171  *    Save the values in ch_err_tl1_data.  For Panther, read the shadow
1172  *    AFSR_EXT and save the value in ch_err_tl1_data.
1173  * 10) Disable CEEN/NCEEN to prevent any disrupting/deferred errors from
1174  *    being queued.  We'll report them via the AFSR/AFAR capture in step 13.
1175  * 11) Flush the Ecache.
1176  *    NB: the Ecache is flushed assuming the largest possible size with
1177  *        the smallest possible line size since access to the cpu_nodes may
1178  *        cause an unrecoverable DTLB miss.
1179  * 12) Reenable CEEN/NCEEN with the value saved from step 10.
1180  * 13) For Cheetah and Jalapeno, read the AFAR and AFSR and clear again.
1181  *    For Cheetah+ (and later), read the primary AFAR and AFSR and now clear.
1182  *    Save the read AFSR/AFAR values in ch_err_tl1_data.  For Panther,
1183  *    read and clear the primary AFSR_EXT and save it in ch_err_tl1_data.
1184  * 14) Flush and re-enable the Dcache if it was on at step 3.
1185  * 15) Do TRAPTRACE if enabled.
1186  * 16) Check if a UCU->WDU (or L3_UCU->WDU for Panther) happened, panic if so.
1187  * 17) Set the event pending flag in ch_err_tl1_pending[CPU]
1188  * 18) Cause a softint 15.  The pil15_interrupt handler will inspect the
1189  *    event pending flag and call cpu_tl1_error via systrap if set.
1190  * 19) Restore the registers from step 5 and issue retry.
1191  */
1192 
1193 /*
1194  * Cheetah ecc-protected E$ trap (Trap 70) at TL>0
1195  * tt1_fecc is replaced by fecc_err_tl1_instr in cpu_init_trap of the various
1196  * architecture-specific files.  This generates a "Software Trap 0" at TL>0,
1197  * which goes to fecc_err_tl1_cont_instr, and we continue the handling there.
1198  * NB: Must be 8 instructions or less to fit in trap table and code must
1199  *     be relocatable.
1200  */
1201 
        ENTRY_NP(fecc_err_tl1_instr)
        ! Trap 0x70 at TL>0: generate software trap 0 to buy an extra trap
        ! level; handling continues at fecc_err_tl1_cont_instr.
        CH_ERR_TL1_TRAPENTRY(SWTRAP_0);
        SET_SIZE(fecc_err_tl1_instr)
1205 
1206 /*
1207  * Software trap 0 at TL>0.
1208  * tt1_swtrap0 is replaced by fecc_err_tl1_cont_instr in cpu_init_trap of
1209  * the various architecture-specific files.  This is used as a continuation
1210  * of the fast ecc handling where we've bought an extra TL level, so we can
1211  * use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
1212  * and %g2.  Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
1213  * there's a reserved hole from 3-7.  We only use bits 0-1 and 8-9 (the low
1214  * order two bits from %g1 and %g2 respectively).
1215  * NB: Must be 8 instructions or less to fit in trap table and code must
1216  *     be relocatable.
1217  */
1218 
        ENTRY_NP(fecc_err_tl1_cont_instr)
        ! Software trap 0 continuation: save %g1/%g2 via %tpc/%tnpc/%tstate
        ! and enter the Fast ECC at TL>0 handler.
        CH_ERR_TL1_SWTRAPENTRY(fast_ecc_tl1_err)
        SET_SIZE(fecc_err_tl1_cont_instr)
1222 
1223 
1224 /*
1225  * The ce_err function handles disrupting trap type 0x63 at TL=0.
1226  *
1227  * AFSR errors bits which cause this trap are:
1228  *      CE, EMC, EDU:ST, EDC, WDU, WDC, CPU, CPC, IVU, IVC
1229  *
1230  * NCEEN Bit of Cheetah External Cache Error Enable Register enables
1231  * the following AFSR disrupting traps: EDU:ST, WDU, CPU, IVU
1232  *
1233  * CEEN Bit of Cheetah External Cache Error Enable Register enables
1234  * the following AFSR disrupting traps: CE, EMC, EDC, WDC, CPC, IVC
1235  *
1236  * Cheetah+ also handles (No additional processing required):
1237  *    DUE, DTO, DBERR   (NCEEN controlled)
1238  *    THCE              (CEEN and ET_ECC_en controlled)
1239  *    TUE               (ET_ECC_en controlled)
1240  *
1241  * Panther further adds:
1242  *    IMU, L3_EDU, L3_WDU, L3_CPU               (NCEEN controlled)
1243  *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE      (CEEN controlled)
1244  *    TUE_SH, TUE               (NCEEN and L2_tag_ECC_en controlled)
1245  *    L3_TUE, L3_TUE_SH         (NCEEN and ET_ECC_en controlled)
1246  *    THCE                      (CEEN and L2_tag_ECC_en controlled)
1247  *    L3_THCE                   (CEEN and ET_ECC_en controlled)
1248  *
1249  * Steps:
1250  *      1. Disable hardware corrected disrupting errors only (CEEN)
 *      2. Park sibling core if caches are shared (to avoid race
 *         condition while accessing shared resources such as L3
 *         data staging register during CPU logout).
1254  *      3. If the CPU logout structure is not currently being used:
1255  *              4. Clear AFSR error bits
1256  *              5. Capture Ecache, Dcache and Icache lines associated
1257  *                 with AFAR.
1258  *              6. Unpark sibling core if we parked it earlier.
1259  *              7. call cpu_disrupting_error via sys_trap at PIL 14
1260  *                 unless we're already running at PIL 15.
1261  *      4. Otherwise, if the CPU logout structure is busy:
 *              5. Increment "logout busy count" and place into %g3
1263  *              6. Unpark sibling core if we parked it earlier.
1264  *              7. Issue a retry since the other CPU error logging
1265  *                 code will end up finding this error bit and logging
1266  *                 information about it later.
1267  *      5. Alternatively (to 3 and 4 above), if the cpu_private struct is
1268  *         not yet initialized such that we can't even check the logout
1269  *         struct, then we place the clo_flags data into %g2
1270  *         (sys_trap->have_win arg #1) and call cpu_disrupting_error via
1271  *         systrap. The clo_flags parameter is used to determine information
1272  *         such as TL, TT, CEEN settings, etc in the high level trap
1273  *         handler since we don't have access to detailed logout information
1274  *         in cases where the cpu_private struct is not yet initialized.
1275  *
1276  * %g3: [ logout busy count ] - arg #2
1277  * %g2: [ clo_flags if cpu_private unavailable ] - sys_trap->have_win: arg #1
1278  */
1279 
        .align  128
        ENTRY_NP(ce_err)
        ! Disrupting trap 0x63 (TL=0) handler; see block comment above for
        ! the AFSR bits and the overall strategy.
        membar  #Sync                   ! Cheetah requires membar #Sync

        /*
         * Disable trap on hardware corrected errors (CEEN) while at TL=0
         * to prevent recursion.
         */
        ldxa    [%g0]ASI_ESTATE_ERR, %g1
        bclr    EN_REG_CEEN, %g1
        stxa    %g1, [%g0]ASI_ESTATE_ERR
        membar  #Sync                   ! membar sync required

        /*
         * Save current DCU state.  Turn off Icache to allow capture of
         * Icache data by DO_CPU_LOGOUT.
         */
        ldxa    [%g0]ASI_DCU, %g1       ! save DCU in %g1
        andn    %g1, DCU_IC, %g4
        stxa    %g4, [%g0]ASI_DCU
        flush   %g0     /* flush required after changing the IC bit */

        /*
         * Check to see whether we need to park our sibling core
         * before recording diagnostic information from caches
         * which may be shared by both cores.
         * We use %g1 to store information about whether or not
         * we had to park the core (%g1 holds our DCUCR value and
         * we only use bits from that register which are "reserved"
         * to keep track of core parking) so that we know whether
         * or not to unpark later. %g5 and %g4 are scratch registers.
         */
        PARK_SIBLING_CORE(%g1, %g5, %g4)

        /*
         * Do the CPU log out capture.
         *   %g3 = "failed?" return value.
         *   %g2 = Input = AFAR. Output the clo_flags info which is passed
         *         into this macro via %g4. Output only valid if cpu_private
         *         struct has not been initialized.
         *   CHPR_CECC_LOGOUT = cpu logout structure offset input
         *   %g4 = Trap information stored in the cpu logout flags field
         *   %g5 = scr1
         *   %g6 = scr2
         *   %g3 = scr3
         *   %g4 = scr4
         */
        clr     %g4                     ! TL=0 bit in afsr
        set     CHPR_CECC_LOGOUT, %g6
        DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)

        /*
         * Flush the Icache.  Since we turned off the Icache to capture the
         * Icache line it is now stale or corrupted and we must flush it
         * before re-enabling it.
         * If cpu_private is initialized, use the sizes cached there;
         * otherwise (branch to ce_err_1) fall back to the globals.
         */
        GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, ce_err_1);
        ld      [%g5 + CHPR_ICACHE_LINESIZE], %g6
        ba,pt   %icc, 2f
          ld    [%g5 + CHPR_ICACHE_SIZE], %g5
ce_err_1:
        ASM_LD(%g5, icache_size)
        ASM_LD(%g6, icache_linesize)
2:
        CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)

        /*
         * check to see whether we parked our sibling core at the start
         * of this handler. If so, we need to unpark it here.
         * We use DCUCR reserved bits (stored in %g1) to keep track of
         * whether or not we need to unpark. %g5 and %g4 are scratch registers.
         */
        UNPARK_SIBLING_CORE(%g1, %g5, %g4)

        /*
         * Restore Icache to previous state.
         */
        stxa    %g1, [%g0]ASI_DCU
        flush   %g0     /* flush required after changing the IC bit */

        /*
         * Make sure our CPU logout operation was successful.
         */
        cmp     %g3, %g0
        be      4f                      ! %g3 == 0: logout succeeded
          nop

        /*
         * If the logout structure had been busy, how many times have
         * we tried to use it and failed (nesting count)? If we have
         * already recursed a substantial number of times, then we can
         * assume things are not going to get better by themselves and
         * so it would be best to panic.
         */
        cmp     %g3, CLO_NESTING_MAX
        blt     3f
          nop

        call ptl1_panic
          mov   PTL1_BAD_ECC, %g1       ! delay slot: panic reason

3:
        /*
         * Otherwise, if the logout structure was busy but we have not
         * nested more times than our maximum value, then we simply
         * issue a retry. Our TL=0 trap handler code will check and
         * clear the AFSR after it is done logging what is currently
         * in the logout struct and handle this event at that time.
         */
        retry
4:
        /*
         * Call cpu_disrupting_error via systrap at PIL 14 unless we're
         * already at PIL 15.
         */
        set     cpu_disrupting_error, %g1
        rdpr    %pil, %g4
        cmp     %g4, PIL_14
        ba      sys_trap
          movl  %icc, PIL_14, %g4       ! delay: raise to PIL_14 if below it
        SET_SIZE(ce_err)
1401 
1402 
        .align  64
        ENTRY_NP(ce_err_tl1)
        ! Disrupting trap 0x63 cannot legitimately occur at TL>0
        ! (see the comment above ce_err), so this is always fatal.
        call ptl1_panic
          mov   PTL1_BAD_TRAP, %g1      ! delay slot: panic reason

        SET_SIZE(ce_err_tl1)
1410 
1411         
1412 /*
1413  * The async_err function handles deferred trap types 0xA 
1414  * (instruction_access_error) and 0x32 (data_access_error) at TL>=0.
1415  *
1416  * AFSR errors bits which cause this trap are:
1417  *      UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
 * On some platforms, EMU may cause cheetah to pull the error pin
1419  * never giving Solaris a chance to take a trap.
1420  *
1421  * NCEEN Bit of Cheetah External Cache Error Enable Register enables
1422  * the following AFSR deferred traps: UE, EMU, EDU:BLD, TO, BERR
1423  *
1424  * Steps:
1425  *      1. Disable CEEN and NCEEN errors to prevent recursive errors.
1426  *      2. Turn D$ off per Cheetah PRM P.5 Note 6, turn I$ off to capture
1427  *         I$ line in DO_CPU_LOGOUT.
 *      3. Park sibling core if caches are shared (to avoid race
 *         condition while accessing shared resources such as L3
 *         data staging register during CPU logout).
1431  *      4. If the CPU logout structure is not currently being used:
1432  *              5. Clear AFSR error bits
1433  *              6. Capture Ecache, Dcache and Icache lines associated
1434  *                 with AFAR.
1435  *              7. Unpark sibling core if we parked it earlier.
1436  *              8. call cpu_deferred_error via sys_trap.
1437  *      5. Otherwise, if the CPU logout structure is busy:
 *              6. Increment "logout busy count"
1439  *              7. Unpark sibling core if we parked it earlier.
1440  *              8) Issue a retry since the other CPU error logging
1441  *                 code will end up finding this error bit and logging
1442  *                 information about it later.
1443  *      6. Alternatively (to 4 and 5 above), if the cpu_private struct is
1444  *         not yet initialized such that we can't even check the logout
1445  *         struct, then we place the clo_flags data into %g2
1446  *         (sys_trap->have_win arg #1) and call cpu_deferred_error via
1447  *         systrap. The clo_flags parameter is used to determine information
1448  *         such as TL, TT, CEEN settings, etc in the high level trap handler
1449  *         since we don't have access to detailed logout information in cases
1450  *         where the cpu_private struct is not yet initialized.
1451  *
1452  * %g2: [ clo_flags if cpu_private unavailable ] - sys_trap->have_win: arg #1
1453  * %g3: [ logout busy count ] - arg #2
1454  */
1455 
        ENTRY_NP(async_err)
        membar  #Sync                   ! Cheetah requires membar #Sync

        /*
         * Disable CEEN and NCEEN so no further async error traps can be
         * taken while we capture diagnostic state.  The original E-state
         * error-enable value is kept in %g3; its CEEN bit is folded into
         * the logout flags below.
         */
        ldxa    [%g0]ASI_ESTATE_ERR, %g3
        andn    %g3, EN_REG_NCEEN + EN_REG_CEEN, %g4
        stxa    %g4, [%g0]ASI_ESTATE_ERR
        membar  #Sync                   ! membar sync required

        /*
         * Save current DCU state.
         * Disable Icache to allow capture of Icache data by DO_CPU_LOGOUT.
         * Do this regardless of whether this is a Data Access Error or
         * Instruction Access Error Trap.
         * Disable Dcache for both Data Access Error and Instruction Access
         * Error per Cheetah PRM P.5 Note 6.
         */
        ldxa    [%g0]ASI_DCU, %g1       ! save DCU in %g1
        andn    %g1, DCU_IC + DCU_DC, %g4
        stxa    %g4, [%g0]ASI_DCU
        flush   %g0     /* flush required after changing the IC bit */

        /*
         * Check to see whether we need to park our sibling core
         * before recording diagnostic information from caches
         * which may be shared by both cores.
         * We use %g1 to store information about whether or not
         * we had to park the core (%g1 holds our DCUCR value and
         * we only use bits from that register which are "reserved"
         * to keep track of core parking) so that we know whether
         * or not to unpark later. %g6 and %g4 are scratch registers.
         */
        PARK_SIBLING_CORE(%g1, %g6, %g4)

        /*
         * Do the CPU logout capture.
         *
         *   %g3 = "failed?" return value.
         *   %g2 = Input = AFAR. Output the clo_flags info which is passed
         *         into this macro via %g4. Output only valid if cpu_private
         *         struct has not been initialized.
         *   CHPR_ASYNC_LOGOUT = cpu logout structure offset input
         *   %g4 = Trap information stored in the cpu logout flags field
         *   %g5 = scr1
         *   %g6 = scr2
         *   %g3 = scr3
         *   %g4 = scr4
         */
        andcc   %g5, T_TL1, %g0
        clr     %g6
        movnz   %xcc, 1, %g6                    ! set %g6 if T_TL1 set
        sllx    %g6, CLO_FLAGS_TL_SHIFT, %g6
        sllx    %g5, CLO_FLAGS_TT_SHIFT, %g4
        set     CLO_FLAGS_TT_MASK, %g2
        and     %g4, %g2, %g4                   ! ttype
        or      %g6, %g4, %g4                   ! TT and TL
        and     %g3, EN_REG_CEEN, %g3           ! CEEN value
        or      %g3, %g4, %g4                   ! TT and TL and CEEN
        set     CHPR_ASYNC_LOGOUT, %g6
        DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)

        /*
         * If the logout struct was busy, we may need to pass the
         * TT, TL, and CEEN information to the TL=0 handler via
         * systrap parameter so save it off here.
         */
        cmp     %g3, %g0
        be      1f
          nop
        sllx    %g4, 32, %g4
        or      %g4, %g3, %g3           ! %g3 = (flags << 32) | busy count
1:
        /*
         * Flush the Icache.  Since we turned off the Icache to capture the
         * Icache line it is now stale or corrupted and we must flush it
         * before re-enabling it.
         */
        GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, async_err_1);
        ld      [%g5 + CHPR_ICACHE_LINESIZE], %g6
        ba,pt   %icc, 2f
          ld    [%g5 + CHPR_ICACHE_SIZE], %g5
async_err_1:
        /*
         * cpu_private is not initialized yet; fall back to the global
         * icache geometry variables.
         */
        ASM_LD(%g5, icache_size)
        ASM_LD(%g6, icache_linesize)
2:
        CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)

        /*
         * Flush the Dcache before turning it back on since it may now
         * contain stale or corrupt data.
         */
        ASM_LD(%g5, dcache_size)
        ASM_LD(%g6, dcache_linesize)
        CH_DCACHE_FLUSHALL(%g5, %g6, %g7)

        /*
         * check to see whether we parked our sibling core at the start
         * of this handler. If so, we need to unpark it here.
         * We use DCUCR reserved bits (stored in %g1) to keep track of
         * whether or not we need to unpark. %g5 and %g7 are scratch registers.
         */
        UNPARK_SIBLING_CORE(%g1, %g5, %g7)

        /*
         * Restore Icache and Dcache to previous state.
         */
        stxa    %g1, [%g0]ASI_DCU
        flush   %g0     /* flush required after changing the IC bit */

        /*
         * Make sure our CPU logout operation was successful.
         */
        cmp     %g3, %g0
        be      4f
          nop

        /*
         * If the logout structure had been busy, how many times have
         * we tried to use it and failed (nesting count)? If we have
         * already recursed a substantial number of times, then we can
         * assume things are not going to get better by themselves and
         * so it would be best to panic.
         */
        cmp     %g3, CLO_NESTING_MAX
        blt     3f
          nop

        call ptl1_panic
          mov   PTL1_BAD_ECC, %g1

3:
        /*
         * Otherwise, if the logout structure was busy but we have not
         * nested more times than our maximum value, then we simply
         * issue a retry. Our TL=0 trap handler code will check and
         * clear the AFSR after it is done logging what is currently
         * in the logout struct and handle this event at that time.
         */
        retry
4:
        RESET_USER_RTT_REGS(%g4, %g5, async_err_resetskip)
async_err_resetskip:
        set     cpu_deferred_error, %g1 ! TL=0 handler to run via sys_trap
        ba      sys_trap
          mov   PIL_15, %g4             ! run at pil 15
        SET_SIZE(async_err)
1608 
1609 #if defined(CPU_IMP_L1_CACHE_PARITY)
1610 
1611 /*
1612  * D$ parity error trap (trap 71) at TL=0.
1613  * tt0_dperr is replaced by dcache_parity_instr in cpu_init_trap of
1614  * the various architecture-specific files.  This merely sets up the
1615  * arguments for cpu_parity_error and calls it via sys_trap.
1616  * NB: Must be 8 instructions or less to fit in trap table and code must
1617  *     be relocatable.
1618  */
        ENTRY_NP(dcache_parity_instr)
        membar  #Sync                   ! Cheetah+ requires membar #Sync
        set     cpu_parity_error, %g1   ! TL=0 handler for sys_trap to run
        or      %g0, CH_ERR_DPE, %g2    ! arg #1: D$ parity error flag
        rdpr    %tpc, %g3               ! arg #2: faulting PC
        sethi   %hi(sys_trap), %g7      ! sethi/jmp keeps this relocatable
        jmp     %g7 + %lo(sys_trap)
          mov   PIL_15, %g4             ! run at pil 15 (delay slot)
        SET_SIZE(dcache_parity_instr)
1628 
1629 
1630 /*
1631  * D$ parity error trap (trap 71) at TL>0.
1632  * tt1_dperr is replaced by dcache_parity_tl1_instr in cpu_init_trap of
1633  * the various architecture-specific files.  This generates a "Software
1634  * Trap 1" at TL>0, which goes to dcache_parity_tl1_cont_instr, and we
1635  * continue the handling there.
1636  * NB: Must be 8 instructions or less to fit in trap table and code must
1637  *     be relocatable.
1638  */
        ENTRY_NP(dcache_parity_tl1_instr)
        /* Issue software trap 1 to buy an extra trap level; handling
         * continues in dcache_parity_tl1_cont_instr. */
        CH_ERR_TL1_TRAPENTRY(SWTRAP_1);
        SET_SIZE(dcache_parity_tl1_instr)
1642 
1643 
1644 /*
1645  * Software trap 1 at TL>0.
1646  * tt1_swtrap1 is replaced by dcache_parity_tl1_cont_instr in cpu_init_trap
1647  * of the various architecture-specific files.  This is used as a continuation
1648  * of the dcache parity handling where we've bought an extra TL level, so we
1649  * can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
1650  * and %g2.  Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
1651  * there's a reserved hole from 3-7.  We only use bits 0-1 and 8-9 (the low
1652  * order two bits from %g1 and %g2 respectively).
1653  * NB: Must be 8 instructions or less to fit in trap table and code must
1654  *     be relocatable.
1655  */
        ENTRY_NP(dcache_parity_tl1_cont_instr)
        /* %g1/%g2 are stashed in %tpc/%tnpc/%tstate; jump to the full
         * TL>0 D$ parity handler. */
        CH_ERR_TL1_SWTRAPENTRY(dcache_parity_tl1_err);
        SET_SIZE(dcache_parity_tl1_cont_instr)
1659 
1660 /*
1661  * D$ parity error at TL>0 handler
1662  * We get here via trap 71 at TL>0->Software trap 1 at TL>0.  We enter
1663  * this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
1664  */
1665 
        ENTRY_NP(dcache_parity_tl1_err)

        /*
         * This macro saves all the %g registers in the ch_err_tl1_data
         * structure, updates the ch_err_tl1_flags and saves the %tpc in
         * ch_err_tl1_tpc.  At the end of this macro, %g1 will point to
         * the ch_err_tl1_data structure and %g2 will have the original
         * flags in the ch_err_tl1_data structure.  All %g registers
         * except for %g1 and %g2 will be available.
         */
        CH_ERR_TL1_ENTER(CH_ERR_DPE);

#ifdef TRAPTRACE
        /*
         * Get current trap trace entry physical pointer.
         */
        CPU_INDEX(%g6, %g5)
        sll     %g6, TRAPTR_SIZE_SHIFT, %g6
        set     trap_trace_ctl, %g5
        add     %g6, %g5, %g6           ! %g6 = &trap_trace_ctl[cpuid]
        ld      [%g6 + TRAPTR_LIMIT], %g5
        tst     %g5                     ! limit == 0 => tracing disabled
        be      %icc, dpe_tl1_skip_tt
          nop
        ldx     [%g6 + TRAPTR_PBASE], %g5
        ld      [%g6 + TRAPTR_OFFSET], %g4
        add     %g5, %g4, %g5           ! %g5 = paddr of current entry

        /*
         * Create trap trace entry.
         */
        rd      %asi, %g7               ! save caller's %asi
        wr      %g0, TRAPTR_ASI, %asi
        rd      STICK, %g4
        stxa    %g4, [%g5 + TRAP_ENT_TICK]%asi
        rdpr    %tl, %g4
        stha    %g4, [%g5 + TRAP_ENT_TL]%asi
        rdpr    %tt, %g4
        stha    %g4, [%g5 + TRAP_ENT_TT]%asi
        rdpr    %tpc, %g4
        stna    %g4, [%g5 + TRAP_ENT_TPC]%asi
        rdpr    %tstate, %g4
        stxa    %g4, [%g5 + TRAP_ENT_TSTATE]%asi
        stna    %sp, [%g5 + TRAP_ENT_SP]%asi
        stna    %g0, [%g5 + TRAP_ENT_TR]%asi
        stna    %g0, [%g5 + TRAP_ENT_F1]%asi
        stna    %g0, [%g5 + TRAP_ENT_F2]%asi
        stna    %g0, [%g5 + TRAP_ENT_F3]%asi
        stna    %g0, [%g5 + TRAP_ENT_F4]%asi
        wr      %g0, %g7, %asi          ! restore caller's %asi

        /*
         * Advance trap trace pointer, wrapping back to offset 0 when the
         * next entry would cross the buffer limit.
         */
        ld      [%g6 + TRAPTR_OFFSET], %g5
        ld      [%g6 + TRAPTR_LIMIT], %g4
        st      %g5, [%g6 + TRAPTR_LAST_OFFSET]
        add     %g5, TRAP_ENT_SIZE, %g5
        sub     %g4, TRAP_ENT_SIZE, %g4
        cmp     %g5, %g4
        movge   %icc, 0, %g5            ! wrap at end of buffer
        st      %g5, [%g6 + TRAPTR_OFFSET]
dpe_tl1_skip_tt:
#endif  /* TRAPTRACE */

        /*
         * I$ and D$ are automatically turned off by HW when the CPU hits
         * a dcache or icache parity error so we will just leave those two
         * off for now to avoid repeating this trap.
         * For Panther, however, since we trap on P$ data parity errors
         * and HW does not automatically disable P$, we need to disable it
         * here so that we don't encounter any recursive traps when we
         * issue the retry.
         */
        ldxa    [%g0]ASI_DCU, %g3
        mov     1, %g4
        sllx    %g4, DCU_PE_SHIFT, %g4  ! %g4 = P$ enable bit
        andn    %g3, %g4, %g3           ! clear DCU.PE
        stxa    %g3, [%g0]ASI_DCU
        membar  #Sync

        /*
         * We fall into this macro if we've successfully logged the error in
         * the ch_err_tl1_data structure and want the PIL15 softint to pick
         * it up and log it.  %g1 must point to the ch_err_tl1_data structure.
         * Restores the %g registers and issues retry.
         */
        CH_ERR_TL1_EXIT;
        SET_SIZE(dcache_parity_tl1_err)
1755 
1756 /*
1757  * I$ parity error trap (trap 72) at TL=0.
1758  * tt0_iperr is replaced by icache_parity_instr in cpu_init_trap of
1759  * the various architecture-specific files.  This merely sets up the
1760  * arguments for cpu_parity_error and calls it via sys_trap.
1761  * NB: Must be 8 instructions or less to fit in trap table and code must
1762  *     be relocatable.
1763  */
1764 
        ENTRY_NP(icache_parity_instr)
        membar  #Sync                   ! Cheetah+ requires membar #Sync
        set     cpu_parity_error, %g1   ! TL=0 handler for sys_trap to run
        or      %g0, CH_ERR_IPE, %g2    ! arg #1: I$ parity error flag
        rdpr    %tpc, %g3               ! arg #2: faulting PC
        sethi   %hi(sys_trap), %g7      ! sethi/jmp keeps this relocatable
        jmp     %g7 + %lo(sys_trap)
          mov   PIL_15, %g4             ! run at pil 15 (delay slot)
        SET_SIZE(icache_parity_instr)
1774 
1775 /*
1776  * I$ parity error trap (trap 72) at TL>0.
1777  * tt1_iperr is replaced by icache_parity_tl1_instr in cpu_init_trap of
1778  * the various architecture-specific files.  This generates a "Software
1779  * Trap 2" at TL>0, which goes to icache_parity_tl1_cont_instr, and we
1780  * continue the handling there.
1781  * NB: Must be 8 instructions or less to fit in trap table and code must
1782  *     be relocatable.
1783  */
        ENTRY_NP(icache_parity_tl1_instr)
        /* Issue software trap 2 to buy an extra trap level; handling
         * continues in icache_parity_tl1_cont_instr. */
        CH_ERR_TL1_TRAPENTRY(SWTRAP_2);
        SET_SIZE(icache_parity_tl1_instr)
1787 
1788 /*
1789  * Software trap 2 at TL>0.
1790  * tt1_swtrap2 is replaced by icache_parity_tl1_cont_instr in cpu_init_trap
1791  * of the various architecture-specific files.  This is used as a continuation
1792  * of the icache parity handling where we've bought an extra TL level, so we
1793  * can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
1794  * and %g2.  Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
1795  * there's a reserved hole from 3-7.  We only use bits 0-1 and 8-9 (the low
1796  * order two bits from %g1 and %g2 respectively).
1797  * NB: Must be 8 instructions or less to fit in trap table and code must
1798  *     be relocatable.
1799  */
        ENTRY_NP(icache_parity_tl1_cont_instr)
        /* %g1/%g2 are stashed in %tpc/%tnpc/%tstate; jump to the full
         * TL>0 I$ parity handler. */
        CH_ERR_TL1_SWTRAPENTRY(icache_parity_tl1_err);
        SET_SIZE(icache_parity_tl1_cont_instr)
1803 
1804 
1805 /*
1806  * I$ parity error at TL>0 handler
1807  * We get here via trap 72 at TL>0->Software trap 2 at TL>0.  We enter
1808  * this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
1809  */
1810 
        ENTRY_NP(icache_parity_tl1_err)

        /*
         * This macro saves all the %g registers in the ch_err_tl1_data
         * structure, updates the ch_err_tl1_flags and saves the %tpc in
         * ch_err_tl1_tpc.  At the end of this macro, %g1 will point to
         * the ch_err_tl1_data structure and %g2 will have the original
         * flags in the ch_err_tl1_data structure.  All %g registers
         * except for %g1 and %g2 will be available.
         */
        CH_ERR_TL1_ENTER(CH_ERR_IPE);

#ifdef TRAPTRACE
        /*
         * Get current trap trace entry physical pointer.
         */
        CPU_INDEX(%g6, %g5)
        sll     %g6, TRAPTR_SIZE_SHIFT, %g6
        set     trap_trace_ctl, %g5
        add     %g6, %g5, %g6           ! %g6 = &trap_trace_ctl[cpuid]
        ld      [%g6 + TRAPTR_LIMIT], %g5
        tst     %g5                     ! limit == 0 => tracing disabled
        be      %icc, ipe_tl1_skip_tt
          nop
        ldx     [%g6 + TRAPTR_PBASE], %g5
        ld      [%g6 + TRAPTR_OFFSET], %g4
        add     %g5, %g4, %g5           ! %g5 = paddr of current entry

        /*
         * Create trap trace entry.
         */
        rd      %asi, %g7               ! save caller's %asi
        wr      %g0, TRAPTR_ASI, %asi
        rd      STICK, %g4
        stxa    %g4, [%g5 + TRAP_ENT_TICK]%asi
        rdpr    %tl, %g4
        stha    %g4, [%g5 + TRAP_ENT_TL]%asi
        rdpr    %tt, %g4
        stha    %g4, [%g5 + TRAP_ENT_TT]%asi
        rdpr    %tpc, %g4
        stna    %g4, [%g5 + TRAP_ENT_TPC]%asi
        rdpr    %tstate, %g4
        stxa    %g4, [%g5 + TRAP_ENT_TSTATE]%asi
        stna    %sp, [%g5 + TRAP_ENT_SP]%asi
        stna    %g0, [%g5 + TRAP_ENT_TR]%asi
        stna    %g0, [%g5 + TRAP_ENT_F1]%asi
        stna    %g0, [%g5 + TRAP_ENT_F2]%asi
        stna    %g0, [%g5 + TRAP_ENT_F3]%asi
        stna    %g0, [%g5 + TRAP_ENT_F4]%asi
        wr      %g0, %g7, %asi          ! restore caller's %asi

        /*
         * Advance trap trace pointer, wrapping back to offset 0 when the
         * next entry would cross the buffer limit.
         */
        ld      [%g6 + TRAPTR_OFFSET], %g5
        ld      [%g6 + TRAPTR_LIMIT], %g4
        st      %g5, [%g6 + TRAPTR_LAST_OFFSET]
        add     %g5, TRAP_ENT_SIZE, %g5
        sub     %g4, TRAP_ENT_SIZE, %g4
        cmp     %g5, %g4
        movge   %icc, 0, %g5            ! wrap at end of buffer
        st      %g5, [%g6 + TRAPTR_OFFSET]
ipe_tl1_skip_tt:
#endif  /* TRAPTRACE */

        /*
         * We fall into this macro if we've successfully logged the error in
         * the ch_err_tl1_data structure and want the PIL15 softint to pick
         * it up and log it.  %g1 must point to the ch_err_tl1_data structure.
         * Restores the %g registers and issues retry.
         */
        CH_ERR_TL1_EXIT;

        SET_SIZE(icache_parity_tl1_err)
1885 
1886 #endif  /* CPU_IMP_L1_CACHE_PARITY */
1887 
1888 
1889 /*
1890  * The itlb_rd_entry and dtlb_rd_entry functions return the tag portion of the
1891  * tte, the virtual address, and the ctxnum of the specified tlb entry.  They
1892  * should only be used in places where you have no choice but to look at the
1893  * tlb itself.
1894  *
1895  * Note: These two routines are required by the Estar "cpr" loadable module.
1896  */
1897 
1898         ENTRY_NP(itlb_rd_entry)
1899         sllx    %o0, 3, %o0
1900         ldxa    [%o0]ASI_ITLB_ACCESS, %g1
1901         stx     %g1, [%o1]
1902         ldxa    [%o0]ASI_ITLB_TAGREAD, %g2
1903         set     TAGREAD_CTX_MASK, %o4
1904         andn    %g2, %o4, %o5
1905         retl
1906           stx   %o5, [%o2]
1907         SET_SIZE(itlb_rd_entry)
1908 
1909 
1910         ENTRY_NP(dtlb_rd_entry)
1911         sllx    %o0, 3, %o0
1912         ldxa    [%o0]ASI_DTLB_ACCESS, %g1
1913         stx     %g1, [%o1]
1914         ldxa    [%o0]ASI_DTLB_TAGREAD, %g2
1915         set     TAGREAD_CTX_MASK, %o4
1916         andn    %g2, %o4, %o5
1917         retl
1918           stx   %o5, [%o2]
1919         SET_SIZE(dtlb_rd_entry)
1920 
1921 
1922 #if !(defined(JALAPENO) || defined(SERRANO))
1923 
        /*
         * Return the contents of the Safari configuration register in %o0.
         */
        ENTRY(get_safari_config)
        ldxa    [%g0]ASI_SAFARI_CONFIG, %o0
        retl
        nop
        SET_SIZE(get_safari_config)
1929 
1930 
        /*
         * Write %o0 to the Safari configuration register.
         */
        ENTRY(set_safari_config)
        stxa    %o0, [%g0]ASI_SAFARI_CONFIG
        membar  #Sync                   ! make sure the write completes
        retl
        nop
        SET_SIZE(set_safari_config)
1937 
1938 #endif  /* !(JALAPENO || SERRANO) */
1939 
1940 
1941         /*
1942          * Clear the NPT (non-privileged trap) bit in the %tick/%stick
1943          * registers. In an effort to make the change in the
1944          * tick/stick counter as consistent as possible, we disable
1945          * all interrupts while we're changing the registers. We also
1946          * ensure that the read and write instructions are in the same
1947          * line in the instruction cache.
1948          */
        ENTRY_NP(cpu_clearticknpt)
        ! Non-standard linkage: the return address - 4 is passed in %g4
        ! (we return with "jmp %g4 + 4" below), so no register window is
        ! needed.
        rdpr    %pstate, %g1            /* save processor state */
        andn    %g1, PSTATE_IE, %g3     /* turn off */
        wrpr    %g0, %g3, %pstate       /*   interrupts */
        rdpr    %tick, %g2              /* get tick register */
        brgez,pn %g2, 1f                /* if NPT bit off, we're done */
        mov     1, %g3                  /* create mask */
        sllx    %g3, 63, %g3            /*   for NPT bit */
        ba,a,pt %xcc, 2f
        .align  8                       /* Ensure rd/wr in same i$ line */
2:
        rdpr    %tick, %g2              /* get tick register */
        wrpr    %g3, %g2, %tick         /* write tick register, */
                                        /*   clearing NPT bit   */
1:
        rd      STICK, %g2              /* get stick register */
        brgez,pn %g2, 3f                /* if NPT bit off, we're done */
        mov     1, %g3                  /* create mask */
        sllx    %g3, 63, %g3            /*   for NPT bit */
        ba,a,pt %xcc, 4f
        .align  8                       /* Ensure rd/wr in same i$ line */
4:
        rd      STICK, %g2              /* get stick register */
        wr      %g3, %g2, STICK         /* write stick register, */
                                        /*   clearing NPT bit   */
3:
        jmp     %g4 + 4
        wrpr    %g0, %g1, %pstate       /* restore processor state */

        SET_SIZE(cpu_clearticknpt)
1979 
1980 
1981 #if defined(CPU_IMP_L1_CACHE_PARITY)
1982 
        ENTRY(correct_dcache_parity)
        /*
         * Walk the whole D$ from the last line down to line 0, zeroing
         * each line's data (and, on Panther, its data parity bits) via
         * the diagnostic ASIs so every line carries correct parity.
         *
         * Register Usage:
         *
         * %o0 = input D$ size
         * %o1 = input D$ line size
         * %o2 = scratch
         * %o3 = scratch
         * %o4 = scratch
         */

        sub     %o0, %o1, %o0                   ! init cache line address

        /*
         * For Panther CPUs, we also need to clear the data parity bits
         * using DC_data_parity bit of the ASI_DCACHE_DATA register.
         */
        GET_CPU_IMPL(%o3)
        cmp     %o3, PANTHER_IMPL
        bne     1f
          clr   %o3                             ! zero for non-Panther
        mov     1, %o3
        sll     %o3, PN_DC_DATA_PARITY_BIT_SHIFT, %o3

1:
        /*
         * Per-line loop; %o0 holds the current line's way+index address.
         * Set utag = way since it must be unique within an index.
         */
        srl     %o0, 14, %o2                    ! get cache way (DC_way)
        membar  #Sync                           ! required before ASI_DC_UTAG
        stxa    %o2, [%o0]ASI_DC_UTAG           ! set D$ utag = cache way
        membar  #Sync                           ! required after ASI_DC_UTAG

        /*
         * Zero line of D$ data (and data parity bits for Panther)
         */
        sub     %o1, 8, %o2                     ! %o2 = offset within the line
        or      %o0, %o3, %o4                   ! same address + DC_data_parity
2:
        membar  #Sync                           ! required before ASI_DC_DATA
        stxa    %g0, [%o0 + %o2]ASI_DC_DATA     ! zero 8 bytes of D$ data
        membar  #Sync                           ! required after ASI_DC_DATA
        /*
         * We also clear the parity bits if this is a panther. For non-Panther
         * CPUs, we simply end up clearing the $data register twice.
         */
        stxa    %g0, [%o4 + %o2]ASI_DC_DATA
        membar  #Sync

        subcc   %o2, 8, %o2                     ! previous 8-byte chunk
        bge     2b
        nop

        subcc   %o0, %o1, %o0                   ! previous cache line
        bge     1b
        nop

        retl
          nop
        SET_SIZE(correct_dcache_parity)
2043 
2044 #endif  /* CPU_IMP_L1_CACHE_PARITY */
2045 
2046 
        /*
         * Store the current %stick value, with the NPT bit (bit 63)
         * cleared, to the location pointed at by %o0.
         */
        ENTRY_NP(stick_timestamp)
        rd      STICK, %g1      ! read stick reg
        sllx    %g1, 1, %g1
        srlx    %g1, 1, %g1     ! shift off/back to clear npt bit (bit 63)

        retl
        stx     %g1, [%o0]      ! store the timestamp (delay slot)
        SET_SIZE(stick_timestamp)
2055 
2056 
        /*
         * Adjust %stick by the skew in %o0.  Interrupts are disabled for
         * the duration, and the read/modify/write of %stick is placed in
         * one 16-byte-aligned group so the adjustment window is as short
         * and consistent as possible.
         */
        ENTRY_NP(stick_adj)
        rdpr    %pstate, %g1            ! save processor state
        andn    %g1, PSTATE_IE, %g3
        ba      1f                      ! cache align stick adj
        wrpr    %g0, %g3, %pstate       ! turn off interrupts (delay slot)

        .align  16
1:      nop

        rd      STICK, %g4              ! read stick reg
        add     %g4, %o0, %o1           ! adjust stick with skew
        wr      %o1, %g0, STICK         ! write stick reg

        retl
        wrpr    %g1, %pstate            ! restore processor state (delay slot)
        SET_SIZE(stick_adj)
2073 
        /*
         * Debugger support: store the raw %stick value to [%o0] and
         * return 0.
         */
        ENTRY_NP(kdi_get_stick)
        rd      STICK, %g1
        stx     %g1, [%o0]
        retl
        mov     %g0, %o0                ! return 0 (delay slot)
        SET_SIZE(kdi_get_stick)
2080 
        /*
         * Invalidate a single D$ line: %o0 is the line index, which is
         * shifted into the DC_way/DC_addr position and used to zero the
         * line's tag (including the valid bit) via the diagnostic ASI.
         */
        ENTRY(dcache_inval_line)
        sll     %o0, 5, %o0             ! shift index into DC_way and DC_addr
        stxa    %g0, [%o0]ASI_DC_TAG    ! zero the DC_valid and DC_tag bits
        membar  #Sync
        retl
        nop
        SET_SIZE(dcache_inval_line)
2088 
        /*
         * Flush the entire I$ with interrupts disabled.  The cache
         * geometry comes from the cpu_private area when it has been
         * initialized, otherwise from the global icache_size /
         * icache_linesize variables.
         */
        ENTRY(icache_inval_all)
        rdpr    %pstate, %o5
        andn    %o5, PSTATE_IE, %o3
        wrpr    %g0, %o3, %pstate       ! clear IE bit

        GET_CPU_PRIVATE_PTR(%g0, %o0, %o2, icache_inval_all_1);
        ld      [%o0 + CHPR_ICACHE_LINESIZE], %o1
        ba,pt   %icc, 2f
          ld    [%o0 + CHPR_ICACHE_SIZE], %o0
icache_inval_all_1:
        ! cpu_private not initialized yet; use the global geometry
        ASM_LD(%o0, icache_size)
        ASM_LD(%o1, icache_linesize)
2:
        CH_ICACHE_FLUSHALL(%o0, %o1, %o2, %o4)

        retl
        wrpr    %g0, %o5, %pstate       ! restore earlier pstate
        SET_SIZE(icache_inval_all)
2107 
2108 
2109 /*
2110  * cache_scrubreq_tl1 is the crosstrap handler called on offlined cpus via a 
2111  * crosstrap.  It atomically increments the outstanding request counter and,
2112  * if there was not already an outstanding request, branches to setsoftint_tl1
2113  * to enqueue an intr_vec for the given inum.
2114  */
2115 
2116         ! Register usage:
2117         !
2118         ! Arguments:
2119         ! %g1 - inum
2120         ! %g2 - index into chsm_outstanding array
2121         !
2122         ! Internal:
2123         ! %g2, %g3, %g5 - scratch
2124         ! %g4 - ptr. to scrub_misc chsm_outstanding[index].
2125         ! %g6 - setsoftint_tl1 address
2126 
        ENTRY_NP(cache_scrubreq_tl1)
        mulx    %g2, CHSM_OUTSTANDING_INCR, %g2 ! index -> array byte offset
        set     CHPR_SCRUB_MISC + CHSM_OUTSTANDING, %g3
        add     %g2, %g3, %g2           ! offset of chsm_outstanding[index]
        GET_CPU_PRIVATE_PTR(%g2, %g4, %g5, 1f);
        ld      [%g4], %g2              ! cpu's chsm_outstanding[index]
        !
        ! no need to use atomic instructions for the following
        ! increment - we're at tl1
        !
        add     %g2, 0x1, %g3
        brnz,pn %g2, 1f                 ! no need to enqueue more intr_vec
          st    %g3, [%g4]              ! delay - store incremented counter
        ASM_JMP(%g6, setsoftint_tl1)    ! first request: enqueue the softint
        ! not reached
1:
        ! a request was already outstanding (or no cpu_private): just return
        retry
        SET_SIZE(cache_scrubreq_tl1)
2145 
2146 
2147 /*
2148  * Get the error state for the processor.
2149  * Note that this must not be used at TL>0
2150  */
        /*
         * Snapshot the error registers into the buffer at %o0: the
         * primary AFSR/AFAR always; shadow and extended (Panther) and
         * Serrano AFAR2 variants where the implementation has them.
         * Fields that don't exist on this implementation are zeroed.
         */
        ENTRY(get_cpu_error_state)
#if defined(CHEETAH_PLUS)
        set     ASI_SHADOW_REG_VA, %o2
        ldxa    [%o2]ASI_AFSR, %o1              ! shadow afsr reg
        stx     %o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR]
        ldxa    [%o2]ASI_AFAR, %o1              ! shadow afar reg
        stx     %o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFAR]
        GET_CPU_IMPL(%o3)       ! Only panther has AFSR_EXT registers
        cmp     %o3, PANTHER_IMPL
        bne,a   1f
          stx   %g0, [%o0 + CH_CPU_ERRORS_AFSR_EXT]     ! zero for non-PN
        set     ASI_AFSR_EXT_VA, %o2
        ldxa    [%o2]ASI_AFSR, %o1              ! afsr_ext reg
        stx     %o1, [%o0 + CH_CPU_ERRORS_AFSR_EXT]
        set     ASI_SHADOW_AFSR_EXT_VA, %o2
        ldxa    [%o2]ASI_AFSR, %o1              ! shadow afsr_ext reg
        stx     %o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT]
        b       2f
          nop
1:
        stx     %g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT] ! zero for non-PN
2:
#else   /* CHEETAH_PLUS */
        ! no shadow/extended registers on this implementation: zero them
        stx     %g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR]
        stx     %g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFAR]
        stx     %g0, [%o0 + CH_CPU_ERRORS_AFSR_EXT]
        stx     %g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT]
#endif  /* CHEETAH_PLUS */
#if defined(SERRANO)
        /*
         * Serrano has an afar2 which captures the address on FRC/FRU errors.
         * We save this in the afar2 of the register save area.
         */
        set     ASI_MCU_AFAR2_VA, %o2
        ldxa    [%o2]ASI_MCU_CTRL, %o1
        stx     %o1, [%o0 + CH_CPU_ERRORS_AFAR2]
#endif  /* SERRANO */
        ldxa    [%g0]ASI_AFSR, %o1              ! primary afsr reg
        stx     %o1, [%o0 + CH_CPU_ERRORS_AFSR]
        ldxa    [%g0]ASI_AFAR, %o1              ! primary afar reg
        retl
        stx     %o1, [%o0 + CH_CPU_ERRORS_AFAR] ! delay slot
        SET_SIZE(get_cpu_error_state)
2194 
        /*
         * Read the region at %i0 (%i1 bytes, processed in 64-byte
         * blocks) with FP block loads, checking the AFSR after each
         * block.  Any error found is captured via get_cpu_error_state()
         * and reported through cpu_ce_detected(..., CE_CEEN_TIMEOUT).
         */
        ENTRY(cpu_check_block)
        !
        ! get a new window with room for the error regs
        !
        save    %sp, -SA(MINFRAME + CH_CPU_ERROR_SIZE), %sp
        srl     %i1, 6, %l4             ! clear top bits of psz
                                        ! and divide by 64
        rd      %fprs, %l2              ! store FP
        wr      %g0, FPRS_FEF, %fprs    ! enable FP
1:
        ldda    [%i0]ASI_BLK_P, %d0     ! load a block
        membar  #Sync
        ldxa    [%g0]ASI_AFSR, %l3      ! read afsr reg
        brz,a,pt %l3, 2f                ! check for error
        nop

        !
        ! if error, read the error regs and log it
        !
        call    get_cpu_error_state
        add     %fp, STACK_BIAS - CH_CPU_ERROR_SIZE, %o0 ! delay: arg = buffer

        !
        ! cpu_ce_detected(ch_cpu_errors_t *, flag)
        !
        call    cpu_ce_detected         ! log the error
        mov     CE_CEEN_TIMEOUT, %o1    ! delay: arg #2 = flag
2:
        dec     %l4                     ! next 64-byte block
        brnz,a,pt  %l4, 1b
        add     %i0, 64, %i0            ! increment block addr (annulled delay)

        wr      %l2, %g0, %fprs         ! restore FP
        ret
        restore

        SET_SIZE(cpu_check_block)
2232 
        /*
         * With interrupts disabled and the sibling core parked, capture
         * the E$ data/tags for the address in %o0 into the logout area
         * at %o1 (at its CH_CLO_DATA + CH_CHD_EC_DATA offset).
         */
        ENTRY(cpu_delayed_logout)
        rdpr    %pstate, %o2
        andn    %o2, PSTATE_IE, %o2
        wrpr    %g0, %o2, %pstate               ! disable interrupts
        PARK_SIBLING_CORE(%o2, %o3, %o4)        ! %o2 has DCU value
        add     %o1, CH_CLO_DATA + CH_CHD_EC_DATA, %o1
        rd      %asi, %g1                       ! save caller's %asi
        wr      %g0, ASI_P, %asi
        GET_ECACHE_DTAGS(%o0, %o1, %o3, %o4, %o5)
        wr      %g1, %asi                       ! restore caller's %asi
        UNPARK_SIBLING_CORE(%o2, %o3, %o4)      ! can use %o2 again
        rdpr    %pstate, %o2
        or      %o2, PSTATE_IE, %o2
        wrpr    %g0, %o2, %pstate               ! re-enable interrupts
        retl
          nop
        SET_SIZE(cpu_delayed_logout)
2250 
        ENTRY(dtrace_blksuword32)
        save    %sp, -SA(MINFRAME + 4), %sp     ! 4 bytes to spill %f0

        rdpr    %pstate, %l1
        andn    %l1, PSTATE_IE, %l2             ! disable interrupts to
        wrpr    %g0, %l2, %pstate               ! protect our FPU diddling

        rd      %fprs, %l0
        andcc   %l0, FPRS_FEF, %g0
        bz,a,pt %xcc, 1f                        ! if the fpu is disabled
        wr      %g0, FPRS_FEF, %fprs            ! ... enable the fpu

        st      %f0, [%fp + STACK_BIAS - 4]     ! save %f0 to the stack
1:
        set     0f, %l5                         ! lofault target (label 0 below)
        /*
         * We're about to write a block full of either total garbage
         * (not kernel data, don't worry) or user floating-point data
         * (so it only _looks_ like garbage).
         */
        ld      [%i1], %f0                      ! modify the block
        membar  #Sync
        stn     %l5, [THREAD_REG + T_LOFAULT]   ! set up the lofault handler
        stda    %d0, [%i0]ASI_BLK_COMMIT_S      ! store the modified block
        membar  #Sync
        stn     %g0, [THREAD_REG + T_LOFAULT]   ! remove the lofault handler

        ! condition codes from the andcc of FPRS_FEF above are still
        ! valid here (loads/stores don't change them): Z set => the fpu
        ! was disabled on entry, so restore %fprs and skip the %f0 reload
        bz,a,pt %xcc, 1f
        wr      %g0, %l0, %fprs                 ! restore %fprs

        ld      [%fp + STACK_BIAS - 4], %f0     ! restore %f0
1:

        wrpr    %g0, %l1, %pstate               ! restore interrupts

        ret
        restore %g0, %g0, %o0                   ! success: return 0

0:
        ! lofault landing pad: the block store faulted
        membar  #Sync
        stn     %g0, [THREAD_REG + T_LOFAULT]   ! remove the lofault handler

        ! same flags-based fpu-state restore as on the success path
        bz,a,pt %xcc, 1f
        wr      %g0, %l0, %fprs                 ! restore %fprs

        ld      [%fp + STACK_BIAS - 4], %f0     ! restore %f0
1:

        wrpr    %g0, %l1, %pstate               ! restore interrupts

        /*
         * If tryagain is set (%i2) we tail-call dtrace_blksuword32_err()
         * which deals with watchpoints. Otherwise, just return -1.
         */
        brnz,pt %i2, 1f
        nop
        ret
        restore %g0, -1, %o0                    ! failure: return -1
1:
        call    dtrace_blksuword32_err
        restore                                 ! delay: restore window first

        SET_SIZE(dtrace_blksuword32)
2314 
2315 #ifdef  CHEETAHPLUS_ERRATUM_25
2316 
	/*
	 * claimlines(base, size, stride) -- Cheetah+ erratum 25 workaround.
	 *
	 *   %o0 = base address, %o1 = length in bytes, %o2 = line stride
	 *
	 * Walks the range [base, base + size) backwards in stride-sized
	 * steps, issuing casxa [addr]ASI_MEM, %g0, %g0 on each line.  The
	 * compare-and-swap of 0 with 0 is a no-op data-wise, but the atomic
	 * forces the cache line to be claimed (owned) by this CPU.
	 */
	ENTRY(claimlines)
1:
	subcc	%o1, %o2, %o1			! %o1 -= stride; borrow => done
	add	%o0, %o1, %o3			! %o3 = base + remaining offset
	bgeu,a,pt	%xcc, 1b		! loop while offset >= 0 (unsigned)
	casxa	[%o3]ASI_MEM, %g0, %g0		! (annulled on exit) claim the line
	membar	#Sync
	retl
	nop
	SET_SIZE(claimlines)
2327 
	/*
	 * void cpu_feature_init(void)
	 *
	 * Per-CPU feature initialization:
	 *  - if cheetah_bpe_off is nonzero, clear the branch-prediction
	 *    enable bit in the dispatch control register;
	 *  - read this CPU's device serial id and record it in the
	 *    device_id field of its cpunodes[] entry;
	 *  - (CHEETAHPLUS_ERRATUM_34) relocate any locked index-0
	 *    ITLB/DTLB entries out of harm's way.
	 */
	ENTRY(cpu_feature_init)
	save	%sp, -SA(MINFRAME), %sp
	sethi	%hi(cheetah_bpe_off), %o0
	ld	[%o0 + %lo(cheetah_bpe_off)], %o0
	brz	%o0, 1f				! skip unless BPE forced off
	nop
	rd	ASR_DISPATCH_CONTROL, %o0
	andn	%o0, ASR_DISPATCH_CONTROL_BPE, %o0	! clear branch-predict enable
	wr	%o0, 0, ASR_DISPATCH_CONTROL
1:
	!
	! get the device_id and store the device_id
	! in the appropriate cpunodes structure
	! given the cpus index
	!
	CPU_INDEX(%o0, %o1)			! %o0 = this CPU's index
	mulx %o0, CPU_NODE_SIZE, %o0		! byte offset into cpunodes[]
	set  cpunodes + DEVICE_ID, %o1		! &cpunodes[0].device_id
	ldxa [%g0] ASI_DEVICE_SERIAL_ID, %o2	! read the hardware serial id
	stx  %o2, [%o0 + %o1]			! cpunodes[cpu].device_id = id
#ifdef	CHEETAHPLUS_ERRATUM_34
	!
	! apply Cheetah+ erratum 34 workaround
	!
	call itlb_erratum34_fixup
	  nop
	call dtlb_erratum34_fixup
	  nop
#endif	/* CHEETAHPLUS_ERRATUM_34 */
	ret
	  restore
	SET_SIZE(cpu_feature_init)
2360 
	/*
	 * void copy_tsb_entry(uintptr_t src, uintptr_t dest)
	 *
	 * Copy one 16-byte TSB entry from src to dest.  The quad load
	 * (ASI_NQUAD_LD) reads tag and data as a single unit into an
	 * even/odd register pair, so the entry is captured atomically.
	 */
	ENTRY(copy_tsb_entry)
	ldda	[%o0]ASI_NQUAD_LD, %o2		! %o2 = tag, %o3 = data
	stx	%o2, [%o1]			! dest tag
	stx	%o3, [%o1 + 8 ]			! dest data
	retl
	nop
	SET_SIZE(copy_tsb_entry)
2368 
2369 #endif  /* CHEETAHPLUS_ERRATUM_25 */
2370 
2371 #ifdef  CHEETAHPLUS_ERRATUM_34
2372 
2373         !
2374         ! In Cheetah+ erratum 34, under certain conditions an ITLB locked
2375         ! index 0 TTE will erroneously be displaced when a new TTE is
2376         ! loaded via ASI_ITLB_IN.  In order to avoid cheetah+ erratum 34,
2377         ! locked index 0 TTEs must be relocated.
2378         !
2379         ! NOTE: Care must be taken to avoid an ITLB miss in this routine.
2380         !
	!
	! void itlb_erratum34_fixup(void)
	!
	! Must be entered with interrupts enabled (DEBUG asserts this);
	! interrupts are disabled for the duration.  If ITLB t16 index 0
	! holds a valid, locked TTE, demap it and re-insert it at the first
	! unlocked-or-invalid index >= 1.  Clobbers %o1, %o2, %o4, %g3, %g4.
	!
	ENTRY_NP(itlb_erratum34_fixup)
	rdpr	%pstate, %o3
#ifdef DEBUG
	PANIC_IF_INTR_DISABLED_PSTR(%o3, u3_di_label1, %g1)
#endif /* DEBUG */
	wrpr	%o3, PSTATE_IE, %pstate		! Disable interrupts
	ldxa	[%g0]ASI_ITLB_ACCESS, %o1	! %o1 = entry 0 data
	ldxa	[%g0]ASI_ITLB_TAGREAD, %o2	! %o2 = entry 0 tag

	! The TTE valid bit is the sign bit, so a signed >= 0 test
	! means "invalid" (see also "TTE is > 0 iff not valid" below).
	cmp	%o1, %g0			! Is this entry valid?
	bge	%xcc, 1f
	  andcc	%o1, TTE_LCK_INT, %g0		! Is this entry locked?
	bnz	%icc, 2f
	  nop
1:
	retl					! Nope, outta here...
	  wrpr	%g0, %o3, %pstate		! Enable interrupts
2:
	sethi	%hi(FLUSH_ADDR), %o4
	stxa	%g0, [%o2]ASI_ITLB_DEMAP	! Flush this mapping
	flush	%o4				! Flush required for I-MMU
	!
	! Start search from index 1 up.  This is because the kernel force
	! loads its text page at index 15 in sfmmu_kernel_remap() and we
	! don't want our relocated entry evicted later.
	!
	! NOTE: We assume that we'll be successful in finding an unlocked
	! or invalid entry.  If that isn't the case there are bound to be
	! bigger problems.
	!
	set	(1 << 3), %g3			! t16 entries are 8 bytes apart;
						!  start at index 1
3:
	ldxa	[%g3]ASI_ITLB_ACCESS, %o4	! Load TTE from t16
	!
	! If this entry isn't valid, we'll choose to displace it (regardless
	! of the lock bit).
	!
	cmp	%o4, %g0			! TTE is > 0 iff not valid
	bge	%xcc, 4f			! If invalid, go displace
	  andcc	%o4, TTE_LCK_INT, %g0		! Check for lock bit
	bnz,a	%icc, 3b			! If locked, look at next
	  add	%g3, (1 << 3), %g3		!  entry (add annulled otherwise)
4:
	!
	! We found an unlocked or invalid entry; we'll explicitly load
	! the former index 0 entry here.
	!
	sethi	%hi(FLUSH_ADDR), %o4
	set	MMU_TAG_ACCESS, %g4
	stxa	%o2, [%g4]ASI_IMMU		! stage the saved tag ...
	stxa	%o1, [%g3]ASI_ITLB_ACCESS	! ... and write data at new index
	flush	%o4				! Flush required for I-MMU
	retl
	  wrpr	%g0, %o3, %pstate		! Enable interrupts
	SET_SIZE(itlb_erratum34_fixup)
2436 
2437         !
2438         ! In Cheetah+ erratum 34, under certain conditions a DTLB locked
2439         ! index 0 TTE will erroneously be displaced when a new TTE is
2440         ! loaded.  In order to avoid cheetah+ erratum 34, locked index 0
2441         ! TTEs must be relocated.
2442         !
	!
	! void dtlb_erratum34_fixup(void)
	!
	! DTLB counterpart of itlb_erratum34_fixup: must be entered with
	! interrupts enabled; disables them for the duration.  If DTLB t16
	! index 0 holds a valid, locked TTE, demap it and re-insert it at
	! the first unlocked-or-invalid index >= 1.  Uses membar #Sync
	! (rather than the I-MMU flush) to order the D-MMU stores.
	! Clobbers %o1, %o2, %o4, %g3, %g4.
	!
	ENTRY_NP(dtlb_erratum34_fixup)
	rdpr	%pstate, %o3
#ifdef DEBUG
	PANIC_IF_INTR_DISABLED_PSTR(%o3, u3_di_label2, %g1)
#endif /* DEBUG */
	wrpr	%o3, PSTATE_IE, %pstate		! Disable interrupts
	ldxa	[%g0]ASI_DTLB_ACCESS, %o1	! %o1 = entry 0 data
	ldxa	[%g0]ASI_DTLB_TAGREAD, %o2	! %o2 = entry 0 tag

	! The TTE valid bit is the sign bit, so a signed >= 0 test
	! means "invalid" (see also "TTE is > 0 iff not valid" below).
	cmp	%o1, %g0			! Is this entry valid?
	bge	%xcc, 1f
	  andcc	%o1, TTE_LCK_INT, %g0		! Is this entry locked?
	bnz	%icc, 2f
	  nop
1:
	retl					! Nope, outta here...
	  wrpr	%g0, %o3, %pstate		! Enable interrupts
2:
	stxa	%g0, [%o2]ASI_DTLB_DEMAP	! Flush this mapping
	membar	#Sync
	!
	! Start search from index 1 up.
	!
	! NOTE: We assume that we'll be successful in finding an unlocked
	! or invalid entry.  If that isn't the case there are bound to be
	! bigger problems.
	!
	set	(1 << 3), %g3			! t16 entries are 8 bytes apart;
						!  start at index 1
3:
	ldxa	[%g3]ASI_DTLB_ACCESS, %o4	! Load TTE from t16
	!
	! If this entry isn't valid, we'll choose to displace it (regardless
	! of the lock bit).
	!
	cmp	%o4, %g0			! TTE is > 0 iff not valid
	bge	%xcc, 4f			! If invalid, go displace
	  andcc	%o4, TTE_LCK_INT, %g0		! Check for lock bit
	bnz,a	%icc, 3b			! If locked, look at next
	  add	%g3, (1 << 3), %g3		!  entry (add annulled otherwise)
4:
	!
	! We found an unlocked or invalid entry; we'll explicitly load
	! the former index 0 entry here.
	!
	set	MMU_TAG_ACCESS, %g4
	stxa	%o2, [%g4]ASI_DMMU		! stage the saved tag ...
	stxa	%o1, [%g3]ASI_DTLB_ACCESS	! ... and write data at new index
	membar	#Sync
	retl
	  wrpr	%g0, %o3, %pstate		! Enable interrupts
	SET_SIZE(dtlb_erratum34_fixup)
2494 
2495 #endif  /* CHEETAHPLUS_ERRATUM_34 */
2496