/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include "assym.h"

#include <sys/asm_linkage.h>
#include <sys/mmu.h>
#include <vm/hat_sfmmu.h>
#include <sys/machparam.h>
#include <sys/machcpuvar.h>
#include <sys/machthread.h>
#include <sys/privregs.h>
#include <sys/machasi.h>
#include <sys/trap.h>
#include <sys/spitregs.h>
#include <sys/xc_impl.h>
#include <sys/intreg.h>
#include <sys/async.h>

#ifdef TRAPTRACE
#include <sys/traptrace.h>
#endif /* TRAPTRACE */

/* BEGIN CSTYLED */
#define	DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3)			\
	ldxa	[%g0]ASI_LSU, tmp1					;\
	btst	LSU_DC, tmp1		/* is dcache enabled? */	;\
	bz,pn	%icc, 1f						;\
	sethi	%hi(dcache_linesize), tmp1				;\
	ld	[tmp1 + %lo(dcache_linesize)], tmp1			;\
	sethi	%hi(dflush_type), tmp2					;\
	ld	[tmp2 + %lo(dflush_type)], tmp2				;\
	cmp	tmp2, FLUSHPAGE_TYPE					;\
	be,pt	%icc, 2f						;\
	sllx	arg1, SF_DC_VBIT_SHIFT, arg1	/* tag to compare */	;\
	sethi	%hi(dcache_size), tmp3					;\
	ld	[tmp3 + %lo(dcache_size)], tmp3				;\
	cmp	tmp2, FLUSHMATCH_TYPE					;\
	be,pt	%icc, 3f						;\
	nop								;\
	/*								\
	 * flushtype = FLUSHALL_TYPE, flush the whole thing		\
	 * tmp3 = cache size						\
	 * tmp1 = cache line size					\
	 */								\
	sub	tmp3, tmp1, tmp2					;\
4:									\
	stxa	%g0, [tmp2]ASI_DC_TAG					;\
	membar	#Sync							;\
	cmp	%g0, tmp2						;\
	bne,pt	%icc, 4b						;\
	sub	tmp2, tmp1, tmp2					;\
	ba,pt	%icc, 1f						;\
	nop								;\
	/*								\
	 * flushtype = FLUSHPAGE_TYPE					\
	 * arg1 = tag to compare against				\
	 * arg2 = virtual color						\
	 * tmp1 = cache line size					\
	 * tmp2 = tag from cache					\
	 * tmp3 = counter						\
	 */								\
2:									\
	set	MMU_PAGESIZE, tmp3					;\
	sllx	arg2, MMU_PAGESHIFT, arg2  /* color to dcache page */	;\
	sub	tmp3, tmp1, tmp3					;\
4:									\
	ldxa	[arg2 + tmp3]ASI_DC_TAG, tmp2	/* read tag */		;\
	btst	SF_DC_VBIT_MASK, tmp2					;\
	bz,pn	%icc, 5f	/* branch if no valid sub-blocks */	;\
	andn	tmp2, SF_DC_VBIT_MASK, tmp2	/* clear out v bits */	;\
	cmp	tmp2, arg1						;\
	bne,pn	%icc, 5f		/* br if tag miss */		;\
	nop								;\
	stxa	%g0, [arg2 + tmp3]ASI_DC_TAG				;\
	membar	#Sync							;\
5:									\
	cmp	%g0, tmp3						;\
	bnz,pt	%icc, 4b		/* branch if not done */	;\
	sub	tmp3, tmp1, tmp3					;\
	ba,pt	%icc, 1f						;\
	nop								;\
	/*								\
	 * flushtype = FLUSHMATCH_TYPE					\
	 * arg1 = tag to compare against				\
	 * tmp1 = cache line size					\
	 * tmp3 = cache size						\
	 * arg2 = counter						\
	 * tmp2 = cache tag						\
	 */								\
3:									\
	sub	tmp3, tmp1, arg2					;\
4:									\
	ldxa	[arg2]ASI_DC_TAG, tmp2		/* read tag */		;\
	btst	SF_DC_VBIT_MASK, tmp2					;\
	bz,pn	%icc, 5f	/* br if no valid sub-blocks */		;\
	andn	tmp2, SF_DC_VBIT_MASK, tmp2	/* clear out v bits */	;\
	cmp	tmp2, arg1						;\
	bne,pn	%icc, 5f		/* branch if tag miss */	;\
	nop								;\
	stxa	%g0, [arg2]ASI_DC_TAG					;\
	membar	#Sync							;\
5:									\
	cmp	%g0, arg2						;\
	bne,pt	%icc, 4b		/* branch if not done */	;\
	sub	arg2, tmp1, arg2					;\
1:

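/*
 * For reference, a hedged C sketch of the FLUSHPAGE_TYPE path above
 * (illustrative only; dc_tag_read() and dc_tag_clear() are hypothetical
 * stand-ins for the ASI_DC_TAG diagnostic ldxa/stxa accesses):
 *
 *	uint64_t cmp = (uint64_t)pfnum << SF_DC_VBIT_SHIFT;
 *	uintptr_t base = (uintptr_t)color << MMU_PAGESHIFT;
 *	long off;
 *
 *	for (off = MMU_PAGESIZE - dcache_linesize; off >= 0;
 *	    off -= dcache_linesize) {
 *		uint64_t tag = dc_tag_read(base + off);
 *		if ((tag & SF_DC_VBIT_MASK) != 0 &&
 *		    (tag & ~SF_DC_VBIT_MASK) == cmp)
 *			dc_tag_clear(base + off);	// invalidate the line
 *	}
 */
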
/*
 * macro that flushes the entire dcache color
 */
#define	DCACHE_FLUSHCOLOR(arg, tmp1, tmp2)				\
	ldxa	[%g0]ASI_LSU, tmp1;					\
	btst	LSU_DC, tmp1;		/* is dcache enabled? */	\
	bz,pn	%icc, 1f;						\
	sethi	%hi(dcache_linesize), tmp1;				\
	ld	[tmp1 + %lo(dcache_linesize)], tmp1;			\
	set	MMU_PAGESIZE, tmp2;					\
	/*								\
	 * arg = virtual color						\
	 * tmp2 = page size						\
	 * tmp1 = cache line size					\
	 */								\
	sllx	arg, MMU_PAGESHIFT, arg; /* color to dcache page */	\
	sub	tmp2, tmp1, tmp2;					\
2:									\
	stxa	%g0, [arg + tmp2]ASI_DC_TAG;				\
	membar	#Sync;							\
	cmp	%g0, tmp2;						\
	bne,pt	%icc, 2b;						\
	sub	tmp2, tmp1, tmp2;					\
1:

/*
 * macro that flushes the entire dcache
 */
#define	DCACHE_FLUSHALL(size, linesize, tmp)				\
	ldxa	[%g0]ASI_LSU, tmp;					\
	btst	LSU_DC, tmp;		/* is dcache enabled? */	\
	bz,pn	%icc, 1f;						\
									\
	sub	size, linesize, tmp;					\
2:									\
	stxa	%g0, [tmp]ASI_DC_TAG;					\
	membar	#Sync;							\
	cmp	%g0, tmp;						\
	bne,pt	%icc, 2b;						\
	sub	tmp, linesize, tmp;					\
1:

/*
 * macro that flushes the entire icache
 */
#define	ICACHE_FLUSHALL(size, linesize, tmp)				\
	ldxa	[%g0]ASI_LSU, tmp;					\
	btst	LSU_IC, tmp;						\
	bz,pn	%icc, 1f;						\
									\
	sub	size, linesize, tmp;					\
2:									\
	stxa	%g0, [tmp]ASI_IC_TAG;					\
	membar	#Sync;							\
	cmp	%g0, tmp;						\
	bne,pt	%icc, 2b;						\
	sub	tmp, linesize, tmp;					\
1:

#ifdef SF_ERRATA_32
#define	SF_WORKAROUND(tmp1, tmp2)				\
	sethi	%hi(FLUSH_ADDR), tmp2				;\
	set	MMU_PCONTEXT, tmp1				;\
	stxa	%g0, [tmp1]ASI_DMMU				;\
	flush	tmp2						;
#else
#define	SF_WORKAROUND(tmp1, tmp2)
#endif /* SF_ERRATA_32 */

/*
 * arg1 = vaddr
 * arg2 = ctxnum
 *	- disable interrupts and clear address mask
 *	  to access 64 bit physaddr
 *	- Blow out the TLB, flush user page.
 *	  . use secondary context.
 */
#define	VTAG_FLUSHUPAGE(lbl, arg1, arg2, tmp1, tmp2, tmp3, tmp4) \
	rdpr	%pstate, tmp1					;\
	andn	tmp1, PSTATE_IE, tmp2				;\
	wrpr	tmp2, 0, %pstate				;\
	sethi	%hi(FLUSH_ADDR), tmp2				;\
	set	MMU_SCONTEXT, tmp3				;\
	ldxa	[tmp3]ASI_DMMU, tmp4				;\
	or	DEMAP_SECOND | DEMAP_PAGE_TYPE, arg1, arg1	;\
	cmp	tmp4, arg2					;\
	be,a,pt	%icc, lbl/**/4					;\
	  nop							;\
	stxa	arg2, [tmp3]ASI_DMMU				;\
lbl/**/4:							;\
	stxa	%g0, [arg1]ASI_DTLB_DEMAP			;\
	stxa	%g0, [arg1]ASI_ITLB_DEMAP			;\
	flush	tmp2						;\
	be,a,pt	%icc, lbl/**/5					;\
	  nop							;\
	stxa	tmp4, [tmp3]ASI_DMMU				;\
	flush	tmp2						;\
lbl/**/5:							;\
	wrpr	%g0, tmp1, %pstate

/*
 * macro that flushes all the user entries in dtlb
 * arg1 = dtlb entries
 *	- Before first compare:
 *		tmp4 = tte
 *		tmp5 = vaddr
 *		tmp6 = ctxnum
 */
#define	DTLB_FLUSH_UNLOCKED_UCTXS(lbl, arg1, tmp1, tmp2, tmp3, \
				tmp4, tmp5, tmp6) \
lbl/**/0:							;\
	sllx	arg1, 3, tmp3					;\
	SF_WORKAROUND(tmp1, tmp2)				;\
	ldxa	[tmp3]ASI_DTLB_ACCESS, tmp4			;\
	srlx	tmp4, 6, tmp4					;\
	andcc	tmp4, 1, %g0					;\
	bnz,pn	%xcc, lbl/**/1					;\
	srlx	tmp4, 57, tmp4					;\
	andcc	tmp4, 1, %g0					;\
	beq,pn	%xcc, lbl/**/1					;\
	  nop							;\
	set	TAGREAD_CTX_MASK, tmp1				;\
	ldxa	[tmp3]ASI_DTLB_TAGREAD, tmp2			;\
	and	tmp2, tmp1, tmp6				;\
	andn	tmp2, tmp1, tmp5				;\
	set	KCONTEXT, tmp4					;\
	cmp	tmp6, tmp4					;\
	be	lbl/**/1					;\
	  nop							;\
	VTAG_FLUSHUPAGE(VD/**/lbl, tmp5, tmp6, tmp1, tmp2, tmp3, tmp4) ;\
lbl/**/1:							;\
	brgz,pt	arg1, lbl/**/0					;\
	  sub	arg1, 1, arg1


/*
 * macro that flushes all the user entries in itlb
 * arg1 = itlb entries
 *	- Before first compare:
 *		tmp4 = tte
 *		tmp5 = vaddr
 *		tmp6 = ctxnum
 */
#define	ITLB_FLUSH_UNLOCKED_UCTXS(lbl, arg1, tmp1, tmp2, tmp3, \
				tmp4, tmp5, tmp6) \
lbl/**/0:							;\
	sllx	arg1, 3, tmp3					;\
	SF_WORKAROUND(tmp1, tmp2)				;\
	ldxa	[tmp3]ASI_ITLB_ACCESS, tmp4			;\
	srlx	tmp4, 6, tmp4					;\
	andcc	tmp4, 1, %g0					;\
	bnz,pn	%xcc, lbl/**/1					;\
	srlx	tmp4, 57, tmp4					;\
	andcc	tmp4, 1, %g0					;\
	beq,pn	%xcc, lbl/**/1					;\
	  nop							;\
	set	TAGREAD_CTX_MASK, tmp1				;\
	ldxa	[tmp3]ASI_ITLB_TAGREAD, tmp2			;\
	and	tmp2, tmp1, tmp6				;\
	andn	tmp2, tmp1, tmp5				;\
	set	KCONTEXT, tmp4					;\
	cmp	tmp6, tmp4					;\
	be	lbl/**/1					;\
	  nop							;\
	VTAG_FLUSHUPAGE(VI/**/lbl, tmp5, tmp6, tmp1, tmp2, tmp3, tmp4) ;\
lbl/**/1:							;\
	brgz,pt	arg1, lbl/**/0					;\
	  sub	arg1, 1, arg1


/*
 * Macro for computing an address at an offset from the 'cpu_private' ptr.
 * The 'cpu_private' ptr is in the machcpu structure.
 * r_or_s:	Register or symbol offset from the 'cpu_private' ptr.
 * scr1:	Scratch, ptr is returned in this register.
 * scr2:	Scratch
 */
#define	GET_CPU_PRIVATE_PTR(r_or_s, scr1, scr2, label)		\
	CPU_ADDR(scr1, scr2);						\
	ldn	[scr1 + CPU_PRIVATE], scr1;				\
	cmp	scr1, 0;						\
	be	label;							\
	 nop;								\
	add	scr1, r_or_s, scr1;					\

#ifdef HUMMINGBIRD
/*
 * The UltraSPARC-IIe processor supports both 4-way set associative and
 * direct map E$. For performance reasons, we flush the E$ by placing it
 * in direct map mode for data loads/stores and restore the state after
 * we are done flushing it. Keep interrupts off while flushing in this
 * manner.
 *
 * We flush the entire ecache by starting at one end and loading each
 * successive ecache line for the 2*ecache-size range. We have to repeat
 * the flush operation to guarantee that the entire ecache has been
 * flushed.
 *
 * For flushing a specific physical address, we start at the aliased
 * address and load at set-size stride, wrapping around at the
 * 2*ecache-size boundary and skipping the physical address being flushed.
 * It takes 10 loads to guarantee that the physical address has been
 * flushed.
 */

#define	HB_ECACHE_FLUSH_CNT	2
#define	HB_PHYS_FLUSH_CNT	10	/* #loads to flush specific paddr */
#endif /* HUMMINGBIRD */
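
/*
 * A hedged C sketch of the Hummingbird whole-E$ flush described above
 * (illustrative only; mem_read_bypass() is a hypothetical stand-in for
 * the ldxa [paddr]ASI_MEM bypass load, and the direct-map mode switch
 * is omitted here):
 *
 *	int i;
 *	uint64_t off;
 *
 *	for (i = 0; i < HB_ECACHE_FLUSH_CNT; i++) {
 *		for (off = 0; off < 2 * ecache_size; off += ecache_linesize)
 *			(void) mem_read_bypass(flush_base + off);
 *	}
 */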

/* END CSTYLED */

/*
 * Spitfire MMU and Cache operations.
 */

	ENTRY_NP(vtag_flushpage)
	/*
	 * flush page from the tlb
	 *
	 * %o0 = vaddr
	 * %o1 = sfmmup
	 */
	rdpr	%pstate, %o5
#ifdef DEBUG
	PANIC_IF_INTR_DISABLED_PSTR(%o5, sfdi_label1, %g1)
#endif /* DEBUG */
	/*
	 * disable ints
	 */
	andn	%o5, PSTATE_IE, %o4
	wrpr	%o4, 0, %pstate

	/*
	 * Then, blow out the tlb
	 * Interrupts are disabled to prevent the secondary ctx register
	 * from changing underneath us.
	 */
	sethi	%hi(ksfmmup), %o3
	ldx	[%o3 + %lo(ksfmmup)], %o3
	cmp	%o3, %o1
	bne,pt	%xcc, 1f			! if not kernel as, go to 1
	  sethi	%hi(FLUSH_ADDR), %o3
	/*
	 * For KCONTEXT demaps use primary. type = page implicitly
	 */
	stxa	%g0, [%o0]ASI_DTLB_DEMAP	/* dmmu flush for KCONTEXT */
	stxa	%g0, [%o0]ASI_ITLB_DEMAP	/* immu flush for KCONTEXT */
	flush	%o3
	b	5f
	  nop
1:
	/*
	 * User demap.  We need to set the secondary context properly.
	 * %o0 = vaddr
	 * %o1 = sfmmup
	 * %o3 = FLUSH_ADDR
	 */
	SFMMU_CPU_CNUM(%o1, %g1, %g2)	/* %g1 = sfmmu cnum on this CPU */

	set	MMU_SCONTEXT, %o4
	ldxa	[%o4]ASI_DMMU, %o2		/* rd old ctxnum */
	or	DEMAP_SECOND | DEMAP_PAGE_TYPE, %o0, %o0
	cmp	%o2, %g1
	be,pt	%icc, 4f
	  nop
	stxa	%g1, [%o4]ASI_DMMU		/* wr new ctxnum */
4:
	stxa	%g0, [%o0]ASI_DTLB_DEMAP
	stxa	%g0, [%o0]ASI_ITLB_DEMAP
	flush	%o3
	be,pt	%icc, 5f
	  nop
	stxa	%o2, [%o4]ASI_DMMU		/* restore old ctxnum */
	flush	%o3
5:
	retl
	  wrpr	%g0, %o5, %pstate		/* enable interrupts */
	SET_SIZE(vtag_flushpage)
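
/*
 * A hedged C sketch of the user-demap path above (illustrative only;
 * sfmmu_cpu_cnum(), scontext_read(), scontext_write() and demap_page()
 * are hypothetical stand-ins for the SFMMU_CPU_CNUM macro and the
 * MMU_SCONTEXT and ASI_{D,I}TLB_DEMAP accesses, all performed with
 * interrupts disabled):
 *
 *	uint_t cnum = sfmmu_cpu_cnum(sfmmup);	// ctx num on this CPU
 *	uint_t old = scontext_read();
 *
 *	if (old != cnum)
 *		scontext_write(cnum);		// borrow secondary ctx
 *	demap_page(vaddr | DEMAP_SECOND | DEMAP_PAGE_TYPE);
 *	if (old != cnum)
 *		scontext_write(old);		// restore secondary ctx
 */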

	.seg	".text"
.flushallmsg:
	.asciz	"sfmmu_asm: unimplemented flush operation"

	ENTRY_NP(vtag_flushall)
	sethi	%hi(.flushallmsg), %o0
	call	panic
	  or	%o0, %lo(.flushallmsg), %o0
	SET_SIZE(vtag_flushall)

	ENTRY_NP(vtag_flushall_uctxs)
	/*
	 * flush entire DTLB/ITLB.
	 */
	CPU_INDEX(%g1, %g2)
	mulx	%g1, CPU_NODE_SIZE, %g1
	set	cpunodes, %g2
	add	%g1, %g2, %g1
	lduh	[%g1 + ITLB_SIZE], %g2		! %g2 = # entries in ITLB
	lduh	[%g1 + DTLB_SIZE], %g1		! %g1 = # entries in DTLB
	sub	%g2, 1, %g2			! %g2 = # entries in ITLB - 1
	sub	%g1, 1, %g1			! %g1 = # entries in DTLB - 1

	!
	! Flush itlb's
	!
	ITLB_FLUSH_UNLOCKED_UCTXS(I, %g2, %g3, %g4, %o2, %o3, %o4, %o5)

	!
	! Flush dtlb's
	!
	DTLB_FLUSH_UNLOCKED_UCTXS(D, %g1, %g3, %g4, %o2, %o3, %o4, %o5)

	membar	#Sync
	retl
	  nop

	SET_SIZE(vtag_flushall_uctxs)
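
/*
 * A hedged C sketch of the per-entry scan done by the
 * {I,D}TLB_FLUSH_UNLOCKED_UCTXS macros above (illustrative only;
 * tlb_access_read() and tlb_tagread() are hypothetical stand-ins for
 * the ASI_{I,D}TLB_ACCESS and ASI_{I,D}TLB_TAGREAD diagnostic loads):
 *
 *	int i;
 *
 *	for (i = tlb_entries - 1; i >= 0; i--) {
 *		uint64_t tte = tlb_access_read(i);
 *		uint64_t tag, ctx;
 *
 *		if ((tte >> 6) & 1)		// locked entry, skip
 *			continue;
 *		if (!((tte >> 63) & 1))		// invalid entry, skip
 *			continue;
 *		tag = tlb_tagread(i);
 *		ctx = tag & TAGREAD_CTX_MASK;
 *		if (ctx == KCONTEXT)		// kernel entry, skip
 *			continue;
 *		// demap vaddr (tag & ~TAGREAD_CTX_MASK) in context ctx,
 *		// as VTAG_FLUSHUPAGE does
 *	}
 */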

	ENTRY_NP(vtag_flushpage_tl1)
	/*
	 * x-trap to flush page from tlb and tsb
	 *
	 * %g1 = vaddr, zero-extended on 32-bit kernel
	 * %g2 = sfmmup
	 *
	 * assumes TSBE_TAG = 0
	 */
	srln	%g1, MMU_PAGESHIFT, %g1
	slln	%g1, MMU_PAGESHIFT, %g1		/* g1 = vaddr */

	SFMMU_CPU_CNUM(%g2, %g3, %g4)	/* %g3 = sfmmu cnum on this CPU */

	/* We need to set the secondary context properly. */
	set	MMU_SCONTEXT, %g4
	ldxa	[%g4]ASI_DMMU, %g5		/* rd old ctxnum */
	or	DEMAP_SECOND | DEMAP_PAGE_TYPE, %g1, %g1
	stxa	%g3, [%g4]ASI_DMMU		/* wr new ctxnum */
	stxa	%g0, [%g1]ASI_DTLB_DEMAP
	stxa	%g0, [%g1]ASI_ITLB_DEMAP
	stxa	%g5, [%g4]ASI_DMMU		/* restore old ctxnum */
	membar	#Sync
	retry
	SET_SIZE(vtag_flushpage_tl1)

	ENTRY_NP(vtag_flush_pgcnt_tl1)
	/*
	 * x-trap to flush pgcnt MMU_PAGESIZE pages from tlb
	 *
	 * %g1 = vaddr, zero-extended on 32-bit kernel
	 * %g2 = <sfmmup58 | pgcnt6>
	 *
	 * NOTE: this handler relies on the fact that no
	 *	interrupts or traps can occur during the loop
	 *	issuing the TLB_DEMAP operations. It is assumed
	 *	that interrupts are disabled and this code is
	 *	fetching from the kernel locked text address.
	 *
	 * assumes TSBE_TAG = 0
	 */
	srln	%g1, MMU_PAGESHIFT, %g1
	slln	%g1, MMU_PAGESHIFT, %g1		/* g1 = vaddr */
	or	DEMAP_SECOND | DEMAP_PAGE_TYPE, %g1, %g1

	set	SFMMU_PGCNT_MASK, %g4
	and	%g4, %g2, %g3			/* g3 = pgcnt - 1 */
	add	%g3, 1, %g3			/* g3 = pgcnt */

	andn	%g2, SFMMU_PGCNT_MASK, %g2	/* g2 = sfmmup */

	SFMMU_CPU_CNUM(%g2, %g5, %g6)	! %g5 = sfmmu cnum on this CPU

	/* We need to set the secondary context properly. */
	set	MMU_SCONTEXT, %g4
	ldxa	[%g4]ASI_DMMU, %g6		/* read old ctxnum */
	stxa	%g5, [%g4]ASI_DMMU		/* write new ctxnum */

	set	MMU_PAGESIZE, %g2		/* g2 = pgsize */
	sethi	%hi(FLUSH_ADDR), %g5
1:
	stxa	%g0, [%g1]ASI_DTLB_DEMAP
	stxa	%g0, [%g1]ASI_ITLB_DEMAP
	flush	%g5
	deccc	%g3				/* decr pgcnt */
	bnz,pt	%icc, 1b
	  add	%g1, %g2, %g1			/* go to nextpage */

	stxa	%g6, [%g4]ASI_DMMU		/* restore old ctxnum */
	membar	#Sync
	retry
	SET_SIZE(vtag_flush_pgcnt_tl1)

	! Not implemented on US1/US2
	ENTRY_NP(vtag_flushall_tl1)
	retry
	SET_SIZE(vtag_flushall_tl1)

/*
 * vac_flushpage(pfnum, color)
 *	Flush 1 8k page of the D-$ with physical page = pfnum
 *	Algorithm:
 *		The Spitfire dcache is a 16k direct-mapped, virtually
 *		indexed, physically tagged cache.  Given the pfnum we read
 *		all cache lines for the corresponding page in the cache
 *		(determined by the color).  Each cache line is compared with
 *		the tag created from the pfnum. If the tags match we flush
 *		the line.
 */
	.seg	".data"
	.align	8
	.global	dflush_type
dflush_type:
	.word	FLUSHPAGE_TYPE
	.seg	".text"

	ENTRY(vac_flushpage)
	/*
	 * flush page from the d$
	 *
	 * %o0 = pfnum, %o1 = color
	 */
	DCACHE_FLUSHPAGE(%o0, %o1, %o2, %o3, %o4)
	retl
	  nop
	SET_SIZE(vac_flushpage)

	ENTRY_NP(vac_flushpage_tl1)
	/*
	 * x-trap to flush page from the d$
	 *
	 * %g1 = pfnum, %g2 = color
	 */
	DCACHE_FLUSHPAGE(%g1, %g2, %g3, %g4, %g5)
	retry
	SET_SIZE(vac_flushpage_tl1)

	ENTRY(vac_flushcolor)
	/*
	 * %o0 = vcolor
	 */
	DCACHE_FLUSHCOLOR(%o0, %o1, %o2)
	retl
	  nop
	SET_SIZE(vac_flushcolor)

	ENTRY(vac_flushcolor_tl1)
	/*
	 * %g1 = vcolor
	 */
	DCACHE_FLUSHCOLOR(%g1, %g2, %g3)
	retry
	SET_SIZE(vac_flushcolor_tl1)


	.global	_dispatch_status_busy
_dispatch_status_busy:
	.asciz	"ASI_INTR_DISPATCH_STATUS error: busy"
	.align	4

/*
 * Determine whether or not the IDSR is busy.
 * Entry: no arguments
 * Returns: 1 if busy, 0 otherwise
 */
	ENTRY(idsr_busy)
	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %g1
	clr	%o0
	btst	IDSR_BUSY, %g1
	bz,a,pt	%xcc, 1f
	mov	1, %o0
1:
	retl
	nop
	SET_SIZE(idsr_busy)

/*
 * Setup interrupt dispatch data registers
 * Entry:
 *	%o0 - function or inumber to call
 *	%o1, %o2 - arguments (2 uint64_t's)
 */
	.seg	".text"

	ENTRY(init_mondo)
#ifdef DEBUG
	!
	! IDSR should not be busy at the moment
	!
	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %g1
	btst	IDSR_BUSY, %g1
	bz,pt	%xcc, 1f
	nop

	sethi	%hi(_dispatch_status_busy), %o0
	call	panic
	or	%o0, %lo(_dispatch_status_busy), %o0
#endif /* DEBUG */

	ALTENTRY(init_mondo_nocheck)
	!
	! interrupt vector dispatch data reg 0
	!
1:
	mov	IDDR_0, %g1
	mov	IDDR_1, %g2
	mov	IDDR_2, %g3
	stxa	%o0, [%g1]ASI_INTR_DISPATCH

	!
	! interrupt vector dispatch data reg 1
	!
	stxa	%o1, [%g2]ASI_INTR_DISPATCH

	!
	! interrupt vector dispatch data reg 2
	!
	stxa	%o2, [%g3]ASI_INTR_DISPATCH

	retl
	membar	#Sync			! allowed to be in the delay slot
	SET_SIZE(init_mondo)

/*
 * Ship mondo to upaid
 */
	ENTRY_NP(shipit)
	sll	%o0, IDCR_PID_SHIFT, %g1	! IDCR<18:14> = upa id
	or	%g1, IDCR_OFFSET, %g1		! IDCR<13:0> = 0x70
	stxa	%g0, [%g1]ASI_INTR_DISPATCH	! interrupt vector dispatch
#if defined(SF_ERRATA_54)
	membar	#Sync				! store must occur before load
	mov	0x20, %g3			! UDBH Control Register Read
	ldxa	[%g3]ASI_SDB_INTR_R, %g0
#endif
	retl
	membar	#Sync
	SET_SIZE(shipit)
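
/*
 * A hedged sketch of how a caller might string these primitives
 * together to send a mondo (illustrative only; the real send logic,
 * including NACK handling and retries, lives in the cross-call code):
 *
 *	init_mondo(func, arg1, arg2);	// load dispatch data regs 0-2
 *	shipit(upaid);			// write IDCR to launch the mondo
 *	while (idsr_busy())		// wait for the dispatch to finish
 *		;
 *	// then examine the IDSR NACK bit and retry if needed
 */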


/*
 * flush_instr_mem:
 *	Flush a portion of the I-$ starting at vaddr
 *	%o0 vaddr
 *	%o1 bytes to be flushed
 */

	ENTRY(flush_instr_mem)
	membar	#StoreStore				! Ensure the stores
							! are globally visible
1:
	flush	%o0
	subcc	%o1, ICACHE_FLUSHSZ, %o1		! bytes = bytes-0x20
	bgu,pt	%ncc, 1b
	add	%o0, ICACHE_FLUSHSZ, %o0		! vaddr = vaddr+0x20

	retl
	nop
	SET_SIZE(flush_instr_mem)
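
/*
 * A hedged C equivalent of the loop above (illustrative only;
 * flush_insn() stands in for the SPARC "flush" instruction):
 *
 *	while (bytes > 0) {
 *		flush_insn(vaddr);		// flush one I$ line
 *		vaddr += ICACHE_FLUSHSZ;	// 0x20 bytes per flush
 *		bytes -= ICACHE_FLUSHSZ;
 *	}
 */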

/*
 * flush_ecache:
 * Flush the entire e$ using displacement flush by reading through a
 * physically contiguous area. We use mmu bypass asi (ASI_MEM) while
 * reading this physical address range so that data doesn't go to d$.
 * incoming arguments:
 *	%o0 - 64 bit physical address
 *	%o1 - size of address range to read
 *	%o2 - ecache linesize
 */
	ENTRY(flush_ecache)
#ifndef HUMMINGBIRD
	b	2f
	  nop
1:
	ldxa	[%o0 + %o1]ASI_MEM, %g0	! start reading from physaddr + size
2:
	subcc	%o1, %o2, %o1
	bcc,a,pt %ncc, 1b
	  nop

#else /* HUMMINGBIRD */
	/*
	 * The UltraSPARC-IIe processor supports both 4-way set associative
	 * and direct map E$. For performance reasons, we flush E$ by
	 * placing it in direct map mode for data load/store and restore
	 * the state after we are done flushing it. It takes 2 iterations
	 * to guarantee that the entire ecache has been flushed.
	 *
	 * Keep the interrupts disabled while flushing E$ in this manner.
	 */
	rdpr	%pstate, %g4		! current pstate (restored later)
	andn	%g4, PSTATE_IE, %g5
	wrpr	%g0, %g5, %pstate	! disable interrupts

	! Place E$ in direct map mode for data access
	or	%g0, 1, %g5
	sllx	%g5, HB_UPA_DMAP_DATA_BIT, %g5
	ldxa	[%g0]ASI_UPA_CONFIG, %g1 ! current UPA config (restored later)
	or	%g1, %g5, %g5
	membar	#Sync
	stxa	%g5, [%g0]ASI_UPA_CONFIG ! enable direct map for data access
	membar	#Sync

	! flush entire ecache HB_ECACHE_FLUSH_CNT times
	mov	HB_ECACHE_FLUSH_CNT-1, %g5
2:
	sub	%o1, %o2, %g3		! start from last entry
1:
	ldxa	[%o0 + %g3]ASI_MEM, %g0	! start reading from physaddr + size
	subcc	%g3, %o2, %g3
	bgeu,a,pt %ncc, 1b
	  nop
	brgz,a,pt %g5, 2b
	  dec	%g5

	membar	#Sync
	stxa	%g1, [%g0]ASI_UPA_CONFIG ! restore UPA config reg
	membar	#Sync
	wrpr	%g0, %g4, %pstate	! restore earlier pstate
#endif /* HUMMINGBIRD */

	retl
	nop
	SET_SIZE(flush_ecache)
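
/*
 * A hedged C sketch of the (non-Hummingbird) displacement flush above
 * (illustrative only; mem_read_bypass() stands in for the
 * ldxa [paddr]ASI_MEM bypass load):
 *
 *	int64_t off;
 *
 *	for (off = size - linesize; off >= 0; off -= linesize)
 *		(void) mem_read_bypass(physaddr + off);
 */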

/*
 * void kdi_flush_idcache(int dcache_size, int dcache_linesize,
 *			int icache_size, int icache_linesize)
 */
	ENTRY(kdi_flush_idcache)
	DCACHE_FLUSHALL(%o0, %o1, %g1)
	ICACHE_FLUSHALL(%o2, %o3, %g1)
	membar	#Sync
	retl
	nop
	SET_SIZE(kdi_flush_idcache)


/*
 * void get_ecache_dtag(uint32_t ecache_idx, uint64_t *data, uint64_t *tag,
 *			uint64_t *oafsr, uint64_t *acc_afsr)
 *
 * Get ecache data and tag.  The ecache_idx argument is assumed to be aligned
 * on a 64-byte boundary.  The corresponding AFSR value is also read for each
 * 8 bytes of ecache data obtained.  The data argument is assumed to point to
 * an array of 16 uint64_t's (e$ data & AFSR value pairs).  Reading the data
 * and tag must be effectively atomic to make sense.  We will be executing at
 * PIL15 and will disable IE, so nothing can occur between the two reads.  We
 * also assume that the execution of this code does not interfere with what
 * we are reading - not really possible, but we'll live with it for now.
 * We also pass back the old AFSR value before clearing it, and the caller
 * will take appropriate action if the important bits are non-zero.
 *
 * If the caller wishes to track the AFSR in cases where the CP bit is
 * set, an address should be passed in for acc_afsr.  Otherwise, this
 * argument may be null.
 *
 * Register Usage:
 * i0: In: 32-bit e$ index
 * i1: In: addr of e$ data
 * i2: In: addr of e$ tag
 * i3: In: addr of old afsr
 * i4: In: addr of accumulated afsr - may be null
 */
	ENTRY(get_ecache_dtag)
	save	%sp, -SA(MINFRAME), %sp
	or	%g0, 1, %l4
	sllx	%l4, 39, %l4	! set bit 39 for e$ data access
	or	%i0, %l4, %g6	! %g6 = e$ addr for data read
	sllx	%l4, 1, %l4	! set bit 40 for e$ tag access
	or	%i0, %l4, %l4	! %l4 = e$ addr for tag read

	rdpr	%pstate, %i5
	andn	%i5, PSTATE_IE | PSTATE_AM, %i0
	wrpr	%i0, %g0, %pstate	! clear IE, AM bits

	ldxa	[%g0]ASI_ESTATE_ERR, %g1
	stxa	%g0, [%g0]ASI_ESTATE_ERR	! disable errors
	membar	#Sync

	ldxa	[%g0]ASI_AFSR, %i0	! grab the old-afsr before tag read
	stx	%i0, [%i3]		! write back the old-afsr

	ldxa	[%l4]ASI_EC_R, %g0	! read tag into E$ tag reg
	ldxa	[%g0]ASI_EC_DIAG, %i0	! read tag from E$ tag reg
	stx	%i0, [%i2]		! write back tag result

	clr	%i2			! loop count

	brz	%i4, 1f			! acc_afsr == NULL?
	  ldxa	[%g0]ASI_AFSR, %i0	! grab the old-afsr before clearing
	srlx	%i0, P_AFSR_CP_SHIFT, %l0
	btst	1, %l0
	bz	1f
	  nop
	ldx	[%i4], %g4
	or	%g4, %i0, %g4		! aggregate AFSR in cpu private
	stx	%g4, [%i4]
1:
	stxa	%i0, [%g0]ASI_AFSR	! clear AFSR
	membar	#Sync
	ldxa	[%g6]ASI_EC_R, %i0	! read the 8byte E$data
	stx	%i0, [%i1]		! save the E$data
	add	%g6, 8, %g6
	add	%i1, 8, %i1
	ldxa	[%g0]ASI_AFSR, %i0	! read AFSR for this 16byte read
	srlx	%i0, P_AFSR_CP_SHIFT, %l0
	btst	1, %l0
	bz	2f
	  stx	%i0, [%i1]		! save the AFSR

	brz	%i4, 2f			! acc_afsr == NULL?
	  nop
	ldx	[%i4], %g4
	or	%g4, %i0, %g4		! aggregate AFSR in cpu private
	stx	%g4, [%i4]
2:
	add	%i2, 8, %i2
	cmp	%i2, 64
	bl,a	1b
	  add	%i1, 8, %i1
	stxa	%i0, [%g0]ASI_AFSR	! clear AFSR
	membar	#Sync
	stxa	%g1, [%g0]ASI_ESTATE_ERR	! restore error enable
	membar	#Sync
	wrpr	%g0, %i5, %pstate
	ret
	  restore
	SET_SIZE(get_ecache_dtag)
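
/*
 * A hedged sketch of the layout produced in the caller's data array
 * (illustrative): each of the 8 8-byte e$ data reads is followed by an
 * AFSR read, and the two are stored as consecutive uint64_t's, i.e.
 *
 *	uint64_t data[16];	// { e$ data, AFSR } pairs
 *
 *	data[0] = e$ data bytes 0-7	data[1] = AFSR after that read
 *	data[2] = e$ data bytes 8-15	data[3] = AFSR after that read
 *	...
 */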

/*
 * The ce_err function handles trap type 0x63 (corrected_ECC_error) at tl=0.
 * Steps: 1. Get AFSR  2. Get AFAR <40:4>  3. Get datapath error status
 *	  4. Clear datapath error bit(s)  5. Clear AFSR error bit
 *	  6. package data in %g3 and %g2  7. call cpu_ce_error via sys_trap
 * %g3: [ 52:43 UDB lower | 42:33 UDB upper | 32:0 afsr ] - arg #3/arg #1
 * %g2: [ 40:4 afar ] - sys_trap->have_win: arg #4/arg #2
 */
	ENTRY_NP(ce_err)
	ldxa	[%g0]ASI_AFSR, %g3	! save afsr in g3

	!
	! Check for a UE... From Kevin.Normoyle:
	! We try to switch to the trap for the UE, but since that's
	! a hardware pipeline, we might get to the CE trap before we
	! can switch. The UDB and AFSR registers will have both the
	! UE and CE bits set but the UDB syndrome and the AFAR will be
	! for the UE.
	!
	or	%g0, 1, %g1		! put 1 in g1
	sllx	%g1, 21, %g1		! shift left to <21> afsr UE
	andcc	%g1, %g3, %g0		! check for UE in afsr
	bnz	async_err		! handle the UE, not the CE
	  or	%g0, 0x63, %g5		! pass along the CE ttype
	!
	! Disable further CE traps to avoid recursion (stack overflow)
	! and staying above XCALL_PIL for extended periods.
	!
	ldxa	[%g0]ASI_ESTATE_ERR, %g2
	andn	%g2, 0x1, %g2		! clear bit 0 - CEEN
	stxa	%g2, [%g0]ASI_ESTATE_ERR
	membar	#Sync			! required
	!
	! handle the CE
	ldxa	[%g0]ASI_AFAR, %g2	! save afar in g2

	set	P_DER_H, %g4		! put P_DER_H in g4
	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb upper half into g5
	or	%g0, 1, %g6		! put 1 in g6
	sllx	%g6, 8, %g6		! shift g6 to <8> sdb CE
	andcc	%g5, %g6, %g1		! check for CE in upper half
	sllx	%g5, 33, %g5		! shift upper bits to <42:33>
	or	%g3, %g5, %g3		! or with afsr bits
	bz,a	1f			! no error, goto 1f
	  nop
	stxa	%g1, [%g4]ASI_SDB_INTR_W ! clear sdb reg error bit
	membar	#Sync			! membar sync required
1:
	set	P_DER_L, %g4		! put P_DER_L in g4
	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb lower half into g5
	andcc	%g5, %g6, %g1		! check for CE in lower half
	sllx	%g5, 43, %g5		! shift lower bits to <52:43>
	or	%g3, %g5, %g3		! or with afsr bits
	bz,a	2f			! no error, goto 2f
	  nop
	stxa	%g1, [%g4]ASI_SDB_INTR_W ! clear sdb reg error bit
	membar	#Sync			! membar sync required
2:
	or	%g0, 1, %g4		! put 1 in g4
	sllx	%g4, 20, %g4		! shift left to <20> afsr CE
	stxa	%g4, [%g0]ASI_AFSR	! use g4 to clear afsr CE error
	membar	#Sync			! membar sync required

	set	cpu_ce_error, %g1	! put *cpu_ce_error() in g1
	rdpr	%pil, %g6		! read pil into %g6
	subcc	%g6, PIL_15, %g0
	  movneg	%icc, PIL_14, %g4 ! run at pil 14 unless already at 15
	sethi	%hi(sys_trap), %g5
	jmp	%g5 + %lo(sys_trap)	! goto sys_trap
	  movge	%icc, PIL_15, %g4	! already at pil 15
	SET_SIZE(ce_err)
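
/*
 * A hedged sketch of the %g3 packing built above (illustrative only,
 * with udb_upper/udb_lower holding the raw UDB register values):
 *
 *	uint64_t g3 = (afsr & 0x1ffffffffULL) |	// AFSR<32:0>
 *	    (udb_upper << 33) |			// UDB upper -> <42:33>
 *	    (udb_lower << 43);			// UDB lower -> <52:43>
 */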

	ENTRY_NP(ce_err_tl1)
#ifndef TRAPTRACE
	ldxa	[%g0]ASI_AFSR, %g7
	stxa	%g7, [%g0]ASI_AFSR
	membar	#Sync
	retry
#else
	set	ce_trap_tl1, %g1
	sethi	%hi(dis_err_panic1), %g4
	jmp	%g4 + %lo(dis_err_panic1)
	nop
#endif
	SET_SIZE(ce_err_tl1)

#ifdef	TRAPTRACE
.celevel1msg:
	.asciz	"Softerror with trap tracing at tl1: AFAR 0x%08x.%08x AFSR 0x%08x.%08x"

	ENTRY_NP(ce_trap_tl1)
	! upper 32 bits of AFSR already in o3
	mov	%o4, %o0		! save AFAR upper 32 bits
	mov	%o2, %o4		! lower 32 bits of AFSR
	mov	%o1, %o2		! lower 32 bits of AFAR
	mov	%o0, %o1		! upper 32 bits of AFAR
	set	.celevel1msg, %o0
	call	panic
	nop
	SET_SIZE(ce_trap_tl1)
#endif

/*
 * The async_err function handles trap types 0x0A (instruction_access_error)
 * and 0x32 (data_access_error) at TL = 0 and TL > 0.  When we branch here,
 * %g5 will have the trap type (with 0x200 set if we're at TL > 0).
 *
 * Steps: 1. Get AFSR  2. Get AFAR <40:4>  3. If not a UE error, skip the
 *	  UDB registers.  4. Else get and clear datapath error bit(s)
 *	  5. Clear AFSR error bits  6. package data in %g3 and %g2
 *	  7. disable all cpu errors, because the trap is likely to be fatal
 *	  8. call cpu_async_error via sys_trap
 *
 * %g3: [ 63:53 tt | 52:43 UDB_L | 42:33 UDB_U | 32:0 afsr ] - arg #3/arg #1
 * %g2: [ 40:4 afar ] - sys_trap->have_win: arg #4/arg #2
 *
 * async_err is the assembly glue code to get us from the actual trap
 * into the CPU module's C error handler.  Note that we also branch
 * here from ce_err() above.
 */
	ENTRY_NP(async_err)
	stxa	%g0, [%g0]ASI_ESTATE_ERR ! disable ecc and other cpu errors
	membar	#Sync			! membar sync required

	ldxa	[%g0]ASI_AFSR, %g3	! save afsr in g3
	ldxa	[%g0]ASI_AFAR, %g2	! save afar in g2

	sllx	%g5, 53, %g5		! move ttype to <63:53>
	or	%g3, %g5, %g3		! or to afsr in g3

	or	%g0, 1, %g1		! put 1 in g1
	sllx	%g1, 21, %g1		! shift left to <21> afsr UE
	andcc	%g1, %g3, %g0		! check for UE in afsr
	bz,a,pn	%icc, 2f		! if !UE skip sdb read/clear
	  nop

	set	P_DER_H, %g4		! put P_DER_H in g4
	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb upper half into g5
	or	%g0, 1, %g6		! put 1 in g6
	sllx	%g6, 9, %g6		! shift g6 to <9> sdb UE
	andcc	%g5, %g6, %g1		! check for UE in upper half
	sllx	%g5, 33, %g5		! shift upper bits to <42:33>
	or	%g3, %g5, %g3		! or with afsr bits
	bz,a	1f			! no error, goto 1f
	  nop
	stxa	%g1, [%g4]ASI_SDB_INTR_W ! clear sdb reg UE error bit
	membar	#Sync			! membar sync required
1:
	set	P_DER_L, %g4		! put P_DER_L in g4
	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb lower half into g5
	andcc	%g5, %g6, %g1		! check for UE in lower half
	sllx	%g5, 43, %g5		! shift lower bits to <52:43>
	or	%g3, %g5, %g3		! or with afsr bits
	bz,a	2f			! no error, goto 2f
	  nop
	stxa	%g1, [%g4]ASI_SDB_INTR_W ! clear sdb reg UE error bit
	membar	#Sync			! membar sync required
2:
	stxa	%g3, [%g0]ASI_AFSR	! clear all the sticky bits
	membar	#Sync			! membar sync required

	RESET_USER_RTT_REGS(%g4, %g5, async_err_resetskip)
async_err_resetskip:

	set	cpu_async_error, %g1	! put cpu_async_error in g1
	sethi	%hi(sys_trap), %g5
	jmp	%g5 + %lo(sys_trap)	! goto sys_trap
	  or	%g0, PIL_15, %g4	! run at pil 15
	SET_SIZE(async_err)

	ENTRY_NP(dis_err_panic1)
	stxa	%g0, [%g0]ASI_ESTATE_ERR ! disable all error traps
	membar	#Sync
	! the sys_trap destination routine is already in %g1
	ldxa	[%g0]ASI_AFAR, %g2	! read afar
	ldxa	[%g0]ASI_AFSR, %g3	! read afsr
	set	P_DER_H, %g4		! put P_DER_H in g4
	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb upper half into g5
	sllx	%g5, 33, %g5		! shift upper bits to <42:33>
	or	%g3, %g5, %g3		! or with afsr bits
	set	P_DER_L, %g4		! put P_DER_L in g4
	ldxa	[%g4]ASI_SDB_INTR_R, %g5 ! read sdb lower half into g5
	sllx	%g5, 43, %g5		! shift lower bits to <52:43>
	or	%g3, %g5, %g3		! or with afsr bits

	RESET_USER_RTT_REGS(%g4, %g5, dis_err_panic1_resetskip)
dis_err_panic1_resetskip:

	sethi	%hi(sys_trap), %g5
	jmp	%g5 + %lo(sys_trap)	! goto sys_trap
	  sub	%g0, 1, %g4
	SET_SIZE(dis_err_panic1)

/*
 * The clr_datapath function clears any error bits set in the UDB regs.
 */
	ENTRY(clr_datapath)
	set	P_DER_H, %o4			! put P_DER_H in o4
	ldxa	[%o4]ASI_SDB_INTR_R, %o5	! read sdb upper half into o5
	or	%g0, 0x3, %o2			! put 0x3 in o2
	sllx	%o2, 8, %o2			! shift o2 to <9:8> sdb
	andcc	%o5, %o2, %o1			! check for UE,CE in upper half
	bz,a	1f				! no error, goto 1f
	  nop
	stxa	%o1, [%o4]ASI_SDB_INTR_W	! clear sdb reg UE,CE error bits
	membar	#Sync				! membar sync required
1:
	set	P_DER_L, %o4			! put P_DER_L in o4
	ldxa	[%o4]ASI_SDB_INTR_R, %o5	! read sdb lower half into o5
	andcc	%o5, %o2, %o1			! check for UE,CE in lower half
	bz,a	2f				! no error, goto 2f
	  nop
	stxa	%o1, [%o4]ASI_SDB_INTR_W	! clear sdb reg UE,CE error bits
	membar	#Sync
2:
	retl
	  nop
	SET_SIZE(clr_datapath)

/*
 * The get_udb_errors() function gets the current value of the
 * Datapath Error Registers.
 */
	ENTRY(get_udb_errors)
	set	P_DER_H, %o3
	ldxa	[%o3]ASI_SDB_INTR_R, %o2
	stx	%o2, [%o0]
	set	P_DER_L, %o3
	ldxa	[%o3]ASI_SDB_INTR_R, %o2
	retl
	  stx	%o2, [%o1]
	SET_SIZE(get_udb_errors)

/*
 * The itlb_rd_entry and dtlb_rd_entry functions return the tag portion of the
 * tte, the virtual address, and the ctxnum of the specified tlb entry.  They
 * should only be used in places where you have no choice but to look at the
 * tlb itself.
 *
 * Note: These two routines are required by the Estar "cpr" loadable module.
 */
/*
 * NB - In Spitfire cpus, when reading a tte from the hardware, we
 * need to clear bits [42-41] because the general definitions in pte.h
 * define the PA to be [42-13] whereas Spitfire really uses [40-13].
 * When cloning these routines for other cpus the "andn" below is not
 * necessary.
 */
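/*
 * A hedged sketch of the masking described above (illustrative):
 *
 *	uint64_t mask = (uint64_t)TTE_SPITFIRE_PFNHI_CLEAR <<
 *	    TTE_SPITFIRE_PFNHI_SHIFT;	// covers PA bits <42:41>
 *	tte &= ~mask;			// Spitfire PA is only <40:13>
 */
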
	ENTRY_NP(itlb_rd_entry)
	sllx	%o0, 3, %o0
#if defined(SF_ERRATA_32)
	sethi	%hi(FLUSH_ADDR), %g2
	set	MMU_PCONTEXT, %g1
	stxa	%g0, [%g1]ASI_DMMU			! KCONTEXT
	flush	%g2
#endif
	ldxa	[%o0]ASI_ITLB_ACCESS, %g1
	set	TTE_SPITFIRE_PFNHI_CLEAR, %g2		! spitfire only
	sllx	%g2, TTE_SPITFIRE_PFNHI_SHIFT, %g2	! see comment above
	andn	%g1, %g2, %g1				! for details
	stx	%g1, [%o1]
	ldxa	[%o0]ASI_ITLB_TAGREAD, %g2
	set	TAGREAD_CTX_MASK, %o4
	andn	%g2, %o4, %o5
	retl
	  stx	%o5, [%o2]
	SET_SIZE(itlb_rd_entry)

	ENTRY_NP(dtlb_rd_entry)
	sllx	%o0, 3, %o0
#if defined(SF_ERRATA_32)
	sethi	%hi(FLUSH_ADDR), %g2
	set	MMU_PCONTEXT, %g1
	stxa	%g0, [%g1]ASI_DMMU			! KCONTEXT
	flush	%g2
#endif
	ldxa	[%o0]ASI_DTLB_ACCESS, %g1
	set	TTE_SPITFIRE_PFNHI_CLEAR, %g2		! spitfire only
	sllx	%g2, TTE_SPITFIRE_PFNHI_SHIFT, %g2	! see comment above
	andn	%g1, %g2, %g1				! (see itlb_rd_entry)
	stx	%g1, [%o1]
	ldxa	[%o0]ASI_DTLB_TAGREAD, %g2
	set	TAGREAD_CTX_MASK, %o4
	andn	%g2, %o4, %o5
	retl
	  stx	%o5, [%o2]
	SET_SIZE(dtlb_rd_entry)

	ENTRY(set_lsu)
	stxa	%o0, [%g0]ASI_LSU		! store to LSU
	retl
	membar	#Sync
	SET_SIZE(set_lsu)

	ENTRY(get_lsu)
	retl
	ldxa	[%g0]ASI_LSU, %o0		! load LSU
	SET_SIZE(get_lsu)

	/*
	 * Clear the NPT (non-privileged trap) bit in the %tick
	 * register. In an effort to make the change in the
	 * tick counter as consistent as possible, we disable
	 * all interrupts while we're changing the registers. We also
	 * ensure that the read and write instructions are in the same
	 * line in the instruction cache.
	 */
	ENTRY_NP(cpu_clearticknpt)
	rdpr	%pstate, %g1		/* save processor state */
	andn	%g1, PSTATE_IE, %g3	/* turn off */
	wrpr	%g0, %g3, %pstate	/*   interrupts */
	rdpr	%tick, %g2		/* get tick register */
	brgez,pn %g2, 1f		/* if NPT bit off, we're done */
	mov	1, %g3			/* create mask */
	sllx	%g3, 63, %g3		/*   for NPT bit */
	ba,a,pt	%xcc, 2f
	.align	64			/* Align to I$ boundary */
2:
	rdpr	%tick, %g2		/* get tick register */
	wrpr	%g3, %g2, %tick		/* write tick register, */
					/*   clearing NPT bit   */
#if defined(BB_ERRATA_1)
	rdpr	%tick, %g0		/* read (s)tick (BB_ERRATA_1) */
#endif
1:
	jmp	%g4 + 4
	wrpr	%g0, %g1, %pstate	/* restore processor state */
	SET_SIZE(cpu_clearticknpt)
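
/*
 * A hedged C sketch of the operation above (illustrative only;
 * tick_read() and tick_write() stand in for the rdpr/wrpr %tick
 * accesses, and note that wrpr actually writes rs1 ^ rs2):
 *
 *	uint64_t tick = tick_read();
 *
 *	if (tick & (1ULL << 63))		// NPT bit set?
 *		tick_write(tick & ~(1ULL << 63));
 */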

	/*
	 * get_ecache_tag()
	 * Register Usage:
	 * %o0: In: 32-bit E$ index
	 *	Out: 64-bit E$ tag value
	 * %o1: In: addr at which to return the 64-bit AFSR value read
	 *	after the sticky bits were cleared
	 * %o2: In: address of cpu private afsr storage
	 */
	ENTRY(get_ecache_tag)
	or	%g0, 1, %o4
	sllx	%o4, 40, %o4			! set bit 40 for e$ tag access
	or	%o0, %o4, %o4			! %o4 = e$ addr for tag read
	rdpr	%pstate, %o5
	andn	%o5, PSTATE_IE | PSTATE_AM, %o0
	wrpr	%o0, %g0, %pstate		! clear IE, AM bits

	ldxa	[%g0]ASI_ESTATE_ERR, %g1
	stxa	%g0, [%g0]ASI_ESTATE_ERR	! Turn off Error enable
	membar	#Sync

	ldxa	[%g0]ASI_AFSR, %o0
	srlx	%o0, P_AFSR_CP_SHIFT, %o3
	btst	1, %o3
	bz	1f
	  nop
	ldx	[%o2], %g4
	or	%g4, %o0, %g4			! aggregate AFSR in cpu private
	stx	%g4, [%o2]
1:
	stxa	%o0, [%g0]ASI_AFSR		! clear AFSR
	membar	#Sync

	ldxa	[%o4]ASI_EC_R, %g0
	ldxa	[%g0]ASI_EC_DIAG, %o0		! read tag from e$ tag reg

	ldxa	[%g0]ASI_AFSR, %o3
	srlx	%o3, P_AFSR_CP_SHIFT, %o4
	btst	1, %o4
	bz	2f
	  stx	%o3, [%o1]			! AFSR after sticky clear
	ldx	[%o2], %g4
	or	%g4, %o3, %g4			! aggregate AFSR in cpu private
	stx	%g4, [%o2]
2:
	membar	#Sync

	stxa	%g1, [%g0]ASI_ESTATE_ERR	! Turn error enable back on
	membar	#Sync
	retl
	wrpr	%g0, %o5, %pstate
	SET_SIZE(get_ecache_tag)
1241 
1242         /*
1243          * check_ecache_line()
1244          * Register Usage:
1245          * %o0: In: 32-bit E$ index
1246          *      Out: 64-bit accumulated AFSR
1247          * %o1: In: address of cpu private afsr storage
1248          */
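	/*
	 * In outline (an illustrative C sketch only; ec_read() is a
	 * hypothetical stand-in for the ASI_EC_R diagnostic loads):
	 *
	 *	err_enable_off();
	 *	for (i = 0; i < 8; i++)		-- 8 x 8 bytes = full line
	 *		ec_read(((1ULL << 39) | index) + 8 * i);
	 *	afsr = read_afsr();		-- any errors latch here
	 *	err_enable_restore();
	 *	return (afsr);
	 */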
	ENTRY(check_ecache_line)
	or	%g0, 1, %o4
	sllx	%o4, 39, %o4			! set bit 39 for e$ data access
	or	%o0, %o4, %o4			! %o4 = e$ addr for data read

	rdpr	%pstate, %o5
	andn	%o5, PSTATE_IE | PSTATE_AM, %o0
	wrpr	%o0, %g0, %pstate		! clear IE, AM bits

	ldxa	[%g0]ASI_ESTATE_ERR, %g1
	stxa	%g0, [%g0]ASI_ESTATE_ERR	! Turn off Error enable
	membar	#Sync

	ldxa	[%g0]ASI_AFSR, %o0
	srlx	%o0, P_AFSR_CP_SHIFT, %o2
	btst	1, %o2
	bz	1f
	  clr	%o2				! loop count
	ldx	[%o1], %o3
	or	%o3, %o0, %o3			! aggregate AFSR in cpu private
	stx	%o3, [%o1]
1:
	stxa	%o0, [%g0]ASI_AFSR		! clear AFSR
	membar	#Sync

2:
	ldxa	[%o4]ASI_EC_R, %g0		! read the E$ data, 8 bytes at a time
	add	%o2, 1, %o2
	cmp	%o2, 8
	bl,a	2b
	  add	%o4, 8, %o4

	membar	#Sync
	ldxa	[%g0]ASI_AFSR, %o0		! read accumulated AFSR
	srlx	%o0, P_AFSR_CP_SHIFT, %o2
	btst	1, %o2
	bz	3f
	  nop
	ldx	[%o1], %o3
	or	%o3, %o0, %o3			! aggregate AFSR in cpu private
	stx	%o3, [%o1]
3:
	stxa	%o0, [%g0]ASI_AFSR		! clear AFSR
	membar	#Sync
	stxa	%g1, [%g0]ASI_ESTATE_ERR	! Turn error enable back on
	membar	#Sync
	retl
	wrpr	%g0, %o5, %pstate
	SET_SIZE(check_ecache_line)

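	/*
	 * read_and_clear_afsr()
	 * The AFSR status bits are write-one-to-clear, so storing back
	 * the value just read clears exactly the bits that were set;
	 * that same value is returned in %o0.
	 */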
	ENTRY(read_and_clear_afsr)
	ldxa	[%g0]ASI_AFSR, %o0
	retl
	  stxa	%o0, [%g0]ASI_AFSR		! clear AFSR
	SET_SIZE(read_and_clear_afsr)

/*
 * scrubphys - Pass in the aligned physical memory address that you want
 * to scrub, along with the ecache size.
 *
 *	1) Displacement flush the E$ line corresponding to %addr.
 *	   The first ldxa guarantees that %addr is no longer in
 *	   M, O, or E state (it goes to I, or to S if an instruction
 *	   fetch also happens).
 *	2) "Write" the data using a CAS %addr,%g0,%g0.
 *	   The casxa guarantees a transition from I to M or S to M.
 *	3) Displacement flush the E$ line corresponding to %addr.
 *	   The second ldxa pushes the M line out of the ecache, into the
 *	   writeback buffers, on the way to memory.
 *	4) The "membar #Sync" pushes the cache line out of the writeback
 *	   buffers onto the bus, on its way to dram at last.
 *
 * This is a modified version of the algorithm suggested by Gary Lauterbach.
 * In theory the CAS %addr,%g0,%g0 is supposed to mark the addr's cache line
 * as modified, but we found that on spitfire, if it misses in the E$ it
 * will probably install as M, whereas if it hits in the E$ it will stay E
 * when the store does not happen. So the first displacement flush ensures
 * that the CAS will miss in the E$.  Arrgh.
 */
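/*
 * The non-Hummingbird path below, as an illustrative C sketch (ldphys()
 * and casphys() are hypothetical stand-ins for the ASI_MEM accesses):
 *
 *	alias = (pa ^ ecache_size) & (2 * ecache_size - 1);
 *	disable_interrupts();
 *	ldphys(ecache_flushaddr + alias);	-- 1) line leaves M/O/E
 *	casphys(pa, 0, 0);			-- 2) I->M (or S->M)
 *	ldphys(ecache_flushaddr + alias);	-- 3) push M line to wbuf
 *	restore_interrupts();
 *	membar_sync();				-- 4) drain wbuf to dram
 */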

	ENTRY(scrubphys)
	or	%o1, %g0, %o2	! put ecache size in %o2
#ifndef HUMMINGBIRD
	xor	%o0, %o2, %o1	! calculate alias address
	add	%o2, %o2, %o3	! 2 * ecachesize in case
				! addr == ecache_flushaddr
	sub	%o3, 1, %o3	! -1 == mask
	and	%o1, %o3, %o1	! and with xor'd address
	set	ecache_flushaddr, %o3
	ldx	[%o3], %o3

	rdpr	%pstate, %o4
	andn	%o4, PSTATE_IE | PSTATE_AM, %o5
	wrpr	%o5, %g0, %pstate	! clear IE, AM bits

	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
	casxa	[%o0]ASI_MEM, %g0, %g0
	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias

#else /* HUMMINGBIRD */
	/*
	 * The UltraSPARC-IIe processor supports both 4-way set associative
	 * and direct map E$. We need to reconfigure the E$ to direct map
	 * mode for data loads/stores before the displacement flush. We
	 * also need to flush all 4 sets of the E$ to ensure that the
	 * physaddr has been flushed. Keep the interrupts disabled while
	 * flushing the E$ in this manner.
	 *
	 * To flush a specific physical address, we start at the aliased
	 * address and load at set-size stride, wrapping around at the
	 * 2*ecache-size boundary and skipping the fault physical address.
	 * It takes 10 loads to guarantee that the physical address has
	 * been flushed.
	 *
	 * Usage:
	 *	%o0	physaddr
	 *	%o5	physaddr - ecache_flushaddr
	 *	%g1	UPA config (restored later)
	 *	%g2	E$ set size
	 *	%g3	E$ flush address range mask (i.e. 2 * E$ - 1)
	 *	%g4	#loads to flush phys address
	 *	%g5	temp
	 */
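	/*
	 * The flush loop below, as an illustrative C sketch (ldphys()
	 * is a hypothetical stand-in for the ASI_MEM load; mask is
	 * 2 * ecache_size - 1):
	 *
	 *	off = alias;
	 *	for (n = 0; n < HB_PHYS_FLUSH_CNT; n++) {  -- 10 loads
	 *		ldphys(ecache_flushaddr + off);
	 *		do {
	 *			off = (off + set_size) & mask;
	 *		} while (off == pa - ecache_flushaddr);
	 *	}
	 */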

	sethi	%hi(ecache_associativity), %g5
	ld	[%g5 + %lo(ecache_associativity)], %g5
	udivx	%o2, %g5, %g2	! set size (i.e. ecache_size/#sets)
	xor	%o0, %o2, %o1	! calculate alias address
	add	%o2, %o2, %g3	! 2 * ecachesize in case
				! addr == ecache_flushaddr
	sub	%g3, 1, %g3	! 2 * ecachesize -1 == mask
	and	%o1, %g3, %o1	! and with xor'd address
	sethi	%hi(ecache_flushaddr), %o3
	ldx	[%o3 + %lo(ecache_flushaddr)], %o3

	rdpr	%pstate, %o4
	andn	%o4, PSTATE_IE | PSTATE_AM, %o5
	wrpr	%o5, %g0, %pstate	! clear IE, AM bits

	! Place E$ in direct map mode for data access
	or	%g0, 1, %g5
	sllx	%g5, HB_UPA_DMAP_DATA_BIT, %g5
	ldxa	[%g0]ASI_UPA_CONFIG, %g1 ! current UPA config (restored later)
	or	%g1, %g5, %g5
	membar	#Sync
	stxa	%g5, [%g0]ASI_UPA_CONFIG ! enable direct map for data access
	membar	#Sync

	! Displace the cache line from each set of the E$, starting at
	! the aliased address, at set-size stride, wrapping at
	! 2*ecache_size and skipping the load from physaddr. We need 10
	! loads to flush the physaddr from the E$.
	mov	HB_PHYS_FLUSH_CNT-1, %g4 ! #loads to flush phys addr
	sub	%o0, %o3, %o5		! physaddr - ecache_flushaddr
	or	%o1, %g0, %g5		! starting aliased offset
2:
	ldxa	[%g5 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
1:
	add	%g5, %g2, %g5		! calculate offset in next set
	and	%g5, %g3, %g5		! force offset within aliased range
	cmp	%g5, %o5		! skip loads from physaddr
	be,pn %ncc, 1b
	  nop
	brgz,pt	%g4, 2b
	  dec	%g4

	casxa	[%o0]ASI_MEM, %g0, %g0

	! Flush %o0 from the ecache again.
	! A single displacement flush at offset %o1 suffices this time
	! because the E$ is already in direct map mode.
	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias

	membar	#Sync
	stxa	%g1, [%g0]ASI_UPA_CONFIG ! restore UPA config (DM bits)
	membar	#Sync
#endif /* HUMMINGBIRD */
	wrpr	%g0, %o4, %pstate	! restore earlier pstate register value

	retl
	membar	#Sync			! move the data out of the load buffer
	SET_SIZE(scrubphys)

/*
 * clearphys - Pass in the aligned physical memory address that you want
 * to push out of the ecache as a 64 byte block of zeros.
 * Since this routine does not bypass the ecache, it is possible that
 * it could generate a UE error while trying to clear a bad line.
 * This routine clears and restores the error enable flag.
 * TBD - Hummingbird may need similar protection.
 */
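/*
 * In outline (an illustrative C sketch only; stphys()/ldphys() are
 * hypothetical stand-ins for the ASI_MEM accesses):
 *
 *	err_enable_off();
 *	for (off = linesize - 8; off >= 0; off -= 8)
 *		stphys(pa + off, 0);		-- zero the whole E$ line
 *	displacement-flush pa, as in scrubphys();
 *	err_enable_restore();
 */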
	ENTRY(clearphys)
	or	%o2, %g0, %o3	! ecache linesize
	or	%o1, %g0, %o2	! ecache size
#ifndef HUMMINGBIRD
	or	%o3, %g0, %o4	! save ecache linesize
	xor	%o0, %o2, %o1	! calculate alias address
	add	%o2, %o2, %o3	! 2 * ecachesize
	sub	%o3, 1, %o3	! -1 == mask
	and	%o1, %o3, %o1	! and with xor'd address
	set	ecache_flushaddr, %o3
	ldx	[%o3], %o3
	or	%o4, %g0, %o2	! saved ecache linesize

	rdpr	%pstate, %o4
	andn	%o4, PSTATE_IE | PSTATE_AM, %o5
	wrpr	%o5, %g0, %pstate	! clear IE, AM bits

	ldxa	[%g0]ASI_ESTATE_ERR, %g1
	stxa	%g0, [%g0]ASI_ESTATE_ERR	! disable errors
	membar	#Sync

	! need to put zeros in the cache line before displacing it

	sub	%o2, 8, %o2	! get offset of last double word in ecache line
1:
	stxa	%g0, [%o0 + %o2]ASI_MEM	! put zeros in the ecache line
	sub	%o2, 8, %o2
	brgez,a,pt %o2, 1b
	nop
	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
	casxa	[%o0]ASI_MEM, %g0, %g0
	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias

	stxa	%g1, [%g0]ASI_ESTATE_ERR	! restore error enable
	membar	#Sync

#else /* HUMMINGBIRD... */
	/*
	 * The UltraSPARC-IIe processor supports both 4-way set associative
	 * and direct map E$. We need to reconfigure the E$ to direct map
	 * mode for data loads/stores before the displacement flush. We
	 * also need to flush all 4 sets of the E$ to ensure that the
	 * physaddr has been flushed. Keep the interrupts disabled while
	 * flushing the E$ in this manner.
	 *
	 * To flush a specific physical address, we start at the aliased
	 * address and load at set-size stride, wrapping around at the
	 * 2*ecache-size boundary and skipping the fault physical address.
	 * It takes 10 loads to guarantee that the physical address has
	 * been flushed.
	 *
	 * Usage:
	 *	%o0	physaddr
	 *	%o5	physaddr - ecache_flushaddr
	 *	%g1	UPA config (restored later)
	 *	%g2	E$ set size
	 *	%g3	E$ flush address range mask (i.e. 2 * E$ - 1)
	 *	%g4	#loads to flush phys address
	 *	%g5	temp
	 */

	or	%o3, %g0, %o4	! save ecache linesize
	sethi	%hi(ecache_associativity), %g5
	ld	[%g5 + %lo(ecache_associativity)], %g5
	udivx	%o2, %g5, %g2	! set size (i.e. ecache_size/#sets)

	xor	%o0, %o2, %o1	! calculate alias address
	add	%o2, %o2, %g3	! 2 * ecachesize
	sub	%g3, 1, %g3	! 2 * ecachesize -1 == mask
	and	%o1, %g3, %o1	! and with xor'd address
	sethi	%hi(ecache_flushaddr), %o3
	ldx	[%o3 + %lo(ecache_flushaddr)], %o3
	or	%o4, %g0, %o2	! saved ecache linesize

	rdpr	%pstate, %o4
	andn	%o4, PSTATE_IE | PSTATE_AM, %o5
	wrpr	%o5, %g0, %pstate	! clear IE, AM bits

	! Place E$ in direct map mode for data access
	or	%g0, 1, %g5
	sllx	%g5, HB_UPA_DMAP_DATA_BIT, %g5
	ldxa	[%g0]ASI_UPA_CONFIG, %g1 ! current UPA config (restored later)
	or	%g1, %g5, %g5
	membar	#Sync
	stxa	%g5, [%g0]ASI_UPA_CONFIG ! enable direct map for data access
	membar	#Sync

	! need to put zeros in the cache line before displacing it

	sub	%o2, 8, %o2	! get offset of last double word in ecache line
1:
	stxa	%g0, [%o0 + %o2]ASI_MEM	! put zeros in the ecache line
	sub	%o2, 8, %o2
	brgez,a,pt %o2, 1b
	nop

	! Displace the cache line from each set of the E$, starting at
	! the aliased address, at set-size stride, wrapping at
	! 2*ecache_size and skipping the load from physaddr. We need 10
	! loads to flush the physaddr from the E$.
	mov	HB_PHYS_FLUSH_CNT-1, %g4 ! #loads to flush phys addr
	sub	%o0, %o3, %o5		! physaddr - ecache_flushaddr
	or	%o1, %g0, %g5		! starting offset
2:
	ldxa	[%g5 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
3:
	add	%g5, %g2, %g5		! calculate offset in next set
	and	%g5, %g3, %g5		! force offset within aliased range
	cmp	%g5, %o5		! skip loads from physaddr
	be,pn %ncc, 3b
	  nop
	brgz,pt	%g4, 2b
	  dec	%g4

	casxa	[%o0]ASI_MEM, %g0, %g0

	! Flush %o0 from the ecache again.
	! A single displacement flush at offset %o1 suffices this time
	! because the E$ is already in direct map mode.
	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias

	membar	#Sync
	stxa	%g1, [%g0]ASI_UPA_CONFIG ! restore UPA config (DM bits)
	membar	#Sync
#endif /* HUMMINGBIRD... */

	retl
	wrpr	%g0, %o4, %pstate	! restore earlier pstate register value
	SET_SIZE(clearphys)

/*
 * flushecacheline - This is a simpler version of scrubphys
 * which simply does a displacement flush of the line in
 * question. This routine is mainly used in handling async
 * errors where we want to get rid of a bad line in the ecache.
 * Note that if the line is modified and has suffered
 * data corruption, we are guaranteed that the hw will write
 * a UE back to mark the page poisoned.
 */
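/*
 * In outline (an illustrative C sketch only; ldphys() is a hypothetical
 * stand-in for the ASI_MEM load):
 *
 *	alias = (pa ^ ecache_size) & (2 * ecache_size - 1);
 *	err_enable_off();			-- a bad line may raise a UE
 *	ldphys(ecache_flushaddr + alias);	-- displacement flush
 *	err_enable_restore();
 */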
	ENTRY(flushecacheline)
	or	%o1, %g0, %o2	! put ecache size in %o2
#ifndef HUMMINGBIRD
	xor	%o0, %o2, %o1	! calculate alias address
	add	%o2, %o2, %o3	! 2 * ecachesize in case
				! addr == ecache_flushaddr
	sub	%o3, 1, %o3	! -1 == mask
	and	%o1, %o3, %o1	! and with xor'd address
	set	ecache_flushaddr, %o3
	ldx	[%o3], %o3

	rdpr	%pstate, %o4
	andn	%o4, PSTATE_IE | PSTATE_AM, %o5
	wrpr	%o5, %g0, %pstate	! clear IE, AM bits

	ldxa	[%g0]ASI_ESTATE_ERR, %g1
	stxa	%g0, [%g0]ASI_ESTATE_ERR	! disable errors
	membar	#Sync

	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
	membar	#Sync
	stxa	%g1, [%g0]ASI_ESTATE_ERR	! restore error enable
	membar	#Sync
#else /* HUMMINGBIRD */
	/*
	 * The UltraSPARC-IIe processor supports both 4-way set associative
	 * and direct map E$. We need to reconfigure the E$ to direct map
	 * mode for data loads/stores before the displacement flush. We
	 * also need to flush all 4 sets of the E$ to ensure that the
	 * physaddr has been flushed. Keep the interrupts disabled while
	 * flushing the E$ in this manner.
	 *
	 * To flush a specific physical address, we start at the aliased
	 * address and load at set-size stride, wrapping around at the
	 * 2*ecache-size boundary and skipping the fault physical address.
	 * It takes 10 loads to guarantee that the physical address has
	 * been flushed.
	 *
	 * Usage:
	 *	%o0	physaddr
	 *	%o5	physaddr - ecache_flushaddr
	 *	%g1	error enable register
	 *	%g2	E$ set size
	 *	%g3	E$ flush address range mask (i.e. 2 * E$ - 1)
	 *	%g4	UPA config (restored later)
	 *	%g5	temp
	 */

	sethi	%hi(ecache_associativity), %g5
	ld	[%g5 + %lo(ecache_associativity)], %g5
	udivx	%o2, %g5, %g2	! set size (i.e. ecache_size/#sets)
	xor	%o0, %o2, %o1	! calculate alias address
	add	%o2, %o2, %g3	! 2 * ecachesize in case
				! addr == ecache_flushaddr
	sub	%g3, 1, %g3	! 2 * ecachesize -1 == mask
	and	%o1, %g3, %o1	! and with xor'd address
	sethi	%hi(ecache_flushaddr), %o3
	ldx	[%o3 + %lo(ecache_flushaddr)], %o3

	rdpr	%pstate, %o4
	andn	%o4, PSTATE_IE | PSTATE_AM, %o5
	wrpr	%o5, %g0, %pstate	! clear IE, AM bits

	! Place E$ in direct map mode for data access
	or	%g0, 1, %g5
	sllx	%g5, HB_UPA_DMAP_DATA_BIT, %g5
	ldxa	[%g0]ASI_UPA_CONFIG, %g4 ! current UPA config (restored later)
	or	%g4, %g5, %g5
	membar	#Sync
	stxa	%g5, [%g0]ASI_UPA_CONFIG ! enable direct map for data access
	membar	#Sync

	ldxa	[%g0]ASI_ESTATE_ERR, %g1
	stxa	%g0, [%g0]ASI_ESTATE_ERR	! disable errors
	membar	#Sync

	! Displace the cache line from each set of the E$, starting at
	! the aliased address, at set-size stride, wrapping at
	! 2*ecache_size and skipping the load from physaddr. We need 10
	! loads to flush the physaddr from the E$.
	mov	HB_PHYS_FLUSH_CNT-1, %g5 ! #loads to flush physaddr
	sub	%o0, %o3, %o5		! physaddr - ecache_flushaddr
2:
	ldxa	[%o1 + %o3]ASI_MEM, %g0	! load ecache_flushaddr + alias
3:
	add	%o1, %g2, %o1		! calculate offset in next set
	and	%o1, %g3, %o1		! force offset within aliased range
	cmp	%o1, %o5		! skip loads from physaddr
	be,pn %ncc, 3b
	  nop
	brgz,pt	%g5, 2b
	  dec	%g5

	membar	#Sync
	stxa	%g1, [%g0]ASI_ESTATE_ERR	! restore error enable
	membar	#Sync

	stxa	%g4, [%g0]ASI_UPA_CONFIG ! restore UPA config (DM bits)
	membar	#Sync
#endif /* HUMMINGBIRD */
	retl
	wrpr	%g0, %o4, %pstate
	SET_SIZE(flushecacheline)

/*
 * ecache_scrubreq_tl1 is the crosstrap handler called at ecache_calls_a_sec Hz
 * from the clock CPU.  It atomically increments the outstanding request
 * counter and, if there was not already an outstanding request,
 * branches to setsoftint_tl1 to enqueue an intr_vec for the given inum.
 */
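/*
 * Conceptually (a C sketch only; no atomic instructions are needed
 * because at TL1 we cannot be preempted, and sfpr names the cpu-private
 * spitfire scrub state):
 *
 *	if (sfpr->ec_scrub_outstanding++ == 0)
 *		setsoftint_tl1(inum);	-- enqueue the intr_vec once
 */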

	! Register usage:
	!
	! Arguments:
	! %g1 - inum
	!
	! Internal:
	! %g2, %g3, %g5 - scratch
	! %g4 - ptr. to spitfire_scrub_misc ec_scrub_outstanding.
	! %g6 - setsoftint_tl1 address

	ENTRY_NP(ecache_scrubreq_tl1)
	set	SFPR_SCRUB_MISC + EC_SCRUB_OUTSTANDING, %g2
	GET_CPU_PRIVATE_PTR(%g2, %g4, %g5, 1f);
	ld	[%g4], %g2		! cpu's ec_scrub_outstanding.
	set	setsoftint_tl1, %g6
	!
	! no need to use atomic instructions for the following
	! increment - we're at tl1
	!
	add	%g2, 0x1, %g3
	brnz,pn	%g2, 1f			! no need to enqueue more intr_vec
	  st	%g3, [%g4]		! delay - store incremented counter
	jmp	%g6			! setsoftint_tl1(%g1) - queue intr_vec
	  nop
	! not reached
1:
	retry
	SET_SIZE(ecache_scrubreq_tl1)

	/*
	 * write_ec_tag_parity() zeroes the ecache tag,
	 * marks the state as invalid and writes good parity to the tag.
	 * Input: %o0 = 32-bit E$ index
	 */
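	/*
	 * In outline (an illustrative C sketch only; ec_diag_stage()
	 * and ec_tag_write() are hypothetical stand-ins for the
	 * ASI_EC_DIAG/ASI_EC_W stores below):
	 *
	 *	err_enable_off();
	 *	ec_diag_stage(S_EC_PARITY << S_ECPAR_SHIFT);
	 *	ec_tag_write((1ULL << 39) | index);  -- commit staged tag
	 *	err_enable_restore();
	 */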
	ENTRY(write_ec_tag_parity)
	or	%g0, 1, %o4
	sllx	%o4, 39, %o4			! set bit 39 for e$ tag access
	or	%o0, %o4, %o4			! %o4 = ecache addr for tag write

	rdpr	%pstate, %o5
	andn	%o5, PSTATE_IE | PSTATE_AM, %o1
	wrpr	%o1, %g0, %pstate		! clear IE, AM bits

	ldxa	[%g0]ASI_ESTATE_ERR, %g1
	stxa	%g0, [%g0]ASI_ESTATE_ERR	! Turn off Error enable
	membar	#Sync

	ba	1f
	 nop
	/*
	 * Align on the ecache boundary in order to force the
	 * critical code section onto the same ecache line.
	 */
	 .align	64

1:
	set	S_EC_PARITY, %o3		! clear tag, state invalid
	sllx	%o3, S_ECPAR_SHIFT, %o3		! and with good tag parity
	stxa	%o3, [%g0]ASI_EC_DIAG		! update with the above info
	stxa	%g0, [%o4]ASI_EC_W
	membar	#Sync

	stxa	%g1, [%g0]ASI_ESTATE_ERR	! Turn error enable back on
	membar	#Sync
	retl
	wrpr	%g0, %o5, %pstate
	SET_SIZE(write_ec_tag_parity)

	/*
	 * write_hb_ec_tag_parity() zeroes the ecache tag,
	 * marks the state as invalid and writes good parity to the tag.
	 * Input: %o0 = 32-bit E$ index
	 */
	ENTRY(write_hb_ec_tag_parity)
	or	%g0, 1, %o4
	sllx	%o4, 39, %o4			! set bit 39 for e$ tag access
	or	%o0, %o4, %o4			! %o4 = ecache addr for tag write

	rdpr	%pstate, %o5
	andn	%o5, PSTATE_IE | PSTATE_AM, %o1
	wrpr	%o1, %g0, %pstate		! clear IE, AM bits

	ldxa	[%g0]ASI_ESTATE_ERR, %g1
	stxa	%g0, [%g0]ASI_ESTATE_ERR	! Turn off Error enable
	membar	#Sync

	ba	1f
	 nop
	/*
	 * Align on the ecache boundary in order to force the
	 * critical code section onto the same ecache line.
	 */
	 .align	64
1:
#ifdef HUMMINGBIRD
	set	HB_EC_PARITY, %o3		! clear tag, state invalid
	sllx	%o3, HB_ECPAR_SHIFT, %o3	! and with good tag parity
#else /* !HUMMINGBIRD */
	set	SB_EC_PARITY, %o3		! clear tag, state invalid
	sllx	%o3, SB_ECPAR_SHIFT, %o3	! and with good tag parity
#endif /* !HUMMINGBIRD */

	stxa	%o3, [%g0]ASI_EC_DIAG		! update with the above info
	stxa	%g0, [%o4]ASI_EC_W
	membar	#Sync

	stxa	%g1, [%g0]ASI_ESTATE_ERR	! Turn error enable back on
	membar	#Sync
	retl
	wrpr	%g0, %o5, %pstate
	SET_SIZE(write_hb_ec_tag_parity)

#define	VIS_BLOCKSIZE		64
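
/*
 * dtrace_blksuword32 - copy the 32-bit word at [%i1] into user memory
 * at %i0 using a VIS block-commit store, with a lofault handler
 * installed so that a fault returns -1, or tail-calls
 * dtrace_blksuword32_err() when the tryagain flag (%i2) is set;
 * success returns 0.
 */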

	ENTRY(dtrace_blksuword32)
	save	%sp, -SA(MINFRAME + 4), %sp

	rdpr	%pstate, %l1
	andn	%l1, PSTATE_IE, %l2		! disable interrupts to
	wrpr	%g0, %l2, %pstate		! protect our FPU diddling

	rd	%fprs, %l0
	andcc	%l0, FPRS_FEF, %g0
	bz,a,pt	%xcc, 1f			! if the fpu is disabled
	wr	%g0, FPRS_FEF, %fprs		! ... enable the fpu

	st	%f0, [%fp + STACK_BIAS - 4]	! save %f0 to the stack
1:
	set	0f, %l5
	/*
	 * We're about to write a block full of either total garbage
	 * (not kernel data, don't worry) or user floating-point data
	 * (so it only _looks_ like garbage).
	 */
	ld	[%i1], %f0			! modify the block
	membar	#Sync
	stn	%l5, [THREAD_REG + T_LOFAULT]	! set up the lofault handler
	stda	%d0, [%i0]ASI_BLK_COMMIT_S	! store the modified block
	membar	#Sync
	stn	%g0, [THREAD_REG + T_LOFAULT]	! remove the lofault handler

	bz,a,pt	%xcc, 1f
	wr	%g0, %l0, %fprs			! restore %fprs

	ld	[%fp + STACK_BIAS - 4], %f0	! restore %f0
1:

	wrpr	%g0, %l1, %pstate		! restore interrupts

	ret
	restore	%g0, %g0, %o0

0:
	membar	#Sync
	stn	%g0, [THREAD_REG + T_LOFAULT]	! remove the lofault handler

	bz,a,pt	%xcc, 1f
	wr	%g0, %l0, %fprs			! restore %fprs

	ld	[%fp + STACK_BIAS - 4], %f0	! restore %f0
1:

	wrpr	%g0, %l1, %pstate		! restore interrupts

	/*
	 * If tryagain is set (%i2) we tail-call dtrace_blksuword32_err()
	 * which deals with watchpoints. Otherwise, just return -1.
	 */
	brnz,pt	%i2, 1f
	nop
	ret
	restore	%g0, -1, %o0
1:
	call	dtrace_blksuword32_err
	restore

	SET_SIZE(dtrace_blksuword32)