1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #include "assym.h"
  27 
  28 #include <sys/asm_linkage.h>
  29 #include <sys/mmu.h>
  30 #include <vm/hat_sfmmu.h>
  31 #include <sys/machparam.h>
  32 #include <sys/machcpuvar.h>
  33 #include <sys/machthread.h>
  34 #include <sys/privregs.h>
  35 #include <sys/asm_linkage.h>
  36 #include <sys/machasi.h>
  37 #include <sys/trap.h>
  38 #include <sys/spitregs.h>
  39 #include <sys/xc_impl.h>
  40 #include <sys/intreg.h>
  41 #include <sys/async.h>
  42 
  43 #ifdef TRAPTRACE
  44 #include <sys/traptrace.h>
  45 #endif /* TRAPTRACE */
  46 
  47 /* BEGIN CSTYLED */
/*
 * DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3)
 *
 * Flush one MMU_PAGESIZE page worth of lines from the Spitfire D$.
 *   arg1 = pfnum (shifted by SF_DC_VBIT_SHIFT below to form the tag
 *          to compare against)
 *   arg2 = virtual color (selects which D$ page alias to scan)
 * No-op when the D$ is disabled (LSU_DC clear in the LSU control reg).
 * Otherwise dispatches on the dflush_type tunable:
 *   FLUSHPAGE_TYPE  - scan one D$ page at the given color; invalidate
 *                     only valid lines whose tag matches arg1
 *   FLUSHMATCH_TYPE - scan the entire D$; invalidate matching lines
 *   FLUSHALL_TYPE   - unconditionally invalidate every D$ line
 * arg1, arg2 and all temps are clobbered.
 */
#define DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3)                  \
        ldxa    [%g0]ASI_LSU, tmp1                                      ;\
        btst    LSU_DC, tmp1            /* is dcache enabled? */        ;\
        bz,pn   %icc, 1f                                                ;\
        sethi   %hi(dcache_linesize), tmp1                              ;\
        ld      [tmp1 + %lo(dcache_linesize)], tmp1                     ;\
        sethi   %hi(dflush_type), tmp2                                  ;\
        ld      [tmp2 + %lo(dflush_type)], tmp2                         ;\
        cmp     tmp2, FLUSHPAGE_TYPE                                    ;\
        be,pt   %icc, 2f                                                ;\
        sllx    arg1, SF_DC_VBIT_SHIFT, arg1    /* tag to compare */    ;\
        sethi   %hi(dcache_size), tmp3                                  ;\
        ld      [tmp3 + %lo(dcache_size)], tmp3                         ;\
        cmp     tmp2, FLUSHMATCH_TYPE                                   ;\
        be,pt   %icc, 3f                                                ;\
        nop                                                             ;\
        /*                                                              \
         * flushtype = FLUSHALL_TYPE, flush the whole thing             \
         * tmp3 = cache size                                            \
         * tmp1 = cache line size                                       \
         */                                                             \
        sub     tmp3, tmp1, tmp2                                        ;\
4:                                                                      \
        stxa    %g0, [tmp2]ASI_DC_TAG                                   ;\
        membar  #Sync                                                   ;\
        cmp     %g0, tmp2                                               ;\
        bne,pt  %icc, 4b                /* walk down to offset 0 */     ;\
        sub     tmp2, tmp1, tmp2                                        ;\
        ba,pt   %icc, 1f                                                ;\
        nop                                                             ;\
        /*                                                              \
         * flushtype = FLUSHPAGE_TYPE                                   \
         * arg1 = tag to compare against                                \
         * arg2 = virtual color                                         \
         * tmp1 = cache line size                                       \
         * tmp2 = tag from cache                                        \
         * tmp3 = counter                                               \
         */                                                             \
2:                                                                      \
        set     MMU_PAGESIZE, tmp3                                      ;\
        sllx    arg2, MMU_PAGESHIFT, arg2  /* color to dcache page */   ;\
        sub     tmp3, tmp1, tmp3                                        ;\
4:                                                                      \
        ldxa    [arg2 + tmp3]ASI_DC_TAG, tmp2   /* read tag */          ;\
        btst    SF_DC_VBIT_MASK, tmp2                                   ;\
        bz,pn   %icc, 5f          /* branch if no valid sub-blocks */   ;\
        andn    tmp2, SF_DC_VBIT_MASK, tmp2     /* clear out v bits */  ;\
        cmp     tmp2, arg1                                              ;\
        bne,pn  %icc, 5f                        /* br if tag miss */    ;\
        nop                                                             ;\
        stxa    %g0, [arg2 + tmp3]ASI_DC_TAG    /* invalidate line */   ;\
        membar  #Sync                                                   ;\
5:                                                                      \
        cmp     %g0, tmp3                                               ;\
        bnz,pt  %icc, 4b                /* branch if not done */        ;\
        sub     tmp3, tmp1, tmp3                                        ;\
        ba,pt   %icc, 1f                                                ;\
        nop                                                             ;\
        /*                                                              \
         * flushtype = FLUSHMATCH_TYPE                                  \
         * arg1 = tag to compare against                                \
         * tmp1 = cache line size                                       \
         * tmp3 = cache size                                            \
         * arg2 = counter                                               \
         * tmp2 = cache tag                                             \
         */                                                             \
3:                                                                      \
        sub     tmp3, tmp1, arg2                                        ;\
4:                                                                      \
        ldxa    [arg2]ASI_DC_TAG, tmp2          /* read tag */          ;\
        btst    SF_DC_VBIT_MASK, tmp2                                   ;\
        bz,pn   %icc, 5f                /* br if no valid sub-blocks */ ;\
        andn    tmp2, SF_DC_VBIT_MASK, tmp2     /* clear out v bits */  ;\
        cmp     tmp2, arg1                                              ;\
        bne,pn  %icc, 5f                /* branch if tag miss */        ;\
        nop                                                             ;\
        stxa    %g0, [arg2]ASI_DC_TAG           /* invalidate line */   ;\
        membar  #Sync                                                   ;\
5:                                                                      \
        cmp     %g0, arg2                                               ;\
        bne,pt  %icc, 4b                /* branch if not done */        ;\
        sub     arg2, tmp1, arg2                                        ;\
1:
 131 
 132 /*
 133  * macro that flushes the entire dcache color
 134  */
 135 #define DCACHE_FLUSHCOLOR(arg, tmp1, tmp2)                              \
 136         ldxa    [%g0]ASI_LSU, tmp1;                                     \
 137         btst    LSU_DC, tmp1;           /* is dcache enabled? */        \
 138         bz,pn   %icc, 1f;                                               \
 139         sethi   %hi(dcache_linesize), tmp1;                             \
 140         ld      [tmp1 + %lo(dcache_linesize)], tmp1;                    \
 141         set     MMU_PAGESIZE, tmp2;                                     \
 142         /*                                                              \
 143          * arg = virtual color                                          \
 144          * tmp2 = page size                                             \
 145          * tmp1 = cache line size                                       \
 146          */                                                             \
 147         sllx    arg, MMU_PAGESHIFT, arg; /* color to dcache page */     \
 148         sub     tmp2, tmp1, tmp2;                                       \
 149 2:                                                                      \
 150         stxa    %g0, [arg + tmp2]ASI_DC_TAG;                            \
 151         membar  #Sync;                                                  \
 152         cmp     %g0, tmp2;                                              \
 153         bne,pt  %icc, 2b;                                               \
 154         sub     tmp2, tmp1, tmp2;                                       \
 155 1:
 156 
 157 /*
 158  * macro that flushes the entire dcache
 159  */
 160 #define DCACHE_FLUSHALL(size, linesize, tmp)                            \
 161         ldxa    [%g0]ASI_LSU, tmp;                                      \
 162         btst    LSU_DC, tmp;            /* is dcache enabled? */        \
 163         bz,pn   %icc, 1f;                                               \
 164                                                                         \
 165         sub     size, linesize, tmp;                                    \
 166 2:                                                                      \
 167         stxa    %g0, [tmp]ASI_DC_TAG;                                   \
 168         membar  #Sync;                                                  \
 169         cmp     %g0, tmp;                                               \
 170         bne,pt  %icc, 2b;                                               \
 171         sub     tmp, linesize, tmp;                                     \
 172 1:
 173 
 174 /*
 175  * macro that flushes the entire icache
 176  */
 177 #define ICACHE_FLUSHALL(size, linesize, tmp)                            \
 178         ldxa    [%g0]ASI_LSU, tmp;                                      \
 179         btst    LSU_IC, tmp;                                            \
 180         bz,pn   %icc, 1f;                                               \
 181                                                                         \
 182         sub     size, linesize, tmp;                                    \
 183 2:                                                                      \
 184         stxa    %g0, [tmp]ASI_IC_TAG;                                   \
 185         membar  #Sync;                                                  \
 186         cmp     %g0, tmp;                                               \
 187         bne,pt  %icc, 2b;                                               \
 188         sub     tmp, linesize, tmp;                                     \
 189 1:
 190 
/*
 * SF_WORKAROUND(tmp1, tmp2)
 * Spitfire erratum 32 workaround: reset the primary context register
 * to 0 and flush the pipeline before the following TLB access.
 * Expands to nothing when SF_ERRATA_32 is not defined.
 */
#ifdef SF_ERRATA_32
#define SF_WORKAROUND(tmp1, tmp2)                               \
        sethi   %hi(FLUSH_ADDR), tmp2                           ;\
        set     MMU_PCONTEXT, tmp1                              ;\
        stxa    %g0, [tmp1]ASI_DMMU                             ;\
        flush   tmp2                                            ;
#else
#define SF_WORKAROUND(tmp1, tmp2)
#endif /* SF_ERRATA_32 */
 200 
 201 /*
 202  * arg1 = vaddr
 203  * arg2 = ctxnum
 204  *      - disable interrupts and clear address mask
 205  *        to access 64 bit physaddr
 206  *      - Blow out the TLB, flush user page.
 207  *        . use secondary context.
 208  */
 209 #define VTAG_FLUSHUPAGE(lbl, arg1, arg2, tmp1, tmp2, tmp3, tmp4) \
 210         rdpr    %pstate, tmp1                                   ;\
 211         andn    tmp1, PSTATE_IE, tmp2                           ;\
 212         wrpr    tmp2, 0, %pstate                                ;\
 213         sethi   %hi(FLUSH_ADDR), tmp2                           ;\
 214         set     MMU_SCONTEXT, tmp3                              ;\
 215         ldxa    [tmp3]ASI_DMMU, tmp4                            ;\
 216         or      DEMAP_SECOND | DEMAP_PAGE_TYPE, arg1, arg1      ;\
 217         cmp     tmp4, arg2                                      ;\
 218         be,a,pt %icc, lbl/**/4                                  ;\
 219           nop                                                   ;\
 220         stxa    arg2, [tmp3]ASI_DMMU                            ;\
 221 lbl/**/4:                                                       ;\
 222         stxa    %g0, [arg1]ASI_DTLB_DEMAP                       ;\
 223         stxa    %g0, [arg1]ASI_ITLB_DEMAP                       ;\
 224         flush   tmp2                                            ;\
 225         be,a,pt %icc, lbl/**/5                                  ;\
 226           nop                                                   ;\
 227         stxa    tmp4, [tmp3]ASI_DMMU                            ;\
 228         flush   tmp2                                            ;\
 229 lbl/**/5:                                                       ;\
 230         wrpr    %g0, tmp1, %pstate
 231 
 232         
 233 /*
 234  * macro that flushes all the user entries in dtlb
 235  * arg1 = dtlb entries
 236  *      - Before first compare:
 237  *              tmp4 = tte
 238  *              tmp5 = vaddr
 239  *              tmp6 = cntxnum
 240  */
 241 #define DTLB_FLUSH_UNLOCKED_UCTXS(lbl, arg1, tmp1, tmp2, tmp3, \
 242                                 tmp4, tmp5, tmp6) \
 243 lbl/**/0:                                                       ;\
 244         sllx    arg1, 3, tmp3                                   ;\
 245         SF_WORKAROUND(tmp1, tmp2)                               ;\
 246         ldxa    [tmp3]ASI_DTLB_ACCESS, tmp4                     ;\
 247         srlx    tmp4, 6, tmp4                                   ;\
 248         andcc   tmp4, 1, %g0                                    ;\
 249         bnz,pn  %xcc, lbl/**/1                                  ;\
 250         srlx    tmp4, 57, tmp4                                  ;\
 251         andcc   tmp4, 1, %g0                                    ;\
 252         beq,pn  %xcc, lbl/**/1                                  ;\
 253           nop                                                   ;\
 254         set     TAGREAD_CTX_MASK, tmp1                          ;\
 255         ldxa    [tmp3]ASI_DTLB_TAGREAD, tmp2                    ;\
 256         and     tmp2, tmp1, tmp6                                ;\
 257         andn    tmp2, tmp1, tmp5                                ;\
 258         set     KCONTEXT, tmp4                                  ;\
 259         cmp     tmp6, tmp4                                      ;\
 260         be      lbl/**/1                                        ;\
 261           nop                                                   ;\
 262         VTAG_FLUSHUPAGE(VD/**/lbl, tmp5, tmp6, tmp1, tmp2, tmp3, tmp4) ;\
 263 lbl/**/1:                                                       ;\
 264         brgz,pt arg1, lbl/**/0                                  ;\
 265           sub     arg1, 1, arg1
 266 
 267 
 268 /*
 269  * macro that flushes all the user entries in itlb      
 270  * arg1 = itlb entries
 271  *      - Before first compare:
 272  *              tmp4 = tte
 273  *              tmp5 = vaddr
 274  *              tmp6 = cntxnum
 275  */
 276 #define ITLB_FLUSH_UNLOCKED_UCTXS(lbl, arg1, tmp1, tmp2, tmp3, \
 277                                 tmp4, tmp5, tmp6) \
 278 lbl/**/0:                                                       ;\
 279         sllx    arg1, 3, tmp3                                   ;\
 280         SF_WORKAROUND(tmp1, tmp2)                               ;\
 281         ldxa    [tmp3]ASI_ITLB_ACCESS, tmp4                     ;\
 282         srlx    tmp4, 6, tmp4                                   ;\
 283         andcc   tmp4, 1, %g0                                    ;\
 284         bnz,pn  %xcc, lbl/**/1                                  ;\
 285         srlx    tmp4, 57, tmp4                                  ;\
 286         andcc   tmp4, 1, %g0                                    ;\
 287         beq,pn  %xcc, lbl/**/1                                  ;\
 288           nop                                                   ;\
 289         set     TAGREAD_CTX_MASK, tmp1                          ;\
 290         ldxa    [tmp3]ASI_ITLB_TAGREAD, tmp2                    ;\
 291         and     tmp2, tmp1, tmp6                                ;\
 292         andn    tmp2, tmp1, tmp5                                ;\
 293         set     KCONTEXT, tmp4                                  ;\
 294         cmp     tmp6, tmp4                                      ;\
 295         be      lbl/**/1                                        ;\
 296           nop                                                   ;\
 297         VTAG_FLUSHUPAGE(VI/**/lbl, tmp5, tmp6, tmp1, tmp2, tmp3, tmp4) ;\
 298 lbl/**/1:                                                       ;\
 299         brgz,pt arg1, lbl/**/0                                  ;\
 300         sub     arg1, 1, arg1
 301 
 302 
 303         
 304 /*
 305  * Macro for getting to offset from 'cpu_private' ptr. The 'cpu_private'
 306  * ptr is in the machcpu structure.
 307  * r_or_s:      Register or symbol off offset from 'cpu_private' ptr.
 308  * scr1:        Scratch, ptr is returned in this register.
 309  * scr2:        Scratch
 310  */
 311 #define GET_CPU_PRIVATE_PTR(r_or_s, scr1, scr2, label)          \
 312         CPU_ADDR(scr1, scr2);                                           \
 313         ldn     [scr1 + CPU_PRIVATE], scr1;                             \
 314         cmp     scr1, 0;                                                \
 315         be      label;                                                  \
 316          nop;                                                           \
 317         add     scr1, r_or_s, scr1;                                     \
 318 
#ifdef HUMMINGBIRD
/*
 * UltraSPARC-IIe processor supports both 4-way set associative and
 * direct map E$. For performance reasons, we flush E$ by placing it
 * in direct map mode for data load/store and restore the state after
 * we are done flushing it. Keep interrupts off while flushing in this
 * manner.
 *
 * We flush the entire ecache by starting at one end and loading each
 * successive ecache line for the 2*ecache-size range. We have to repeat
 * the flush operation to guarantee that the entire ecache has been
 * flushed.
 *
 * For flushing a specific physical address, we start at the aliased
 * address and load at set-size stride, wrapping around at 2*ecache-size
 * boundary and skipping the physical address being flushed. It takes
 * 10 loads to guarantee that the physical address has been flushed.
 */

#define HB_ECACHE_FLUSH_CNT     2       /* # passes to flush entire E$ */
#define HB_PHYS_FLUSH_CNT       10      /* #loads to flush specific paddr */
#endif /* HUMMINGBIRD */
 341 
 342 /* END CSTYLED */
 343 
 344 /*
 345  * Spitfire MMU and Cache operations.
 346  */
 347 
        ENTRY_NP(vtag_flushpage)
        /*
         * flush page from the tlb
         *
         * %o0 = vaddr
         * %o1 = sfmmup
         *
         * Kernel mappings (sfmmup == ksfmmup) are demapped through the
         * primary context; user mappings go through the secondary
         * context, which is temporarily switched to the hat's cnum and
         * restored afterwards if it was changed.
         * Clobbers %o2-%o5, %g1, %g2.
         */
        rdpr    %pstate, %o5
#ifdef DEBUG
        PANIC_IF_INTR_DISABLED_PSTR(%o5, sfdi_label1, %g1)
#endif /* DEBUG */
        /*
         * disable ints so the secondary ctx register cannot change
         * underneath us
         */
        andn    %o5, PSTATE_IE, %o4
        wrpr    %o4, 0, %pstate

        /*
         * Then, blow out the tlb
         * Interrupts are disabled to prevent the secondary ctx register
         * from changing underneath us.
         */
        sethi   %hi(ksfmmup), %o3
        ldx     [%o3 + %lo(ksfmmup)], %o3
        cmp     %o3, %o1
        bne,pt   %xcc, 1f                       ! if not kernel as, go to 1
          sethi %hi(FLUSH_ADDR), %o3            ! delay: %o3 = FLUSH_ADDR
        /*
         * For KCONTEXT demaps use primary. type = page implicitly
         */
        stxa    %g0, [%o0]ASI_DTLB_DEMAP        /* dmmu flush for KCONTEXT */
        stxa    %g0, [%o0]ASI_ITLB_DEMAP        /* immu flush for KCONTEXT */
        flush   %o3
        b       5f
          nop
1:
        /*
         * User demap.  We need to set the secondary context properly.
         * %o0 = vaddr
         * %o1 = sfmmup
         * %o3 = FLUSH_ADDR
         */
        SFMMU_CPU_CNUM(%o1, %g1, %g2)   /* %g1 = sfmmu cnum on this CPU */

        set     MMU_SCONTEXT, %o4
        ldxa    [%o4]ASI_DMMU, %o2              /* rd old ctxnum */
        or      DEMAP_SECOND | DEMAP_PAGE_TYPE, %o0, %o0
        cmp     %o2, %g1
        be,pt   %icc, 4f                        /* already the right ctx */
          nop
        stxa    %g1, [%o4]ASI_DMMU              /* wr new ctxum */
4:
        stxa    %g0, [%o0]ASI_DTLB_DEMAP
        stxa    %g0, [%o0]ASI_ITLB_DEMAP
        flush   %o3
        /* cc still valid from the cmp above: stxa/flush don't set %icc */
        be,pt   %icc, 5f
          nop
        stxa    %o2, [%o4]ASI_DMMU              /* restore old ctxnum */
        flush   %o3
5:
        retl
          wrpr  %g0, %o5, %pstate               /* enable interrupts */
        SET_SIZE(vtag_flushpage)
 411         
        .seg    ".text"
.flushallmsg:
        .asciz  "sfmmu_asm: unimplemented flush operation"

        /*
         * Flushing the entire TLB is not supported on Spitfire;
         * any call to vtag_flushall() is a fatal programming error.
         */
        ENTRY_NP(vtag_flushall)
        sethi   %hi(.flushallmsg), %o0
        call    panic
          or    %o0, %lo(.flushallmsg), %o0
        SET_SIZE(vtag_flushall)
 421 
        ENTRY_NP(vtag_flushall_uctxs)
        /*
         * flush entire DTLB/ITLB.
         * Demaps every unlocked, valid, non-kernel-context entry on
         * this CPU.  TLB geometry comes from this CPU's cpunodes[]
         * entry.  Clobbers %g1-%g4, %o2-%o5.
         */
        CPU_INDEX(%g1, %g2)
        mulx    %g1, CPU_NODE_SIZE, %g1
        set     cpunodes, %g2
        add     %g1, %g2, %g1                   ! %g1 = &cpunodes[cpuid]
        lduh    [%g1 + ITLB_SIZE], %g2          ! %g2 = # entries in ITLB
        lduh    [%g1 + DTLB_SIZE], %g1          ! %g1 = # entries in DTLB
        sub     %g2, 1, %g2                     ! %g2 = # entries in ITLB - 1
        sub     %g1, 1, %g1                     ! %g1 = # entries in DTLB - 1

        !
        ! Flush itlb's
        !
        ITLB_FLUSH_UNLOCKED_UCTXS(I, %g2, %g3, %g4, %o2, %o3, %o4, %o5)

        !
        ! Flush dtlb's
        !
        DTLB_FLUSH_UNLOCKED_UCTXS(D, %g1, %g3, %g4, %o2, %o3, %o4, %o5)

        membar  #Sync
        retl
          nop

        SET_SIZE(vtag_flushall_uctxs)
 450 
        ENTRY_NP(vtag_flushpage_tl1)
        /*
         * x-trap to flush page from tlb and tsb
         *
         * %g1 = vaddr, zero-extended on 32-bit kernel
         * %g2 = sfmmup
         *
         * assumes TSBE_TAG = 0
         *
         * Runs at TL>0 with interrupts implicitly blocked, so the
         * secondary context is switched and restored unconditionally
         * (no cnum comparison as in vtag_flushpage).
         * Clobbers %g1, %g3-%g5.
         */
        srln    %g1, MMU_PAGESHIFT, %g1
        slln    %g1, MMU_PAGESHIFT, %g1                 /* g1 = vaddr */

        SFMMU_CPU_CNUM(%g2, %g3, %g4)   /* %g3 = sfmmu cnum on this CPU */

        /* We need to set the secondary context properly. */
        set     MMU_SCONTEXT, %g4
        ldxa    [%g4]ASI_DMMU, %g5              /* rd old ctxnum */
        or      DEMAP_SECOND | DEMAP_PAGE_TYPE, %g1, %g1
        stxa    %g3, [%g4]ASI_DMMU              /* wr new ctxum */
        stxa    %g0, [%g1]ASI_DTLB_DEMAP
        stxa    %g0, [%g1]ASI_ITLB_DEMAP
        stxa    %g5, [%g4]ASI_DMMU              /* restore old ctxnum */
        membar #Sync
        retry
        SET_SIZE(vtag_flushpage_tl1)
 476 
        ENTRY_NP(vtag_flush_pgcnt_tl1)
        /*
         * x-trap to flush pgcnt MMU_PAGESIZE pages from tlb
         *
         * %g1 = vaddr, zero-extended on 32-bit kernel
         * %g2 = <sfmmup58 | pgcnt6>  (sfmmup pointer with the count-1
         *       packed into the low SFMMU_PGCNT_MASK bits)
         *
         * NOTE: this handler relies on the fact that no
         *      interrupts or traps can occur during the loop
         *      issuing the TLB_DEMAP operations. It is assumed
         *      that interrupts are disabled and this code is
         *      fetching from the kernel locked text address.
         *
         * assumes TSBE_TAG = 0
         *
         * Clobbers %g1-%g6.
         */
        srln    %g1, MMU_PAGESHIFT, %g1
        slln    %g1, MMU_PAGESHIFT, %g1         /* g1 = vaddr */
        or      DEMAP_SECOND | DEMAP_PAGE_TYPE, %g1, %g1

        set     SFMMU_PGCNT_MASK, %g4
        and     %g4, %g2, %g3                   /* g3 = pgcnt - 1 */
        add     %g3, 1, %g3                     /* g3 = pgcnt */

        andn    %g2, SFMMU_PGCNT_MASK, %g2      /* g2 = sfmmup */

        SFMMU_CPU_CNUM(%g2, %g5, %g6)   ! %g5 = sfmmu cnum on this CPU

        /* We need to set the secondary context properly. */
        set     MMU_SCONTEXT, %g4
        ldxa    [%g4]ASI_DMMU, %g6              /* read old ctxnum */
        stxa    %g5, [%g4]ASI_DMMU              /* write new ctxum */

        set     MMU_PAGESIZE, %g2               /* g2 = pgsize */
        sethi    %hi(FLUSH_ADDR), %g5
1:
        stxa    %g0, [%g1]ASI_DTLB_DEMAP
        stxa    %g0, [%g1]ASI_ITLB_DEMAP
        flush   %g5
        deccc   %g3                             /* decr pgcnt */
        bnz,pt  %icc,1b
          add   %g1, %g2, %g1                   /* go to nextpage */

        stxa    %g6, [%g4]ASI_DMMU              /* restore old ctxnum */
        membar #Sync
        retry
        SET_SIZE(vtag_flush_pgcnt_tl1)
 523 
        ! Not implemented on US1/US2: a whole-TLB demap x-trap is
        ! simply ignored on Spitfire-class CPUs.
        ENTRY_NP(vtag_flushall_tl1)
        retry
        SET_SIZE(vtag_flushall_tl1)
 528 
 529 /*
 530  * vac_flushpage(pfnum, color)
 531  *      Flush 1 8k page of the D-$ with physical page = pfnum
 532  *      Algorithm:
 533  *              The spitfire dcache is a 16k direct mapped virtual indexed,
 534  *              physically tagged cache.  Given the pfnum we read all cache
 535  *              lines for the corresponding page in the cache (determined by
 536  *              the color).  Each cache line is compared with
 537  *              the tag created from the pfnum. If the tags match we flush
 538  *              the line.
 539  */
 540         .seg    ".data"
 541         .align  8
 542         .global dflush_type
 543 dflush_type:
 544         .word   FLUSHPAGE_TYPE
 545         .seg    ".text"
 546 
 547         ENTRY(vac_flushpage)
 548         /*
 549          * flush page from the d$
 550          *
 551          * %o0 = pfnum, %o1 = color
 552          */
 553         DCACHE_FLUSHPAGE(%o0, %o1, %o2, %o3, %o4)
 554         retl
 555         nop
 556         SET_SIZE(vac_flushpage)
 557 
        ENTRY_NP(vac_flushpage_tl1)
        /*
         * x-trap to flush page from the d$
         *
         * %g1 = pfnum, %g2 = color
         * Same operation as vac_flushpage() but using the TL>0
         * globals as scratch.  Clobbers %g1-%g5.
         */
        DCACHE_FLUSHPAGE(%g1, %g2, %g3, %g4, %g5)
        retry
        SET_SIZE(vac_flushpage_tl1)
 567 
        ENTRY(vac_flushcolor)
        /*
         * Invalidate the entire dcache page for a virtual color.
         * %o0 = vcolor
         * Clobbers %o0-%o2.
         */
        DCACHE_FLUSHCOLOR(%o0, %o1, %o2)
        retl
          nop
        SET_SIZE(vac_flushcolor)
 576 
        ENTRY(vac_flushcolor_tl1)
        /*
         * x-trap version of vac_flushcolor().
         * %g1 = vcolor
         * Clobbers %g1-%g3.
         */
        DCACHE_FLUSHCOLOR(%g1, %g2, %g3)
        retry
        SET_SIZE(vac_flushcolor_tl1)
 584 
 585 
        /* panic message used by init_mondo()'s DEBUG-only IDSR check */
        .global _dispatch_status_busy
_dispatch_status_busy:
        .asciz  "ASI_INTR_DISPATCH_STATUS error: busy"
        .align  4
 590 
 591 /*
 592  * Determine whether or not the IDSR is busy.
 593  * Entry: no arguments
 594  * Returns: 1 if busy, 0 otherwise
 595  */
 596         ENTRY(idsr_busy)
 597         ldxa    [%g0]ASI_INTR_DISPATCH_STATUS, %g1
 598         clr     %o0
 599         btst    IDSR_BUSY, %g1
 600         bz,a,pt %xcc, 1f
 601         mov     1, %o0
 602 1:
 603         retl
 604         nop
 605         SET_SIZE(idsr_busy)
 606         
 607 /*
 608  * Setup interrupt dispatch data registers
 609  * Entry:
 610  *      %o0 - function or inumber to call
 611  *      %o1, %o2 - arguments (2 uint64_t's)
 612  */
 613         .seg "text"
 614 
 615         ENTRY(init_mondo)
 616 #ifdef DEBUG
 617         !
 618         ! IDSR should not be busy at the moment
 619         !
 620         ldxa    [%g0]ASI_INTR_DISPATCH_STATUS, %g1
 621         btst    IDSR_BUSY, %g1
 622         bz,pt   %xcc, 1f
 623         nop
 624 
 625         sethi   %hi(_dispatch_status_busy), %o0
 626         call    panic
 627         or      %o0, %lo(_dispatch_status_busy), %o0
 628 #endif /* DEBUG */
 629 
 630         ALTENTRY(init_mondo_nocheck)
 631         !
 632         ! interrupt vector dispach data reg 0
 633         !
 634 1:
 635         mov     IDDR_0, %g1
 636         mov     IDDR_1, %g2
 637         mov     IDDR_2, %g3
 638         stxa    %o0, [%g1]ASI_INTR_DISPATCH
 639 
 640         !
 641         ! interrupt vector dispach data reg 1
 642         !
 643         stxa    %o1, [%g2]ASI_INTR_DISPATCH
 644 
 645         !
 646         ! interrupt vector dispach data reg 2
 647         !
 648         stxa    %o2, [%g3]ASI_INTR_DISPATCH
 649 
 650         retl
 651         membar  #Sync                   ! allowed to be in the delay slot
 652         SET_SIZE(init_mondo)
 653 
 654 /*
 655  * Ship mondo to upaid
 656  */
 657         ENTRY_NP(shipit)
 658         sll     %o0, IDCR_PID_SHIFT, %g1        ! IDCR<18:14> = upa id
 659         or      %g1, IDCR_OFFSET, %g1           ! IDCR<13:0> = 0x70
 660         stxa    %g0, [%g1]ASI_INTR_DISPATCH     ! interrupt vector dispatch
 661 #if defined(SF_ERRATA_54)
 662         membar  #Sync                           ! store must occur before load
 663         mov     0x20, %g3                       ! UDBH Control Register Read
 664         ldxa    [%g3]ASI_SDB_INTR_R, %g0
 665 #endif
 666         retl
 667         membar  #Sync
 668         SET_SIZE(shipit)
 669 
 670 
 671 /*
 672  * flush_instr_mem:
 673  *      Flush a portion of the I-$ starting at vaddr
 674  *      %o0 vaddr
 675  *      %o1 bytes to be flushed
 676  */
 677 
 678         ENTRY(flush_instr_mem)
 679         membar  #StoreStore                             ! Ensure the stores
 680                                                         ! are globally visible
 681 1:
 682         flush   %o0
 683         subcc   %o1, ICACHE_FLUSHSZ, %o1                ! bytes = bytes-0x20
 684         bgu,pt  %ncc, 1b
 685         add     %o0, ICACHE_FLUSHSZ, %o0                ! vaddr = vaddr+0x20
 686 
 687         retl
 688         nop
 689         SET_SIZE(flush_instr_mem)
 690 
 691 /*
 692  * flush_ecache:
 693  * Flush the entire e$ using displacement flush by reading through a
 694  * physically contiguous area. We use mmu bypass asi (ASI_MEM) while
 695  * reading this physical address range so that data doesn't go to d$.
 696  * incoming arguments:
 697  *      %o0 - 64 bit physical address
 698  *      %o1 - size of address range to read
 699  *      %o2 - ecache linesize
 700  */
 701         ENTRY(flush_ecache)
 702 #ifndef HUMMINGBIRD
 703         b       2f
 704           nop
 705 1:
 706         ldxa    [%o0 + %o1]ASI_MEM, %g0 ! start reading from physaddr + size
 707 2:
 708         subcc   %o1, %o2, %o1
 709         bcc,a,pt %ncc, 1b
 710           nop
 711 
 712 #else /* HUMMINGBIRD */
 713         /*
 714          * UltraSPARC-IIe processor supports both 4-way set associative
 715          * and direct map E$. For performance reasons, we flush E$ by
 716          * placing it in direct map mode for data load/store and restore
 717          * the state after we are done flushing it. It takes 2 iterations
 718          * to guarantee that the entire ecache has been flushed.
 719          *
 720          * Keep the interrupts disabled while flushing E$ in this manner.
 721          */
 722         rdpr    %pstate, %g4            ! current pstate (restored later)
 723         andn    %g4, PSTATE_IE, %g5
 724         wrpr    %g0, %g5, %pstate       ! disable interrupts
 725 
 726         ! Place E$ in direct map mode for data access
 727         or      %g0, 1, %g5
 728         sllx    %g5, HB_UPA_DMAP_DATA_BIT, %g5
 729         ldxa    [%g0]ASI_UPA_CONFIG, %g1 ! current UPA config (restored later)
 730         or      %g1, %g5, %g5
 731         membar  #Sync
 732         stxa    %g5, [%g0]ASI_UPA_CONFIG ! enable direct map for data access
 733         membar  #Sync
 734 
 735         ! flush entire ecache HB_ECACHE_FLUSH_CNT times
 736         mov     HB_ECACHE_FLUSH_CNT-1, %g5
 737 2:
 738         sub     %o1, %o2, %g3           ! start from last entry
 739 1:
 740         ldxa    [%o0 + %g3]ASI_MEM, %g0 ! start reading from physaddr + size
 741         subcc   %g3, %o2, %g3
 742         bgeu,a,pt %ncc, 1b
 743           nop
 744         brgz,a,pt %g5, 2b
 745           dec   %g5
 746 
 747         membar  #Sync
 748         stxa    %g1, [%g0]ASI_UPA_CONFIG ! restore UPA config reg
 749         membar  #Sync
 750         wrpr    %g0, %g4, %pstate       ! restore earlier pstate
 751 #endif /* HUMMINGBIRD */
 752 
 753         retl
 754         nop
 755         SET_SIZE(flush_ecache)
 756 
 757 /*
 758  * void kdi_flush_idcache(int dcache_size, int dcache_linesize,
 759  *                      int icache_size, int icache_linesize)
 760  */
 761         ENTRY(kdi_flush_idcache)
 762         DCACHE_FLUSHALL(%o0, %o1, %g1)
 763         ICACHE_FLUSHALL(%o2, %o3, %g1)
 764         membar  #Sync
 765         retl
 766         nop
 767         SET_SIZE(kdi_flush_idcache)
 768         
 769 
 770 /*
 771  * void get_ecache_dtag(uint32_t ecache_idx, uint64_t *data, uint64_t *tag,
 772  *                      uint64_t *oafsr, uint64_t *acc_afsr)
 773  *
 774  * Get ecache data and tag.  The ecache_idx argument is assumed to be aligned
 775  * on a 64-byte boundary.  The corresponding AFSR value is also read for each
 776  * 8 byte ecache data obtained. The ecache data is assumed to be a pointer
 777  * to an array of 16 uint64_t's (e$data & afsr value).  The action to read the
 778  * data and tag should be atomic to make sense.  We will be executing at PIL15
 779  * and will disable IE, so nothing can occur between the two reads.  We also
 780  * assume that the execution of this code does not interfere with what we are
 781  * reading - not really possible, but we'll live with it for now.
 782  * We also pass the old AFSR value before clearing it, and caller will take
 783  * appropriate actions if the important bits are non-zero. 
 784  *
 785  * If the caller wishes to track the AFSR in cases where the CP bit is
 786  * set, an address should be passed in for acc_afsr.  Otherwise, this
 787  * argument may be null.
 788  *
 789  * Register Usage:
 790  * i0: In: 32-bit e$ index
 791  * i1: In: addr of e$ data
 792  * i2: In: addr of e$ tag
 793  * i3: In: addr of old afsr
 794  * i4: In: addr of accumulated afsr - may be null
 795  */
 796         ENTRY(get_ecache_dtag)
 797         save    %sp, -SA(MINFRAME), %sp
 798         or      %g0, 1, %l4
 799         sllx    %l4, 39, %l4    ! set bit 39 for e$ data access
 800         or      %i0, %l4, %g6   ! %g6 = e$ addr for data read
 801         sllx    %l4, 1, %l4     ! set bit 40 for e$ tag access
 802         or      %i0, %l4, %l4   ! %l4 = e$ addr for tag read
 803 
 804         rdpr    %pstate, %i5
 805         andn    %i5, PSTATE_IE | PSTATE_AM, %i0
 806         wrpr    %i0, %g0, %pstate       ! clear IE, AM bits
 807 
 808         ldxa    [%g0]ASI_ESTATE_ERR, %g1
 809         stxa    %g0, [%g0]ASI_ESTATE_ERR        ! disable errors
 810         membar  #Sync
 811 
 812         ldxa    [%g0]ASI_AFSR, %i0      ! grab the old-afsr before tag read
 813         stx     %i0, [%i3]              ! write back the old-afsr
 814 
 815         ldxa    [%l4]ASI_EC_R, %g0      ! read tag into E$ tag reg
 816         ldxa    [%g0]ASI_EC_DIAG, %i0   ! read tag from E$ tag reg
 817         stx     %i0, [%i2]              ! write back tag result
 818 
 819         clr     %i2                     ! loop count
 820 
 821         brz     %i4, 1f                 ! acc_afsr == NULL?
 822           ldxa  [%g0]ASI_AFSR, %i0      ! grab the old-afsr before clearing
 823         srlx    %i0, P_AFSR_CP_SHIFT, %l0
 824         btst    1, %l0
 825         bz      1f
 826           nop
 827         ldx     [%i4], %g4
 828         or      %g4, %i0, %g4           ! aggregate AFSR in cpu private
 829         stx     %g4, [%i4]
 830 1:
 831         stxa    %i0, [%g0]ASI_AFSR      ! clear AFSR
 832         membar  #Sync
 833         ldxa    [%g6]ASI_EC_R, %i0      ! read the 8byte E$data
 834         stx     %i0, [%i1]              ! save the E$data
 835         add     %g6, 8, %g6
 836         add     %i1, 8, %i1
 837         ldxa    [%g0]ASI_AFSR, %i0      ! read AFSR for this 16byte read
 838         srlx    %i0, P_AFSR_CP_SHIFT, %l0
 839         btst    1, %l0
 840         bz      2f
 841           stx     %i0, [%i1]            ! save the AFSR
 842 
 843         brz     %i4, 2f                 ! acc_afsr == NULL?
 844           nop
 845         ldx     [%i4], %g4
 846         or      %g4, %i0, %g4           ! aggregate AFSR in cpu private
 847         stx     %g4, [%i4]
 848 2:
 849         add     %i2, 8, %i2
 850         cmp     %i2, 64
 851         bl,a    1b
 852           add     %i1, 8, %i1
 853         stxa    %i0, [%g0]ASI_AFSR              ! clear AFSR
 854         membar  #Sync
 855         stxa    %g1, [%g0]ASI_ESTATE_ERR        ! restore error enable
 856         membar  #Sync
 857         wrpr    %g0, %i5, %pstate
 858         ret
 859           restore
 860         SET_SIZE(get_ecache_dtag)
 861 
        !
        ! ce_err: correctable-error (CE) trap handler glue.  Collects AFSR,
        ! AFAR and UDB syndrome state, clears the error bits, then enters
        ! sys_trap to run cpu_ce_error() at PIL 14 (or 15 if already there).
        ! If the AFSR shows a UE as well, branches to async_err instead.
        !
        ENTRY_NP(ce_err)
        ldxa    [%g0]ASI_AFSR, %g3      ! save afsr in g3

        !
        ! Check for a UE... From Kevin.Normoyle:
        ! We try to switch to the trap for the UE, but since that's
        ! a hardware pipeline, we might get to the CE trap before we
        ! can switch. The UDB and AFSR registers will have both the
        ! UE and CE bits set but the UDB syndrome and the AFAR will be
        ! for the UE.
        !
        or      %g0, 1, %g1             ! put 1 in g1
        sllx    %g1, 21, %g1            ! shift left to <21> afsr UE
        andcc   %g1, %g3, %g0           ! check for UE in afsr
        bnz     async_err               ! handle the UE, not the CE
          or    %g0, 0x63, %g5          ! pass along the CE ttype
        !
        ! Disable further CE traps to avoid recursion (stack overflow)
        ! and staying above XCALL_PIL for extended periods.
        !
        ldxa    [%g0]ASI_ESTATE_ERR, %g2
        andn    %g2, 0x1, %g2           ! clear bit 0 - CEEN
        stxa    %g2, [%g0]ASI_ESTATE_ERR
        membar  #Sync                   ! required
        !
        ! handle the CE
        ldxa    [%g0]ASI_AFAR, %g2      ! save afar in g2

        set     P_DER_H, %g4            ! put P_DER_H in g4
        ldxa    [%g4]ASI_SDB_INTR_R, %g5 ! read sdb upper half into g5
        or      %g0, 1, %g6             ! put 1 in g6
        sllx    %g6, 8, %g6             ! shift g6 to <8> sdb CE
        andcc   %g5, %g6, %g1           ! check for CE in upper half
        sllx    %g5, 33, %g5            ! shift upper bits to <42:33>
        or      %g3, %g5, %g3           ! or with afsr bits
        bz,a    1f                      ! no error, goto 1f
          nop
        stxa    %g1, [%g4]ASI_SDB_INTR_W ! clear sdb reg error bit
        membar  #Sync                   ! membar sync required
1:
        set     P_DER_L, %g4            ! put P_DER_L in g4
        ldxa    [%g4]ASI_SDB_INTR_R, %g5 ! read sdb lower half into g5
        andcc   %g5, %g6, %g1           ! check for CE in lower half
        sllx    %g5, 43, %g5            ! shift upper bits to <52:43>
        or      %g3, %g5, %g3           ! or with afsr bits
        bz,a    2f                      ! no error, goto 2f
          nop
        stxa    %g1, [%g4]ASI_SDB_INTR_W ! clear sdb reg error bit
        membar  #Sync                   ! membar sync required
2:
        or      %g0, 1, %g4             ! put 1 in g4
        sllx    %g4, 20, %g4            ! shift left to <20> afsr CE
        stxa    %g4, [%g0]ASI_AFSR      ! use g4 to clear afsr CE error
        membar  #Sync                   ! membar sync required

        set     cpu_ce_error, %g1       ! put *cpu_ce_error() in g1
        rdpr    %pil, %g6               ! read pil into %g6
        subcc   %g6, PIL_15, %g0
          movneg        %icc, PIL_14, %g4 ! run at pil 14 unless already at 15
        sethi   %hi(sys_trap), %g5
        jmp     %g5 + %lo(sys_trap)     ! goto sys_trap
          movge %icc, PIL_15, %g4       ! already at pil 15
        SET_SIZE(ce_err)
 925 
        !
        ! ce_err_tl1: CE trap taken at TL>0.  Without trap tracing we just
        ! clear the AFSR sticky bits and retry; with TRAPTRACE we panic via
        ! dis_err_panic1 -> ce_trap_tl1 since tracing at TL1 is unsafe here.
        !
        ENTRY_NP(ce_err_tl1)
#ifndef TRAPTRACE
        ldxa    [%g0]ASI_AFSR, %g7      ! read sticky error bits
        stxa    %g7, [%g0]ASI_AFSR      ! write back to clear them
        membar  #Sync
        retry
#else
        set     ce_trap_tl1, %g1        ! panic routine for dis_err_panic1
        sethi   %hi(dis_err_panic1), %g4
        jmp     %g4 + %lo(dis_err_panic1)
        nop
#endif
        SET_SIZE(ce_err_tl1)
 939 
#ifdef  TRAPTRACE
.celevel1msg:
        .asciz  "Softerror with trap tracing at tl1: AFAR 0x%08x.%08x AFSR 0x%08x.%08x";

        !
        ! ce_trap_tl1: panic target reached via dis_err_panic1 when a CE
        ! occurs at TL1 with trap tracing enabled.  Reorders the AFAR/AFSR
        ! halves into the printf argument registers and panics.
        !
        ENTRY_NP(ce_trap_tl1)
        ! upper 32 bits of AFSR already in o3
        mov     %o4, %o0                ! save AFAR upper 32 bits
        mov     %o2, %o4                ! lower 32 bits of AFSR
        mov     %o1, %o2                ! lower 32 bits of AFAR
        mov     %o0, %o1                ! upper 32 bits of AFAR
        set     .celevel1msg, %o0       ! panic format string
        call    panic                   ! does not return
        nop
        SET_SIZE(ce_trap_tl1)
#endif
 955 
 956         !
 957         ! async_err is the assembly glue code to get us from the actual trap
 958         ! into the CPU module's C error handler.  Note that we also branch
 959         ! here from ce_err() above.
 960         !
 961         ENTRY_NP(async_err)
 962         stxa    %g0, [%g0]ASI_ESTATE_ERR ! disable ecc and other cpu errors
 963         membar  #Sync                   ! membar sync required
 964 
 965         ldxa    [%g0]ASI_AFSR, %g3      ! save afsr in g3
 966         ldxa    [%g0]ASI_AFAR, %g2      ! save afar in g2
 967 
 968         sllx    %g5, 53, %g5            ! move ttype to <63:53>
 969         or      %g3, %g5, %g3           ! or to afsr in g3
 970 
 971         or      %g0, 1, %g1             ! put 1 in g1
 972         sllx    %g1, 21, %g1            ! shift left to <21> afsr UE
 973         andcc   %g1, %g3, %g0           ! check for UE in afsr
 974         bz,a,pn %icc, 2f                ! if !UE skip sdb read/clear
 975           nop
 976 
 977         set     P_DER_H, %g4            ! put P_DER_H in g4
 978         ldxa    [%g4]ASI_SDB_INTR_R, %g5 ! read sdb upper half into 56
 979         or      %g0, 1, %g6             ! put 1 in g6
 980         sllx    %g6, 9, %g6             ! shift g6 to <9> sdb UE
 981         andcc   %g5, %g6, %g1           ! check for UE in upper half
 982         sllx    %g5, 33, %g5            ! shift upper bits to <42:33>
 983         or      %g3, %g5, %g3           ! or with afsr bits
 984         bz,a    1f                      ! no error, goto 1f
 985           nop
 986         stxa    %g1, [%g4]ASI_SDB_INTR_W ! clear sdb reg UE error bit
 987         membar  #Sync                   ! membar sync required
 988 1:
 989         set     P_DER_L, %g4            ! put P_DER_L in g4
 990         ldxa    [%g4]ASI_SDB_INTR_R, %g5 ! read sdb lower half into g5
 991         andcc   %g5, %g6, %g1           ! check for UE in lower half
 992         sllx    %g5, 43, %g5            ! shift upper bits to <52:43>
 993         or      %g3, %g5, %g3           ! or with afsr bits
 994         bz,a    2f                      ! no error, goto 2f
 995           nop
 996         stxa    %g1, [%g4]ASI_SDB_INTR_W ! clear sdb reg UE error bit
 997         membar  #Sync                   ! membar sync required
 998 2:
 999         stxa    %g3, [%g0]ASI_AFSR      ! clear all the sticky bits
1000         membar  #Sync                   ! membar sync required
1001 
1002         RESET_USER_RTT_REGS(%g4, %g5, async_err_resetskip)
1003 async_err_resetskip:
1004 
1005         set     cpu_async_error, %g1    ! put cpu_async_error in g1
1006         sethi   %hi(sys_trap), %g5
1007         jmp     %g5 + %lo(sys_trap)     ! goto sys_trap
1008           or    %g0, PIL_15, %g4        ! run at pil 15
1009         SET_SIZE(async_err)
1010 
        !
        ! dis_err_panic1: disable all error traps, capture AFAR/AFSR plus
        ! both UDB halves into %g2/%g3, then enter sys_trap at PIL -1 to
        ! run the panic routine whose address the caller left in %g1.
        !
        ENTRY_NP(dis_err_panic1)
        stxa    %g0, [%g0]ASI_ESTATE_ERR ! disable all error traps
        membar  #Sync
        ! save destination routine is in g1
        ldxa    [%g0]ASI_AFAR, %g2      ! read afar
        ldxa    [%g0]ASI_AFSR, %g3      ! read afsr
        set     P_DER_H, %g4            ! put P_DER_H in g4
        ldxa    [%g4]ASI_SDB_INTR_R, %g5 ! read sdb upper half into g5
        sllx    %g5, 33, %g5            ! shift upper bits to <42:33>
        or      %g3, %g5, %g3           ! or with afsr bits
        set     P_DER_L, %g4            ! put P_DER_L in g4
        ldxa    [%g4]ASI_SDB_INTR_R, %g5 ! read sdb lower half into g5
        sllx    %g5, 43, %g5            ! shift upper bits to <52:43>
        or      %g3, %g5, %g3           ! or with afsr bits

        RESET_USER_RTT_REGS(%g4, %g5, dis_err_panic1_resetskip)
dis_err_panic1_resetskip:

        sethi   %hi(sys_trap), %g5
        jmp     %g5 + %lo(sys_trap)     ! goto sys_trap
          sub   %g0, 1, %g4             ! PIL = -1: sys_trap panic convention
        SET_SIZE(dis_err_panic1)
1033 
        !
        ! clr_datapath: clear any pending UE/CE error bits (<9:8>) in both
        ! halves of the UPA data buffer (UDB) error registers.
        !
        ENTRY(clr_datapath)
        set     P_DER_H, %o4                    ! put P_DER_H in o4
        ldxa    [%o4]ASI_SDB_INTR_R, %o5        ! read sdb upper half into o5
        or      %g0, 0x3, %o2                   ! put 0x3 in o2
        sllx    %o2, 8, %o2                     ! shift o2 to <9:8> sdb
        andcc   %o5, %o2, %o1                   ! check for UE,CE in upper half
        bz,a    1f                              ! no error, goto 1f
          nop
        stxa    %o1, [%o4]ASI_SDB_INTR_W        ! clear sdb reg UE,CE error bits
        membar  #Sync                           ! membar sync required
1:
        set     P_DER_L, %o4                    ! put P_DER_L in o4
        ldxa    [%o4]ASI_SDB_INTR_R, %o5        ! read sdb lower half into o5
        andcc   %o5, %o2, %o1                   ! check for UE,CE in lower half
        bz,a    2f                              ! no error, goto 2f
          nop
        stxa    %o1, [%o4]ASI_SDB_INTR_W        ! clear sdb reg UE,CE error bits
        membar  #Sync
2:
        retl
          nop
        SET_SIZE(clr_datapath)
1056 
        !
        ! get_udb_errors: store the raw upper UDB error register at *%o0
        ! and the lower UDB error register at *%o1.
        !
        ENTRY(get_udb_errors)
        set     P_DER_H, %o3
        ldxa    [%o3]ASI_SDB_INTR_R, %o2        ! read upper half
        stx     %o2, [%o0]                      ! *%o0 = upper UDB reg
        set     P_DER_L, %o3
        ldxa    [%o3]ASI_SDB_INTR_R, %o2        ! read lower half
        retl
          stx   %o2, [%o1]                      ! *%o1 = lower UDB reg
        SET_SIZE(get_udb_errors)
1066 
1067 /*
1068  * NB - In Spitfire cpus, when reading a tte from the hardware, we
1069  * need to clear [42-41] because the general definitions in pte.h
1070  * define the PA to be [42-13] whereas Spitfire really uses [40-13].
1071  * When cloning these routines for other cpus the "andn" below is not
1072  * necessary.
1073  */
1074         ENTRY_NP(itlb_rd_entry)
1075         sllx    %o0, 3, %o0
1076 #if defined(SF_ERRATA_32)
1077         sethi   %hi(FLUSH_ADDR), %g2
1078         set     MMU_PCONTEXT, %g1
1079         stxa    %g0, [%g1]ASI_DMMU                      ! KCONTEXT
1080         flush   %g2
1081 #endif
1082         ldxa    [%o0]ASI_ITLB_ACCESS, %g1
1083         set     TTE_SPITFIRE_PFNHI_CLEAR, %g2           ! spitfire only
1084         sllx    %g2, TTE_SPITFIRE_PFNHI_SHIFT, %g2      ! see comment above
1085         andn    %g1, %g2, %g1                           ! for details
1086         stx     %g1, [%o1]
1087         ldxa    [%o0]ASI_ITLB_TAGREAD, %g2
1088         set     TAGREAD_CTX_MASK, %o4
1089         andn    %g2, %o4, %o5
1090         retl
1091           stx   %o5, [%o2]
1092         SET_SIZE(itlb_rd_entry)
1093 
        !
        ! dtlb_rd_entry: read DTLB entry %o0; store the (PA-masked) tte at
        ! *%o1 and the tag (context bits cleared) at *%o2.  See the PA
        ! masking comment above itlb_rd_entry.
        !
        ENTRY_NP(dtlb_rd_entry)
        sllx    %o0, 3, %o0                             ! index -> byte offset
#if defined(SF_ERRATA_32)
        sethi   %hi(FLUSH_ADDR), %g2
        set     MMU_PCONTEXT, %g1
        stxa    %g0, [%g1]ASI_DMMU                      ! KCONTEXT
        flush   %g2
#endif
        ldxa    [%o0]ASI_DTLB_ACCESS, %g1               ! read tte
        set     TTE_SPITFIRE_PFNHI_CLEAR, %g2           ! spitfire only
        sllx    %g2, TTE_SPITFIRE_PFNHI_SHIFT, %g2      ! see comment above
        andn    %g1, %g2, %g1                           ! for details
        stx     %g1, [%o1]                              ! *%o1 = tte
        ldxa    [%o0]ASI_DTLB_TAGREAD, %g2              ! read tag
        set     TAGREAD_CTX_MASK, %o4
        andn    %g2, %o4, %o5                           ! strip context bits
        retl
          stx   %o5, [%o2]                              ! *%o2 = tag
        SET_SIZE(dtlb_rd_entry)
1113 
        !
        ! set_lsu: write %o0 to the load-store unit control register.
        !
        ENTRY(set_lsu)
        stxa    %o0, [%g0]ASI_LSU               ! store to LSU
        retl
        membar  #Sync                           ! allowed in the delay slot
        SET_SIZE(set_lsu)
1119 
        !
        ! get_lsu: return the load-store unit control register in %o0.
        !
        ENTRY(get_lsu)
        retl
        ldxa    [%g0]ASI_LSU, %o0               ! load LSU
        SET_SIZE(get_lsu)
1124 
1125         /*
1126          * Clear the NPT (non-privileged trap) bit in the %tick
1127          * registers. In an effort to make the change in the
1128          * tick counter as consistent as possible, we disable
1129          * all interrupts while we're changing the registers. We also
1130          * ensure that the read and write instructions are in the same
1131          * line in the instruction cache.
1132          */
1133         ENTRY_NP(cpu_clearticknpt)
1134         rdpr    %pstate, %g1            /* save processor state */
1135         andn    %g1, PSTATE_IE, %g3     /* turn off */
1136         wrpr    %g0, %g3, %pstate       /*   interrupts */
1137         rdpr    %tick, %g2              /* get tick register */
1138         brgez,pn %g2, 1f                /* if NPT bit off, we're done */
1139         mov     1, %g3                  /* create mask */
1140         sllx    %g3, 63, %g3            /*   for NPT bit */
1141         ba,a,pt %xcc, 2f
1142         .align  64                      /* Align to I$ boundary */
1143 2:
1144         rdpr    %tick, %g2              /* get tick register */
1145         wrpr    %g3, %g2, %tick         /* write tick register, */
1146                                         /*   clearing NPT bit   */
1147 #if defined(BB_ERRATA_1)
1148         rdpr    %tick, %g0              /* read (s)tick (BB_ERRATA_1) */
1149 #endif
1150 1:
1151         jmp     %g4 + 4
1152         wrpr    %g0, %g1, %pstate       /* restore processor state */
1153         SET_SIZE(cpu_clearticknpt)
1154 
1155         /*
1156          * get_ecache_tag()
1157          * Register Usage:
1158          * %o0: In: 32-bit E$ index
1159          *      Out: 64-bit E$ tag value
1160          * %o1: In: 64-bit AFSR value after clearing sticky bits
1161          * %o2: In: address of cpu private afsr storage
1162          */
1163         ENTRY(get_ecache_tag)
1164         or      %g0, 1, %o4
1165         sllx    %o4, 40, %o4                    ! set bit 40 for e$ tag access
1166         or      %o0, %o4, %o4                   ! %o4 = e$ addr for tag read
1167         rdpr    %pstate, %o5
1168         andn    %o5, PSTATE_IE | PSTATE_AM, %o0
1169         wrpr    %o0, %g0, %pstate               ! clear IE, AM bits
1170 
1171         ldxa    [%g0]ASI_ESTATE_ERR, %g1
1172         stxa    %g0, [%g0]ASI_ESTATE_ERR        ! Turn off Error enable
1173         membar  #Sync
1174 
1175         ldxa    [%g0]ASI_AFSR, %o0
1176         srlx    %o0, P_AFSR_CP_SHIFT, %o3
1177         btst    1, %o3
1178         bz      1f
1179           nop
1180         ldx     [%o2], %g4
1181         or      %g4, %o0, %g4                   ! aggregate AFSR in cpu private
1182         stx     %g4, [%o2]
1183 1:
1184         stxa    %o0, [%g0]ASI_AFSR              ! clear AFSR
1185         membar  #Sync
1186 
1187         ldxa    [%o4]ASI_EC_R, %g0
1188         ldxa    [%g0]ASI_EC_DIAG, %o0           ! read tag from e$ tag reg
1189 
1190         ldxa    [%g0]ASI_AFSR, %o3
1191         srlx    %o3, P_AFSR_CP_SHIFT, %o4
1192         btst    1, %o4
1193         bz      2f
1194           stx   %o3, [%o1]                      ! AFSR after sticky clear
1195         ldx     [%o2], %g4
1196         or      %g4, %o3, %g4                   ! aggregate AFSR in cpu private
1197         stx     %g4, [%o2]
1198 2:
1199         membar  #Sync
1200 
1201         stxa    %g1, [%g0]ASI_ESTATE_ERR        ! Turn error enable back on
1202         membar  #Sync
1203         retl
1204         wrpr    %g0, %o5, %pstate
1205         SET_SIZE(get_ecache_tag)
1206 
1207         /*
1208          * check_ecache_line()
1209          * Register Usage:
1210          * %o0: In: 32-bit E$ index
1211          *      Out: 64-bit accumulated AFSR
1212          * %o1: In: address of cpu private afsr storage
1213          */
1214         ENTRY(check_ecache_line)
1215         or      %g0, 1, %o4
1216         sllx    %o4, 39, %o4                    ! set bit 39 for e$ data access
1217         or      %o0, %o4, %o4                   ! %o4 = e$ addr for data read
1218 
1219         rdpr    %pstate, %o5
1220         andn    %o5, PSTATE_IE | PSTATE_AM, %o0
1221         wrpr    %o0, %g0, %pstate               ! clear IE, AM bits
1222 
1223         ldxa    [%g0]ASI_ESTATE_ERR, %g1
1224         stxa    %g0, [%g0]ASI_ESTATE_ERR        ! Turn off Error enable
1225         membar  #Sync
1226 
1227         ldxa    [%g0]ASI_AFSR, %o0
1228         srlx    %o0, P_AFSR_CP_SHIFT, %o2
1229         btst    1, %o2
1230         bz      1f
1231           clr   %o2                             ! loop count
1232         ldx     [%o1], %o3
1233         or      %o3, %o0, %o3                   ! aggregate AFSR in cpu private
1234         stx     %o3, [%o1]
1235 1: 
1236         stxa    %o0, [%g0]ASI_AFSR              ! clear AFSR
1237         membar  #Sync
1238 
1239 2:
1240         ldxa    [%o4]ASI_EC_R, %g0              ! Read the E$ data 8bytes each
1241         add     %o2, 1, %o2
1242         cmp     %o2, 8
1243         bl,a    2b
1244           add   %o4, 8, %o4
1245 
1246         membar  #Sync
1247         ldxa    [%g0]ASI_AFSR, %o0              ! read accumulated AFSR
1248         srlx    %o0, P_AFSR_CP_SHIFT, %o2
1249         btst    1, %o2
1250         bz      3f
1251           nop
1252         ldx     [%o1], %o3
1253         or      %o3, %o0, %o3                   ! aggregate AFSR in cpu private
1254         stx     %o3, [%o1]
1255 3:
1256         stxa    %o0, [%g0]ASI_AFSR              ! clear AFSR
1257         membar  #Sync
1258         stxa    %g1, [%g0]ASI_ESTATE_ERR        ! Turn error enable back on
1259         membar  #Sync
1260         retl
1261         wrpr    %g0, %o5, %pstate
1262         SET_SIZE(check_ecache_line)
1263 
        !
        ! read_and_clear_afsr: return the AFSR in %o0 and write it back
        ! to clear the sticky error bits.
        !
        ENTRY(read_and_clear_afsr)
        ldxa    [%g0]ASI_AFSR, %o0
        retl
          stxa  %o0, [%g0]ASI_AFSR              ! clear AFSR
        SET_SIZE(read_and_clear_afsr)
1269 
1270 /*
1271  * scrubphys - Pass in the aligned physical memory address that you want
1272  * to scrub, along with the ecache size.
1273  *
1274  *      1) Displacement flush the E$ line corresponding to %addr.
1275  *         The first ldxa guarantees that the %addr is no longer in
1276  *         M, O, or E (goes to I or S (if instruction fetch also happens).
1277  *      2) "Write" the data using a CAS %addr,%g0,%g0.
1278  *         The casxa guarantees a transition from I to M or S to M.
1279  *      3) Displacement flush the E$ line corresponding to %addr.
1280  *         The second ldxa pushes the M line out of the ecache, into the
1281  *         writeback buffers, on the way to memory.
1282  *      4) The "membar #Sync" pushes the cache line out of the writeback
1283  *         buffers onto the bus, on the way to dram finally.
1284  *
1285  * This is a modified version of the algorithm suggested by Gary Lauterbach.
1286  * In theory the CAS %addr,%g0,%g0 is supposed to mark the addr's cache line
1287  * as modified, but then we found out that for spitfire, if it misses in the
1288  * E$ it will probably install as an M, but if it hits in the E$, then it
1289  * will stay E, if the store doesn't happen. So the first displacement flush
1290  * should ensure that the CAS will miss in the E$.  Arrgh.
1291  */
1292 
1293         ENTRY(scrubphys)
1294         or      %o1, %g0, %o2   ! put ecache size in %o2
1295 #ifndef HUMMINGBIRD
1296         xor     %o0, %o2, %o1   ! calculate alias address
1297         add     %o2, %o2, %o3   ! 2 * ecachesize in case
1298                                 ! addr == ecache_flushaddr
1299         sub     %o3, 1, %o3     ! -1 == mask
1300         and     %o1, %o3, %o1   ! and with xor'd address
1301         set     ecache_flushaddr, %o3
1302         ldx     [%o3], %o3
1303 
1304         rdpr    %pstate, %o4
1305         andn    %o4, PSTATE_IE | PSTATE_AM, %o5
1306         wrpr    %o5, %g0, %pstate       ! clear IE, AM bits
1307 
1308         ldxa    [%o1 + %o3]ASI_MEM, %g0 ! load ecache_flushaddr + alias
1309         casxa   [%o0]ASI_MEM, %g0, %g0
1310         ldxa    [%o1 + %o3]ASI_MEM, %g0 ! load ecache_flushaddr + alias
1311 
1312 #else /* HUMMINGBIRD */
1313         /*
1314          * UltraSPARC-IIe processor supports both 4-way set associative
1315          * and direct map E$. We need to reconfigure E$ to direct map
1316          * mode for data load/store before displacement flush. Also, we
1317          * need to flush all 4 sets of the E$ to ensure that the physaddr
1318          * has been flushed. Keep the interrupts disabled while flushing
1319          * E$ in this manner.
1320          *
1321          * For flushing a specific physical address, we start at the
1322          * aliased address and load at set-size stride, wrapping around
1323          * at 2*ecache-size boundary and skipping fault physical address.
1324          * It takes 10 loads to guarantee that the physical address has
1325          * been flushed.
1326          *
1327          * Usage:
1328          *      %o0     physaddr
1329          *      %o5     physaddr - ecache_flushaddr
1330          *      %g1     UPA config (restored later)
1331          *      %g2     E$ set size
1332          *      %g3     E$ flush address range mask (i.e. 2 * E$ -1)
1333          *      %g4     #loads to flush phys address
1334          *      %g5     temp 
1335          */
1336 
1337         sethi   %hi(ecache_associativity), %g5
1338         ld      [%g5 + %lo(ecache_associativity)], %g5
1339         udivx   %o2, %g5, %g2   ! set size (i.e. ecache_size/#sets)
1340         xor     %o0, %o2, %o1   ! calculate alias address
1341         add     %o2, %o2, %g3   ! 2 * ecachesize in case
1342                                 ! addr == ecache_flushaddr
1343         sub     %g3, 1, %g3     ! 2 * ecachesize -1 == mask
1344         and     %o1, %g3, %o1   ! and with xor'd address
1345         sethi   %hi(ecache_flushaddr), %o3
1346         ldx     [%o3 + %lo(ecache_flushaddr)], %o3
1347 
1348         rdpr    %pstate, %o4
1349         andn    %o4, PSTATE_IE | PSTATE_AM, %o5
1350         wrpr    %o5, %g0, %pstate       ! clear IE, AM bits
1351 
1352         ! Place E$ in direct map mode for data access
1353         or      %g0, 1, %g5
1354         sllx    %g5, HB_UPA_DMAP_DATA_BIT, %g5
1355         ldxa    [%g0]ASI_UPA_CONFIG, %g1 ! current UPA config (restored later)
1356         or      %g1, %g5, %g5
1357         membar  #Sync
1358         stxa    %g5, [%g0]ASI_UPA_CONFIG ! enable direct map for data access
1359         membar  #Sync
1360 
1361         ! Displace cache line from each set of E$ starting at the
1362         ! aliased address. at set-size stride, wrapping at 2*ecache_size
1363         ! and skipping load from physaddr. We need 10 loads to flush the
1364         ! physaddr from E$.
1365         mov     HB_PHYS_FLUSH_CNT-1, %g4 ! #loads to flush phys addr
1366         sub     %o0, %o3, %o5           ! physaddr - ecache_flushaddr
1367         or      %o1, %g0, %g5           ! starting aliased offset
1368 2:
1369         ldxa    [%g5 + %o3]ASI_MEM, %g0 ! load ecache_flushaddr + alias
1370 1:
1371         add     %g5, %g2, %g5           ! calculate offset in next set
1372         and     %g5, %g3, %g5           ! force offset within aliased range
1373         cmp     %g5, %o5                ! skip loads from physaddr
1374         be,pn %ncc, 1b
1375           nop
1376         brgz,pt %g4, 2b
1377           dec   %g4
1378 
1379         casxa   [%o0]ASI_MEM, %g0, %g0
1380 
1381         ! Flush %o0 from ecahe again.
1382         ! Need single displacement flush at offset %o1 this time as
1383         ! the E$ is already in direct map mode.
1384         ldxa    [%o1 + %o3]ASI_MEM, %g0 ! load ecache_flushaddr + alias
1385 
1386         membar  #Sync
1387         stxa    %g1, [%g0]ASI_UPA_CONFIG ! restore UPA config (DM bits)
1388         membar  #Sync
1389 #endif /* HUMMINGBIRD */
1390         wrpr    %g0, %o4, %pstate       ! restore earlier pstate register value
1391 
1392         retl
1393         membar  #Sync                   ! move the data out of the load buffer
1394         SET_SIZE(scrubphys)
1395 
        ENTRY(clearphys)
        /*
         * clearphys(physaddr, ecache_size, ecache_linesize)
         *   %o0 = physical address, %o1 = E$ size, %o2 = E$ linesize
         *
         * Overwrite the E$ line containing physaddr with zeros, then
         * displacement-flush it from the E$.  Runs with interrupts off
         * and (non-Hummingbird) error traps disabled so a corrupted
         * line can be cleared without trapping.
         */
        or      %o2, %g0, %o3   ! ecache linesize
        or      %o1, %g0, %o2   ! ecache size
#ifndef HUMMINGBIRD
        or      %o3, %g0, %o4   ! save ecache linesize
        xor     %o0, %o2, %o1   ! calculate alias address
        add     %o2, %o2, %o3   ! 2 * ecachesize
        sub     %o3, 1, %o3     ! -1 == mask
        and     %o1, %o3, %o1   ! and with xor'd address
        set     ecache_flushaddr, %o3
        ldx     [%o3], %o3
        or      %o4, %g0, %o2   ! saved ecache linesize

        rdpr    %pstate, %o4
        andn    %o4, PSTATE_IE | PSTATE_AM, %o5
        wrpr    %o5, %g0, %pstate       ! clear IE, AM bits

        ldxa    [%g0]ASI_ESTATE_ERR, %g1
        stxa    %g0, [%g0]ASI_ESTATE_ERR        ! disable errors
        membar  #Sync

        ! need to put zeros in the cache line before displacing it

        sub     %o2, 8, %o2     ! get offset of last double word in ecache line
1:
        stxa    %g0, [%o0 + %o2]ASI_MEM ! put zeros in the ecache line
        sub     %o2, 8, %o2
        brgez,a,pt %o2, 1b
        nop
        ldxa    [%o1 + %o3]ASI_MEM, %g0 ! load ecache_flushaddr + alias
        ! NOTE(review): atomic access to physaddr between the two
        ! displacement loads -- presumably forces the zeroed line out to
        ! memory before the final flush; confirm against scrubphys above.
        casxa   [%o0]ASI_MEM, %g0, %g0
        ldxa    [%o1 + %o3]ASI_MEM, %g0 ! load ecache_flushaddr + alias

        stxa    %g1, [%g0]ASI_ESTATE_ERR        ! restore error enable
        membar  #Sync

#else /* HUMMINGBIRD... */
        /*
         * UltraSPARC-IIe processor supports both 4-way set associative
         * and direct map E$. We need to reconfigure E$ to direct map
         * mode for data load/store before displacement flush. Also, we
         * need to flush all 4 sets of the E$ to ensure that the physaddr
         * has been flushed. Keep the interrupts disabled while flushing
         * E$ in this manner.
         *
         * For flushing a specific physical address, we start at the
         * aliased address and load at set-size stride, wrapping around
         * at 2*ecache-size boundary and skipping fault physical address.
         * It takes 10 loads to guarantee that the physical address has
         * been flushed.
         *
         * Usage:
         *      %o0     physaddr
         *      %o5     physaddr - ecache_flushaddr
         *      %g1     UPA config (restored later)
         *      %g2     E$ set size
         *      %g3     E$ flush address range mask (i.e. 2 * E$ -1)
         *      %g4     #loads to flush phys address
         *      %g5     temp
         */

        or      %o3, %g0, %o4   ! save ecache linesize
        sethi   %hi(ecache_associativity), %g5
        ld      [%g5 + %lo(ecache_associativity)], %g5
        udivx   %o2, %g5, %g2   ! set size (i.e. ecache_size/#sets)

        xor     %o0, %o2, %o1   ! calculate alias address
        add     %o2, %o2, %g3   ! 2 * ecachesize
        sub     %g3, 1, %g3     ! 2 * ecachesize -1 == mask
        and     %o1, %g3, %o1   ! and with xor'd address
        sethi   %hi(ecache_flushaddr), %o3
        ldx     [%o3 +%lo(ecache_flushaddr)], %o3
        or      %o4, %g0, %o2   ! saved ecache linesize

        rdpr    %pstate, %o4
        andn    %o4, PSTATE_IE | PSTATE_AM, %o5
        wrpr    %o5, %g0, %pstate       ! clear IE, AM bits

        ! Place E$ in direct map mode for data access
        or      %g0, 1, %g5
        sllx    %g5, HB_UPA_DMAP_DATA_BIT, %g5
        ldxa    [%g0]ASI_UPA_CONFIG, %g1 ! current UPA config (restored later)
        or      %g1, %g5, %g5
        membar  #Sync
        stxa    %g5, [%g0]ASI_UPA_CONFIG ! enable direct map for data access
        membar  #Sync

        ! need to put zeros in the cache line before displacing it

        sub     %o2, 8, %o2     ! get offset of last double word in ecache line
1:
        stxa    %g0, [%o0 + %o2]ASI_MEM ! put zeros in the ecache line
        sub     %o2, 8, %o2
        brgez,a,pt %o2, 1b
        nop

        ! Displace cache line from each set of E$ starting at the
        ! aliased address, at set-size stride, wrapping at 2*ecache_size
        ! and skipping load from physaddr. We need 10 loads to flush the
        ! physaddr from E$.
        mov     HB_PHYS_FLUSH_CNT-1, %g4 ! #loads to flush phys addr
        sub     %o0, %o3, %o5           ! physaddr - ecache_flushaddr
        or      %o1, %g0, %g5           ! starting offset
2:
        ldxa    [%g5 + %o3]ASI_MEM, %g0 ! load ecache_flushaddr + alias
3:
        add     %g5, %g2, %g5           ! calculate offset in next set
        and     %g5, %g3, %g5           ! force offset within aliased range
        cmp     %g5, %o5                ! skip loads from physaddr
        be,pn %ncc, 3b
          nop
        brgz,pt %g4, 2b
          dec   %g4

        casxa   [%o0]ASI_MEM, %g0, %g0

        ! Flush %o0 from ecache again.
        ! Need single displacement flush at offset %o1 this time as
        ! the E$ is already in direct map mode.
        ldxa    [%o1 + %o3]ASI_MEM, %g0 ! load ecache_flushaddr + alias

        membar  #Sync
        stxa    %g1, [%g0]ASI_UPA_CONFIG ! restore UPA config (DM bits)
        membar  #Sync
#endif /* HUMMINGBIRD... */

        retl
        wrpr    %g0, %o4, %pstate       ! restore earlier pstate register value
        SET_SIZE(clearphys)
1525 
/*
 * flushecacheline - This is a simpler version of scrubphys
 * which simply does a displacement flush of the line in
 * question. This routine is mainly used in handling async
 * errors where we want to get rid of a bad line in ecache.
 * Note that if the line is modified and it has suffered
 * data corruption - we are guaranteed that the hw will write
 * a UE back to mark the page poisoned.
 */
        ENTRY(flushecacheline)
        /*
         * flushecacheline(physaddr, ecache_size)
         *   %o0 = physical address, %o1 = E$ size
         *
         * Displacement-flush the E$ line containing physaddr.  Error
         * traps are disabled around the flush so a bad line can be
         * evicted without trapping; interrupts are also disabled.
         */
        or      %o1, %g0, %o2   ! put ecache size in %o2
#ifndef HUMMINGBIRD
        xor     %o0, %o2, %o1   ! calculate alias address
        add     %o2, %o2, %o3   ! 2 * ecachesize in case
                                ! addr == ecache_flushaddr
        sub     %o3, 1, %o3     ! -1 == mask
        and     %o1, %o3, %o1   ! and with xor'd address
        set     ecache_flushaddr, %o3
        ldx     [%o3], %o3

        rdpr    %pstate, %o4
        andn    %o4, PSTATE_IE | PSTATE_AM, %o5
        wrpr    %o5, %g0, %pstate       ! clear IE, AM bits

        ldxa    [%g0]ASI_ESTATE_ERR, %g1
        stxa    %g0, [%g0]ASI_ESTATE_ERR        ! disable errors
        membar  #Sync

        ldxa    [%o1 + %o3]ASI_MEM, %g0 ! load ecache_flushaddr + alias
        membar  #Sync
        stxa    %g1, [%g0]ASI_ESTATE_ERR        ! restore error enable
        membar  #Sync
#else /* HUMMINGBIRD */
        /*
         * UltraSPARC-IIe processor supports both 4-way set associative
         * and direct map E$. We need to reconfigure E$ to direct map
         * mode for data load/store before displacement flush. Also, we
         * need to flush all 4 sets of the E$ to ensure that the physaddr
         * has been flushed. Keep the interrupts disabled while flushing
         * E$ in this manner.
         *
         * For flushing a specific physical address, we start at the
         * aliased address and load at set-size stride, wrapping around
         * at 2*ecache-size boundary and skipping fault physical address.
         * It takes 10 loads to guarantee that the physical address has
         * been flushed.
         *
         * Usage:
         *      %o0     physaddr
         *      %o5     physaddr - ecache_flushaddr
         *      %g1     error enable register
         *      %g2     E$ set size
         *      %g3     E$ flush address range mask (i.e. 2 * E$ -1)
         *      %g4     UPA config (restored later)
         *      %g5     temp
         */

        sethi   %hi(ecache_associativity), %g5
        ld      [%g5 + %lo(ecache_associativity)], %g5
        udivx   %o2, %g5, %g2   ! set size (i.e. ecache_size/#sets)
        xor     %o0, %o2, %o1   ! calculate alias address
        add     %o2, %o2, %g3   ! 2 * ecachesize in case
                                ! addr == ecache_flushaddr
        sub     %g3, 1, %g3     ! 2 * ecachesize -1 == mask
        and     %o1, %g3, %o1   ! and with xor'd address
        sethi   %hi(ecache_flushaddr), %o3
        ldx     [%o3 + %lo(ecache_flushaddr)], %o3

        rdpr    %pstate, %o4
        andn    %o4, PSTATE_IE | PSTATE_AM, %o5
        wrpr    %o5, %g0, %pstate       ! clear IE, AM bits

        ! Place E$ in direct map mode for data access
        or      %g0, 1, %g5
        sllx    %g5, HB_UPA_DMAP_DATA_BIT, %g5
        ldxa    [%g0]ASI_UPA_CONFIG, %g4 ! current UPA config (restored later)
        or      %g4, %g5, %g5
        membar  #Sync
        stxa    %g5, [%g0]ASI_UPA_CONFIG ! enable direct map for data access
        membar  #Sync

        ldxa    [%g0]ASI_ESTATE_ERR, %g1
        stxa    %g0, [%g0]ASI_ESTATE_ERR        ! disable errors
        membar  #Sync

        ! Displace cache line from each set of E$ starting at the
        ! aliased address, at set-size stride, wrapping at 2*ecache_size
        ! and skipping load from physaddr. We need 10 loads to flush the
        ! physaddr from E$.
        mov     HB_PHYS_FLUSH_CNT-1, %g5 ! #loads to flush physaddr
        sub     %o0, %o3, %o5           ! physaddr - ecache_flushaddr
2:
        ldxa    [%o1 + %o3]ASI_MEM, %g0 ! load ecache_flushaddr + alias
3:
        add     %o1, %g2, %o1           ! calculate offset in next set
        and     %o1, %g3, %o1           ! force offset within aliased range
        cmp     %o1, %o5                ! skip loads from physaddr
        be,pn %ncc, 3b
          nop
        brgz,pt %g5, 2b
          dec   %g5

        membar  #Sync
        stxa    %g1, [%g0]ASI_ESTATE_ERR        ! restore error enable
        membar  #Sync

        stxa    %g4, [%g0]ASI_UPA_CONFIG ! restore UPA config (DM bits)
        membar  #Sync
#endif /* HUMMINGBIRD */
        retl
        wrpr    %g0, %o4, %pstate       ! restore earlier pstate register value
        SET_SIZE(flushecacheline)
1638 
1639 /*
1640  * ecache_scrubreq_tl1 is the crosstrap handler called at ecache_calls_a_sec Hz
1641  * from the clock CPU.  It atomically increments the outstanding request
1642  * counter and, if there was not already an outstanding request,
1643  * branches to setsoftint_tl1 to enqueue an intr_vec for the given inum.
1644  */
1645 
1646         ! Register usage:
1647         !
1648         ! Arguments:
1649         ! %g1 - inum
1650         !
1651         ! Internal:
1652         ! %g2, %g3, %g5 - scratch
1653         ! %g4 - ptr. to spitfire_scrub_misc ec_scrub_outstanding.
1654         ! %g6 - setsoftint_tl1 address
1655 
        ENTRY_NP(ecache_scrubreq_tl1)
        set     SFPR_SCRUB_MISC + EC_SCRUB_OUTSTANDING, %g2
        ! GET_CPU_PRIVATE_PTR leaves a pointer to this CPU's
        ! ec_scrub_outstanding counter in %g4; it branches to 1f when the
        ! per-CPU private area is unavailable (NOTE(review): inferred from
        ! the label argument -- confirm against the macro definition).
        GET_CPU_PRIVATE_PTR(%g2, %g4, %g5, 1f);
        ld      [%g4], %g2              ! cpu's ec_scrub_outstanding.
        set     setsoftint_tl1, %g6
        !
        ! no need to use atomic instructions for the following
        ! increment - we're at tl1
        !
        add     %g2, 0x1, %g3
        brnz,pn %g2, 1f                 ! no need to enqueue more intr_vec
          st    %g3, [%g4]              ! delay - store incremented counter
        jmp     %g6                     ! setsoftint_tl1(%g1) - queue intr_vec
          nop
        ! not reached
1:
        retry                           ! resume the interrupted flow
        SET_SIZE(ecache_scrubreq_tl1)
1674 
        /*
         * write_ec_tag_parity(), which zeroes the ecache tag,
         * marks the state as invalid and writes good parity to the tag.
         * Input %o0 = 32 bit E$ index
         */
        ENTRY(write_ec_tag_parity)
        /*
         * In:      %o0 = 32-bit E$ index
         * Clobbers: %o1 (pstate scratch), %o3, %o4, %g1
         * Runs with interrupts off and error traps disabled; the tag
         * write sequence is forced onto a single E$ line (see .align).
         */
        or      %g0, 1, %o4
        sllx    %o4, 39, %o4                    ! set bit 40 for e$ tag access
                                                ! (i.e. 1 << 39)
        or      %o0, %o4, %o4                 ! %o4 = ecache addr for tag write

        rdpr    %pstate, %o5
        andn    %o5, PSTATE_IE | PSTATE_AM, %o1
        wrpr    %o1, %g0, %pstate               ! clear IE, AM bits

        ldxa    [%g0]ASI_ESTATE_ERR, %g1
        stxa    %g0, [%g0]ASI_ESTATE_ERR        ! Turn off Error enable
        membar  #Sync

        ba      1f
         nop
        /*
         * Align on the ecache boundary in order to force
         * critical code section onto the same ecache line.
         */
         .align 64

1:
        set     S_EC_PARITY, %o3                ! clear tag, state invalid
        sllx    %o3, S_ECPAR_SHIFT, %o3         ! and with good tag parity
        stxa    %o3, [%g0]ASI_EC_DIAG           ! update with the above info
        stxa    %g0, [%o4]ASI_EC_W
        membar  #Sync

        stxa    %g1, [%g0]ASI_ESTATE_ERR        ! Turn error enable back on
        membar  #Sync
        retl
        wrpr    %g0, %o5, %pstate               ! restore pstate (IE, AM)
        SET_SIZE(write_ec_tag_parity)
1713 
        /*
         * write_hb_ec_tag_parity(), which zeroes the ecache tag,
         * marks the state as invalid and writes good parity to the tag.
         * Input %o0 = 32 bit E$ index
         */
        ENTRY(write_hb_ec_tag_parity)
        /*
         * Hummingbird/Sabre variant of write_ec_tag_parity: identical
         * sequence, but the parity/state constant differs per CPU type.
         * In:      %o0 = 32-bit E$ index
         * Clobbers: %o1 (pstate scratch), %o3, %o4, %g1
         */
        or      %g0, 1, %o4
        sllx    %o4, 39, %o4                    ! set bit 40 for e$ tag access
                                                ! (i.e. 1 << 39)
        or      %o0, %o4, %o4               ! %o4 = ecache addr for tag write

        rdpr    %pstate, %o5
        andn    %o5, PSTATE_IE | PSTATE_AM, %o1
        wrpr    %o1, %g0, %pstate               ! clear IE, AM bits

        ldxa    [%g0]ASI_ESTATE_ERR, %g1
        stxa    %g0, [%g0]ASI_ESTATE_ERR        ! Turn off Error enable
        membar  #Sync

        ba      1f
         nop
        /*
         * Align on the ecache boundary in order to force
         * critical code section onto the same ecache line.
         */
         .align 64
1:
#ifdef HUMMINGBIRD
        set     HB_EC_PARITY, %o3               ! clear tag, state invalid
        sllx    %o3, HB_ECPAR_SHIFT, %o3        ! and with good tag parity
#else /* !HUMMINGBIRD */
        set     SB_EC_PARITY, %o3               ! clear tag, state invalid
        sllx    %o3, SB_ECPAR_SHIFT, %o3        ! and with good tag parity
#endif /* !HUMMINGBIRD */

        stxa    %o3, [%g0]ASI_EC_DIAG           ! update with the above info
        stxa    %g0, [%o4]ASI_EC_W
        membar  #Sync

        stxa    %g1, [%g0]ASI_ESTATE_ERR        ! Turn error enable back on
        membar  #Sync
        retl
        wrpr    %g0, %o5, %pstate               ! restore pstate (IE, AM)
        SET_SIZE(write_hb_ec_tag_parity)
1757 
1758 #define VIS_BLOCKSIZE           64
1759 
        ENTRY(dtrace_blksuword32)
        /*
         * dtrace_blksuword32(%i0 = dest addr, %i1 = ptr to 32-bit word,
         *                    %i2 = tryagain flag)
         *
         * Loads the 32-bit word at [%i1] into %f0 and block-commit-stores
         * the 64-byte FP register block %d0 to [%i0] via ASI_BLK_COMMIT_S,
         * protected by a lofault handler (label 0:).  Returns 0 on
         * success.  On a fault: returns -1, or tail-calls
         * dtrace_blksuword32_err() when the tryagain flag is set.
         * Interrupts are disabled and %f0 is saved/restored around the
         * FPU use.
         */
        save    %sp, -SA(MINFRAME + 4), %sp     ! 4 extra bytes: %f0 spill slot

        rdpr    %pstate, %l1
        andn    %l1, PSTATE_IE, %l2             ! disable interrupts to
        wrpr    %g0, %l2, %pstate               ! protect our FPU diddling

        rd      %fprs, %l0
        andcc   %l0, FPRS_FEF, %g0              ! Z set iff FPU was disabled
        bz,a,pt %xcc, 1f                        ! if the fpu is disabled
        wr      %g0, FPRS_FEF, %fprs            ! ... enable the fpu

        st      %f0, [%fp + STACK_BIAS - 4]     ! save %f0 to the stack
1:
        set     0f, %l5                         ! 0f = fault recovery label below
        /*
         * We're about to write a block full of either total garbage
         * (not kernel data, don't worry) or user floating-point data
         * (so it only _looks_ like garbage).
         */
        ld      [%i1], %f0                      ! modify the block
        membar  #Sync
        stn     %l5, [THREAD_REG + T_LOFAULT]   ! set up the lofault handler
        stda    %d0, [%i0]ASI_BLK_COMMIT_S      ! store the modified block
        membar  #Sync
        stn     %g0, [THREAD_REG + T_LOFAULT]   ! remove the lofault handler

        ! %icc still reflects the andcc above (the intervening loads,
        ! stores and membars leave the condition codes intact): branch
        ! taken iff the FPU was disabled on entry.
        bz,a,pt %xcc, 1f
        wr      %g0, %l0, %fprs                 ! restore %fprs

        ld      [%fp + STACK_BIAS - 4], %f0     ! restore %f0
1:

        wrpr    %g0, %l1, %pstate               ! restore interrupts

        ret
        restore %g0, %g0, %o0                   ! return 0 (success)

0:
        ! Fault recovery path, reached via the lofault handler.
        membar  #Sync
        stn     %g0, [THREAD_REG + T_LOFAULT]   ! remove the lofault handler

        ! Same condition-code trick as above: restore FPU state.
        bz,a,pt %xcc, 1f
        wr      %g0, %l0, %fprs                 ! restore %fprs

        ld      [%fp + STACK_BIAS - 4], %f0     ! restore %f0
1:

        wrpr    %g0, %l1, %pstate               ! restore interrupts

        /*
         * If tryagain is set (%i2) we tail-call dtrace_blksuword32_err()
         * which deals with watchpoints. Otherwise, just return -1.
         */
        brnz,pt %i2, 1f
        nop
        ret
        restore %g0, -1, %o0                    ! return -1 (failure)
1:
        call    dtrace_blksuword32_err
        restore

        SET_SIZE(dtrace_blksuword32)
1823