1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * 25 * Assembly code support for Cheetah/Cheetah+ modules 26 */ 27 28 #include "assym.h" 29 30 #include <sys/asm_linkage.h> 31 #include <sys/mmu.h> 32 #include <vm/hat_sfmmu.h> 33 #include <sys/machparam.h> 34 #include <sys/machcpuvar.h> 35 #include <sys/machthread.h> 36 #include <sys/machtrap.h> 37 #include <sys/privregs.h> 38 #include <sys/trap.h> 39 #include <sys/cheetahregs.h> 40 #include <sys/us3_module.h> 41 #include <sys/xc_impl.h> 42 #include <sys/intreg.h> 43 #include <sys/async.h> 44 #include <sys/clock.h> 45 #include <sys/cheetahasm.h> 46 #include <sys/cmpregs.h> 47 48 #ifdef TRAPTRACE 49 #include <sys/traptrace.h> 50 #endif /* TRAPTRACE */ 51 52 /* BEGIN CSTYLED */ 53 54 #define DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3) \ 55 ldxa [%g0]ASI_DCU, tmp1 ;\ 56 btst DCU_DC, tmp1 /* is dcache enabled? */ ;\ 57 bz,pn %icc, 1f ;\ 58 ASM_LD(tmp1, dcache_linesize) ;\ 59 ASM_LD(tmp2, dflush_type) ;\ 60 cmp tmp2, FLUSHPAGE_TYPE ;\ 61 be,pt %icc, 2f ;\ 62 nop ;\ 63 sllx arg1, CHEETAH_DC_VBIT_SHIFT, arg1/* tag to compare */ ;\ 64 ASM_LD(tmp3, dcache_size) ;\ 65 cmp tmp2, FLUSHMATCH_TYPE ;\ 66 be,pt %icc, 3f ;\ 67 nop ;\ 68 /* \ 69 * flushtype = FLUSHALL_TYPE, flush the whole thing \ 70 * tmp3 = cache size \ 71 * tmp1 = cache line size \ 72 */ \ 73 sub tmp3, tmp1, tmp2 ;\ 74 4: \ 75 stxa %g0, [tmp2]ASI_DC_TAG ;\ 76 membar #Sync ;\ 77 cmp %g0, tmp2 ;\ 78 bne,pt %icc, 4b ;\ 79 sub tmp2, tmp1, tmp2 ;\ 80 ba,pt %icc, 1f ;\ 81 nop ;\ 82 /* \ 83 * flushtype = FLUSHPAGE_TYPE \ 84 * arg1 = pfn \ 85 * arg2 = virtual color \ 86 * tmp1 = cache line size \ 87 * tmp2 = tag from cache \ 88 * tmp3 = counter \ 89 */ \ 90 2: \ 91 set MMU_PAGESIZE, tmp3 ;\ 92 sllx arg1, MMU_PAGESHIFT, arg1 /* pfn to 43 bit PA */ ;\ 93 sub tmp3, tmp1, tmp3 ;\ 94 4: \ 95 stxa %g0, [arg1 + tmp3]ASI_DC_INVAL ;\ 96 membar #Sync ;\ 97 5: \ 98 cmp %g0, tmp3 ;\ 99 bnz,pt %icc, 4b /* branch if not done */ ;\ 100 sub tmp3, tmp1, tmp3 ;\ 101 ba,pt %icc, 1f ;\ 102 nop ;\ 103 /* \ 104 * flushtype = FLUSHMATCH_TYPE \ 105 * arg1 = tag to compare against \ 106 * tmp1 = cache line size \ 107 * tmp3 = cache size \ 108 * arg2 = counter \ 109 * tmp2 = cache tag \ 110 */ \ 111 3: \ 112 sub tmp3, tmp1, arg2 ;\ 113 4: \ 114 ldxa [arg2]ASI_DC_TAG, tmp2 /* read tag */ ;\ 115 btst CHEETAH_DC_VBIT_MASK, tmp2 ;\ 116 bz,pn %icc, 5f /* br if no valid sub-blocks */ ;\ 117 andn tmp2, CHEETAH_DC_VBIT_MASK, tmp2 /* clear out v bits */ ;\ 118 cmp tmp2, arg1 ;\ 119 bne,pn %icc, 5f /* branch if tag miss */ ;\ 120 nop ;\ 121 stxa %g0, [arg2]ASI_DC_TAG ;\ 122 membar #Sync ;\ 123 5: \ 124 cmp %g0, arg2 ;\ 125 bne,pt %icc, 4b /* branch if not done */ ;\ 126 sub arg2, tmp1, arg2 ;\ 127 1: 128 129 /* 130 * macro that flushes the entire dcache color 131 * dcache size = 64K, one way 16K 132 * 133 * In: 134 * arg = virtual color register (not clobbered) 135 * way = way#, can either be a constant or a register (not clobbered) 136 * tmp1, tmp2, tmp3 = scratch registers 137 * 138 */ 139 #define DCACHE_FLUSHCOLOR(arg, way, tmp1, tmp2, tmp3) \ 140 ldxa [%g0]ASI_DCU, tmp1; \ 141 btst DCU_DC, tmp1; /* is dcache enabled? */ \ 142 bz,pn %icc, 1f; \ 143 ASM_LD(tmp1, dcache_linesize) \ 144 /* \ 145 * arg = virtual color \ 146 * tmp1 = cache line size \ 147 */ \ 148 sllx arg, MMU_PAGESHIFT, tmp2; /* color to dcache page */ \ 149 mov way, tmp3; \ 150 sllx tmp3, 14, tmp3; /* One way 16K */ \ 151 or tmp2, tmp3, tmp3; \ 152 set MMU_PAGESIZE, tmp2; \ 153 /* \ 154 * tmp2 = page size \ 155 * tmp3 = cached page in dcache \ 156 */ \ 157 sub tmp2, tmp1, tmp2; \ 158 2: \ 159 stxa %g0, [tmp3 + tmp2]ASI_DC_TAG; \ 160 membar #Sync; \ 161 cmp %g0, tmp2; \ 162 bne,pt %icc, 2b; \ 163 sub tmp2, tmp1, tmp2; \ 164 1: 165 166 /* END CSTYLED */ 167 168 /* 169 * Cheetah MMU and Cache operations. 170 */ 171 172 ENTRY_NP(vtag_flushpage) 173 /* 174 * flush page from the tlb 175 * 176 * %o0 = vaddr 177 * %o1 = sfmmup 178 */ 179 rdpr %pstate, %o5 180 #ifdef DEBUG 181 PANIC_IF_INTR_DISABLED_PSTR(%o5, u3_di_label0, %g1) 182 #endif /* DEBUG */ 183 /* 184 * disable ints 185 */ 186 andn %o5, PSTATE_IE, %o4 187 wrpr %o4, 0, %pstate 188 189 /* 190 * Then, blow out the tlb 191 * Interrupts are disabled to prevent the primary ctx register 192 * from changing underneath us. 193 */ 194 sethi %hi(ksfmmup), %o3 195 ldx [%o3 + %lo(ksfmmup)], %o3 196 cmp %o3, %o1 197 bne,pt %xcc, 1f ! if not kernel as, go to 1 198 sethi %hi(FLUSH_ADDR), %o3 199 /* 200 * For Kernel demaps use primary. type = page implicitly 201 */ 202 stxa %g0, [%o0]ASI_DTLB_DEMAP /* dmmu flush for KCONTEXT */ 203 stxa %g0, [%o0]ASI_ITLB_DEMAP /* immu flush for KCONTEXT */ 204 flush %o3 205 retl 206 wrpr %g0, %o5, %pstate /* enable interrupts */ 207 1: 208 /* 209 * User demap. We need to set the primary context properly. 210 * Secondary context cannot be used for Cheetah IMMU. 211 * %o0 = vaddr 212 * %o1 = sfmmup 213 * %o3 = FLUSH_ADDR 214 */ 215 SFMMU_CPU_CNUM(%o1, %g1, %g2) ! %g1 = sfmmu cnum on this CPU 216 217 ldub [%o1 + SFMMU_CEXT], %o4 ! %o4 = sfmmup->sfmmu_cext 218 sll %o4, CTXREG_EXT_SHIFT, %o4 219 or %g1, %o4, %g1 ! %g1 = primary pgsz | cnum 220 221 wrpr %g0, 1, %tl 222 set MMU_PCONTEXT, %o4 223 or DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %o0, %o0 224 ldxa [%o4]ASI_DMMU, %o2 ! %o2 = save old ctxnum 225 srlx %o2, CTXREG_NEXT_SHIFT, %o1 ! need to preserve nucleus pgsz 226 sllx %o1, CTXREG_NEXT_SHIFT, %o1 ! %o1 = nucleus pgsz 227 or %g1, %o1, %g1 ! %g1 = nucleus pgsz | primary pgsz | cnum 228 stxa %g1, [%o4]ASI_DMMU ! wr new ctxum 229 230 stxa %g0, [%o0]ASI_DTLB_DEMAP 231 stxa %g0, [%o0]ASI_ITLB_DEMAP 232 stxa %o2, [%o4]ASI_DMMU /* restore old ctxnum */ 233 flush %o3 234 wrpr %g0, 0, %tl 235 236 retl 237 wrpr %g0, %o5, %pstate /* enable interrupts */ 238 SET_SIZE(vtag_flushpage) 239 240 ENTRY_NP2(vtag_flushall, demap_all) 241 /* 242 * flush the tlb 243 */ 244 sethi %hi(FLUSH_ADDR), %o3 245 set DEMAP_ALL_TYPE, %g1 246 stxa %g0, [%g1]ASI_DTLB_DEMAP 247 stxa %g0, [%g1]ASI_ITLB_DEMAP 248 flush %o3 249 retl 250 nop 251 SET_SIZE(demap_all) 252 SET_SIZE(vtag_flushall) 253 254 255 ENTRY_NP(vtag_flushpage_tl1) 256 /* 257 * x-trap to flush page from tlb and tsb 258 * 259 * %g1 = vaddr, zero-extended on 32-bit kernel 260 * %g2 = sfmmup 261 * 262 * assumes TSBE_TAG = 0 263 */ 264 srln %g1, MMU_PAGESHIFT, %g1 265 266 sethi %hi(ksfmmup), %g3 267 ldx [%g3 + %lo(ksfmmup)], %g3 268 cmp %g3, %g2 269 bne,pt %xcc, 1f ! if not kernel as, go to 1 270 slln %g1, MMU_PAGESHIFT, %g1 /* g1 = vaddr */ 271 272 /* We need to demap in the kernel context */ 273 or DEMAP_NUCLEUS | DEMAP_PAGE_TYPE, %g1, %g1 274 stxa %g0, [%g1]ASI_DTLB_DEMAP 275 stxa %g0, [%g1]ASI_ITLB_DEMAP 276 retry 277 1: 278 /* We need to demap in a user context */ 279 or DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %g1, %g1 280 281 SFMMU_CPU_CNUM(%g2, %g6, %g3) ! %g6 = sfmmu cnum on this CPU 282 283 ldub [%g2 + SFMMU_CEXT], %g4 ! %g4 = sfmmup->cext 284 sll %g4, CTXREG_EXT_SHIFT, %g4 285 or %g6, %g4, %g6 ! %g6 = pgsz | cnum 286 287 set MMU_PCONTEXT, %g4 288 ldxa [%g4]ASI_DMMU, %g5 /* rd old ctxnum */ 289 srlx %g5, CTXREG_NEXT_SHIFT, %g2 /* %g2 = nucleus pgsz */ 290 sllx %g2, CTXREG_NEXT_SHIFT, %g2 /* preserve nucleus pgsz */ 291 or %g6, %g2, %g6 /* %g6 = nucleus pgsz | primary pgsz | cnum */ 292 stxa %g6, [%g4]ASI_DMMU /* wr new ctxum */ 293 stxa %g0, [%g1]ASI_DTLB_DEMAP 294 stxa %g0, [%g1]ASI_ITLB_DEMAP 295 stxa %g5, [%g4]ASI_DMMU /* restore old ctxnum */ 296 retry 297 SET_SIZE(vtag_flushpage_tl1) 298 299 300 ENTRY_NP(vtag_flush_pgcnt_tl1) 301 /* 302 * x-trap to flush pgcnt MMU_PAGESIZE pages from tlb 303 * 304 * %g1 = vaddr, zero-extended on 32-bit kernel 305 * %g2 = <sfmmup58|pgcnt6>, (pgcnt - 1) is pass'ed in via pgcnt6 bits. 306 * 307 * NOTE: this handler relies on the fact that no 308 * interrupts or traps can occur during the loop 309 * issuing the TLB_DEMAP operations. It is assumed 310 * that interrupts are disabled and this code is 311 * fetching from the kernel locked text address. 312 * 313 * assumes TSBE_TAG = 0 314 */ 315 set SFMMU_PGCNT_MASK, %g4 316 and %g4, %g2, %g3 /* g3 = pgcnt - 1 */ 317 add %g3, 1, %g3 /* g3 = pgcnt */ 318 319 andn %g2, SFMMU_PGCNT_MASK, %g2 /* g2 = sfmmup */ 320 srln %g1, MMU_PAGESHIFT, %g1 321 322 sethi %hi(ksfmmup), %g4 323 ldx [%g4 + %lo(ksfmmup)], %g4 324 cmp %g4, %g2 325 bne,pn %xcc, 1f /* if not kernel as, go to 1 */ 326 slln %g1, MMU_PAGESHIFT, %g1 /* g1 = vaddr */ 327 328 /* We need to demap in the kernel context */ 329 or DEMAP_NUCLEUS | DEMAP_PAGE_TYPE, %g1, %g1 330 set MMU_PAGESIZE, %g2 /* g2 = pgsize */ 331 sethi %hi(FLUSH_ADDR), %g5 332 4: 333 stxa %g0, [%g1]ASI_DTLB_DEMAP 334 stxa %g0, [%g1]ASI_ITLB_DEMAP 335 flush %g5 ! flush required by immu 336 337 deccc %g3 /* decr pgcnt */ 338 bnz,pt %icc,4b 339 add %g1, %g2, %g1 /* next page */ 340 retry 341 1: 342 /* 343 * We need to demap in a user context 344 * 345 * g2 = sfmmup 346 * g3 = pgcnt 347 */ 348 SFMMU_CPU_CNUM(%g2, %g5, %g6) ! %g5 = sfmmu cnum on this CPU 349 350 or DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %g1, %g1 351 352 ldub [%g2 + SFMMU_CEXT], %g4 ! %g4 = sfmmup->cext 353 sll %g4, CTXREG_EXT_SHIFT, %g4 354 or %g5, %g4, %g5 355 356 set MMU_PCONTEXT, %g4 357 ldxa [%g4]ASI_DMMU, %g6 /* rd old ctxnum */ 358 srlx %g6, CTXREG_NEXT_SHIFT, %g2 /* %g2 = nucleus pgsz */ 359 sllx %g2, CTXREG_NEXT_SHIFT, %g2 /* preserve nucleus pgsz */ 360 or %g5, %g2, %g5 /* %g5 = nucleus pgsz | primary pgsz | cnum */ 361 stxa %g5, [%g4]ASI_DMMU /* wr new ctxum */ 362 363 set MMU_PAGESIZE, %g2 /* g2 = pgsize */ 364 sethi %hi(FLUSH_ADDR), %g5 365 3: 366 stxa %g0, [%g1]ASI_DTLB_DEMAP 367 stxa %g0, [%g1]ASI_ITLB_DEMAP 368 flush %g5 ! flush required by immu 369 370 deccc %g3 /* decr pgcnt */ 371 bnz,pt %icc,3b 372 add %g1, %g2, %g1 /* next page */ 373 374 stxa %g6, [%g4]ASI_DMMU /* restore old ctxnum */ 375 retry 376 SET_SIZE(vtag_flush_pgcnt_tl1) 377 378 ENTRY_NP(vtag_flushall_tl1) 379 /* 380 * x-trap to flush tlb 381 */ 382 set DEMAP_ALL_TYPE, %g4 383 stxa %g0, [%g4]ASI_DTLB_DEMAP 384 stxa %g0, [%g4]ASI_ITLB_DEMAP 385 retry 386 SET_SIZE(vtag_flushall_tl1) 387 388 389 /* 390 * vac_flushpage(pfnum, color) 391 * Flush 1 8k page of the D-$ with physical page = pfnum 392 * Algorithm: 393 * The cheetah dcache is a 64k psuedo 4 way accaociative cache. 394 * It is virtual indexed, physically tagged cache. 395 */ 396 .seg ".data" 397 .align 8 398 .global dflush_type 399 dflush_type: 400 .word FLUSHPAGE_TYPE 401 402 ENTRY(vac_flushpage) 403 /* 404 * flush page from the d$ 405 * 406 * %o0 = pfnum, %o1 = color 407 */ 408 DCACHE_FLUSHPAGE(%o0, %o1, %o2, %o3, %o4) 409 retl 410 nop 411 SET_SIZE(vac_flushpage) 412 413 414 ENTRY_NP(vac_flushpage_tl1) 415 /* 416 * x-trap to flush page from the d$ 417 * 418 * %g1 = pfnum, %g2 = color 419 */ 420 DCACHE_FLUSHPAGE(%g1, %g2, %g3, %g4, %g5) 421 retry 422 SET_SIZE(vac_flushpage_tl1) 423 424 425 ENTRY(vac_flushcolor) 426 /* 427 * %o0 = vcolor 428 */ 429 DCACHE_FLUSHCOLOR(%o0, 0, %o1, %o2, %o3) 430 DCACHE_FLUSHCOLOR(%o0, 1, %o1, %o2, %o3) 431 DCACHE_FLUSHCOLOR(%o0, 2, %o1, %o2, %o3) 432 DCACHE_FLUSHCOLOR(%o0, 3, %o1, %o2, %o3) 433 retl 434 nop 435 SET_SIZE(vac_flushcolor) 436 437 438 ENTRY(vac_flushcolor_tl1) 439 /* 440 * %g1 = vcolor 441 */ 442 DCACHE_FLUSHCOLOR(%g1, 0, %g2, %g3, %g4) 443 DCACHE_FLUSHCOLOR(%g1, 1, %g2, %g3, %g4) 444 DCACHE_FLUSHCOLOR(%g1, 2, %g2, %g3, %g4) 445 DCACHE_FLUSHCOLOR(%g1, 3, %g2, %g3, %g4) 446 retry 447 SET_SIZE(vac_flushcolor_tl1) 448 449 /* 450 * Determine whether or not the IDSR is busy. 451 * Entry: no arguments 452 * Returns: 1 if busy, 0 otherwise 453 */ 454 ENTRY(idsr_busy) 455 ldxa [%g0]ASI_INTR_DISPATCH_STATUS, %g1 456 clr %o0 457 btst IDSR_BUSY, %g1 458 bz,a,pt %xcc, 1f 459 mov 1, %o0 460 1: 461 retl 462 nop 463 SET_SIZE(idsr_busy) 464 465 .global _dispatch_status_busy 466 _dispatch_status_busy: 467 .asciz "ASI_INTR_DISPATCH_STATUS error: busy" 468 .align 4 469 470 /* 471 * Setup interrupt dispatch data registers 472 * Entry: 473 * %o0 - function or inumber to call 474 * %o1, %o2 - arguments (2 uint64_t's) 475 */ 476 .seg "text" 477 478 ENTRY(init_mondo) 479 #ifdef DEBUG 480 ! 481 ! IDSR should not be busy at the moment 482 ! 483 ldxa [%g0]ASI_INTR_DISPATCH_STATUS, %g1 484 btst IDSR_BUSY, %g1 485 bz,pt %xcc, 1f 486 nop 487 sethi %hi(_dispatch_status_busy), %o0 488 call panic 489 or %o0, %lo(_dispatch_status_busy), %o0 490 #endif /* DEBUG */ 491 492 ALTENTRY(init_mondo_nocheck) 493 ! 494 ! interrupt vector dispatch data reg 0 495 ! 496 1: 497 mov IDDR_0, %g1 498 mov IDDR_1, %g2 499 mov IDDR_2, %g3 500 stxa %o0, [%g1]ASI_INTR_DISPATCH 501 502 ! 503 ! interrupt vector dispatch data reg 1 504 ! 505 stxa %o1, [%g2]ASI_INTR_DISPATCH 506 507 ! 508 ! interrupt vector dispatch data reg 2 509 ! 510 stxa %o2, [%g3]ASI_INTR_DISPATCH 511 512 membar #Sync 513 retl 514 nop 515 SET_SIZE(init_mondo_nocheck) 516 SET_SIZE(init_mondo) 517 518 519 #if !(defined(JALAPENO) || defined(SERRANO)) 520 521 /* 522 * Ship mondo to aid using busy/nack pair bn 523 */ 524 ENTRY_NP(shipit) 525 sll %o0, IDCR_PID_SHIFT, %g1 ! IDCR<18:14> = agent id 526 sll %o1, IDCR_BN_SHIFT, %g2 ! IDCR<28:24> = b/n pair 527 or %g1, IDCR_OFFSET, %g1 ! IDCR<13:0> = 0x70 528 or %g1, %g2, %g1 529 stxa %g0, [%g1]ASI_INTR_DISPATCH ! interrupt vector dispatch 530 membar #Sync 531 retl 532 nop 533 SET_SIZE(shipit) 534 535 #endif /* !(JALAPENO || SERRANO) */ 536 537 538 /* 539 * flush_instr_mem: 540 * Flush 1 page of the I-$ starting at vaddr 541 * %o0 vaddr 542 * %o1 bytes to be flushed 543 * UltraSPARC-III maintains consistency of the on-chip Instruction Cache with 544 * the stores from all processors so that a FLUSH instruction is only needed 545 * to ensure pipeline is consistent. This means a single flush is sufficient at 546 * the end of a sequence of stores that updates the instruction stream to 547 * ensure correct operation. 548 */ 549 550 ENTRY(flush_instr_mem) 551 flush %o0 ! address irrelevant 552 retl 553 nop 554 SET_SIZE(flush_instr_mem) 555 556 557 #if defined(CPU_IMP_ECACHE_ASSOC) 558 559 ENTRY(get_ecache_ctrl) 560 GET_CPU_IMPL(%o0) 561 cmp %o0, JAGUAR_IMPL 562 ! 563 ! Putting an ASI access in the delay slot may 564 ! cause it to be accessed, even when annulled. 565 ! 566 bne 1f 567 nop 568 ldxa [%g0]ASI_EC_CFG_TIMING, %o0 ! read Jaguar shared E$ ctrl reg 569 b 2f 570 nop 571 1: 572 ldxa [%g0]ASI_EC_CTRL, %o0 ! read Ch/Ch+ E$ control reg 573 2: 574 retl 575 nop 576 SET_SIZE(get_ecache_ctrl) 577 578 #endif /* CPU_IMP_ECACHE_ASSOC */ 579 580 581 #if !(defined(JALAPENO) || defined(SERRANO)) 582 583 /* 584 * flush_ecache: 585 * %o0 - 64 bit physical address 586 * %o1 - ecache size 587 * %o2 - ecache linesize 588 */ 589 590 ENTRY(flush_ecache) 591 592 /* 593 * For certain CPU implementations, we have to flush the L2 cache 594 * before flushing the ecache. 595 */ 596 PN_L2_FLUSHALL(%g3, %g4, %g5) 597 598 /* 599 * Flush the entire Ecache using displacement flush. 600 */ 601 ECACHE_FLUSHALL(%o1, %o2, %o0, %o4) 602 603 retl 604 nop 605 SET_SIZE(flush_ecache) 606 607 #endif /* !(JALAPENO || SERRANO) */ 608 609 610 ENTRY(flush_dcache) 611 ASM_LD(%o0, dcache_size) 612 ASM_LD(%o1, dcache_linesize) 613 CH_DCACHE_FLUSHALL(%o0, %o1, %o2) 614 retl 615 nop 616 SET_SIZE(flush_dcache) 617 618 619 ENTRY(flush_icache) 620 GET_CPU_PRIVATE_PTR(%g0, %o0, %o2, flush_icache_1); 621 ld [%o0 + CHPR_ICACHE_LINESIZE], %o1 622 ba,pt %icc, 2f 623 ld [%o0 + CHPR_ICACHE_SIZE], %o0 624 flush_icache_1: 625 ASM_LD(%o0, icache_size) 626 ASM_LD(%o1, icache_linesize) 627 2: 628 CH_ICACHE_FLUSHALL(%o0, %o1, %o2, %o4) 629 retl 630 nop 631 SET_SIZE(flush_icache) 632 633 ENTRY(kdi_flush_idcache) 634 CH_DCACHE_FLUSHALL(%o0, %o1, %g1) 635 CH_ICACHE_FLUSHALL(%o2, %o3, %g1, %g2) 636 membar #Sync 637 retl 638 nop 639 SET_SIZE(kdi_flush_idcache) 640 641 ENTRY(flush_pcache) 642 PCACHE_FLUSHALL(%o0, %o1, %o2) 643 retl 644 nop 645 SET_SIZE(flush_pcache) 646 647 648 #if defined(CPU_IMP_L1_CACHE_PARITY) 649 650 /* 651 * Get dcache data and tag. The Dcache data is a pointer to a ch_dc_data_t 652 * structure (see cheetahregs.h): 653 * The Dcache *should* be turned off when this code is executed. 654 */ 655 .align 128 656 ENTRY(get_dcache_dtag) 657 rdpr %pstate, %o5 658 andn %o5, PSTATE_IE | PSTATE_AM, %o3 659 wrpr %g0, %o3, %pstate 660 b 1f 661 stx %o0, [%o1 + CH_DC_IDX] 662 663 .align 128 664 1: 665 ldxa [%o0]ASI_DC_TAG, %o2 666 stx %o2, [%o1 + CH_DC_TAG] 667 membar #Sync 668 ldxa [%o0]ASI_DC_UTAG, %o2 669 membar #Sync 670 stx %o2, [%o1 + CH_DC_UTAG] 671 ldxa [%o0]ASI_DC_SNP_TAG, %o2 672 stx %o2, [%o1 + CH_DC_SNTAG] 673 add %o1, CH_DC_DATA, %o1 674 clr %o3 675 2: 676 membar #Sync ! required before ASI_DC_DATA 677 ldxa [%o0 + %o3]ASI_DC_DATA, %o2 678 membar #Sync ! required after ASI_DC_DATA 679 stx %o2, [%o1 + %o3] 680 cmp %o3, CH_DC_DATA_REG_SIZE - 8 681 blt 2b 682 add %o3, 8, %o3 683 684 /* 685 * Unlike other CPUs in the family, D$ data parity bits for Panther 686 * do not reside in the microtag. Instead, we have to read them 687 * using the DC_data_parity bit of ASI_DCACHE_DATA. Also, instead 688 * of just having 8 parity bits to protect all 32 bytes of data 689 * per line, we now have 32 bits of parity. 690 */ 691 GET_CPU_IMPL(%o3) 692 cmp %o3, PANTHER_IMPL 693 bne 4f 694 clr %o3 695 696 /* 697 * move our pointer to the next field where we store parity bits 698 * and add the offset of the last parity byte since we will be 699 * storing all 4 parity bytes within one 64 bit field like this: 700 * 701 * +------+------------+------------+------------+------------+ 702 * | - | DC_parity | DC_parity | DC_parity | DC_parity | 703 * | - | for word 3 | for word 2 | for word 1 | for word 0 | 704 * +------+------------+------------+------------+------------+ 705 * 63:32 31:24 23:16 15:8 7:0 706 */ 707 add %o1, CH_DC_PN_DATA_PARITY - CH_DC_DATA + 7, %o1 708 709 /* add the DC_data_parity bit into our working index */ 710 mov 1, %o2 711 sll %o2, PN_DC_DATA_PARITY_BIT_SHIFT, %o2 712 or %o0, %o2, %o0 713 3: 714 membar #Sync ! required before ASI_DC_DATA 715 ldxa [%o0 + %o3]ASI_DC_DATA, %o2 716 membar #Sync ! required after ASI_DC_DATA 717 stb %o2, [%o1] 718 dec %o1 719 cmp %o3, CH_DC_DATA_REG_SIZE - 8 720 blt 3b 721 add %o3, 8, %o3 722 4: 723 retl 724 wrpr %g0, %o5, %pstate 725 SET_SIZE(get_dcache_dtag) 726 727 728 /* 729 * Get icache data and tag. The data argument is a pointer to a ch_ic_data_t 730 * structure (see cheetahregs.h): 731 * The Icache *Must* be turned off when this function is called. 732 * This is because diagnostic accesses to the Icache interfere with cache 733 * consistency. 734 */ 735 .align 128 736 ENTRY(get_icache_dtag) 737 rdpr %pstate, %o5 738 andn %o5, PSTATE_IE | PSTATE_AM, %o3 739 wrpr %g0, %o3, %pstate 740 741 stx %o0, [%o1 + CH_IC_IDX] 742 ldxa [%o0]ASI_IC_TAG, %o2 743 stx %o2, [%o1 + CH_IC_PATAG] 744 add %o0, CH_ICTAG_UTAG, %o0 745 ldxa [%o0]ASI_IC_TAG, %o2 746 add %o0, (CH_ICTAG_UPPER - CH_ICTAG_UTAG), %o0 747 stx %o2, [%o1 + CH_IC_UTAG] 748 ldxa [%o0]ASI_IC_TAG, %o2 749 add %o0, (CH_ICTAG_LOWER - CH_ICTAG_UPPER), %o0 750 stx %o2, [%o1 + CH_IC_UPPER] 751 ldxa [%o0]ASI_IC_TAG, %o2 752 andn %o0, CH_ICTAG_TMASK, %o0 753 stx %o2, [%o1 + CH_IC_LOWER] 754 ldxa [%o0]ASI_IC_SNP_TAG, %o2 755 stx %o2, [%o1 + CH_IC_SNTAG] 756 add %o1, CH_IC_DATA, %o1 757 clr %o3 758 2: 759 ldxa [%o0 + %o3]ASI_IC_DATA, %o2 760 stx %o2, [%o1 + %o3] 761 cmp %o3, PN_IC_DATA_REG_SIZE - 8 762 blt 2b 763 add %o3, 8, %o3 764 765 retl 766 wrpr %g0, %o5, %pstate 767 SET_SIZE(get_icache_dtag) 768 769 /* 770 * Get pcache data and tags. 771 * inputs: 772 * pcache_idx - fully constructed VA for for accessing P$ diagnostic 773 * registers. Contains PC_way and PC_addr shifted into 774 * the correct bit positions. See the PRM for more details. 775 * data - pointer to a ch_pc_data_t 776 * structure (see cheetahregs.h): 777 */ 778 .align 128 779 ENTRY(get_pcache_dtag) 780 rdpr %pstate, %o5 781 andn %o5, PSTATE_IE | PSTATE_AM, %o3 782 wrpr %g0, %o3, %pstate 783 784 stx %o0, [%o1 + CH_PC_IDX] 785 ldxa [%o0]ASI_PC_STATUS_DATA, %o2 786 stx %o2, [%o1 + CH_PC_STATUS] 787 ldxa [%o0]ASI_PC_TAG, %o2 788 stx %o2, [%o1 + CH_PC_TAG] 789 ldxa [%o0]ASI_PC_SNP_TAG, %o2 790 stx %o2, [%o1 + CH_PC_SNTAG] 791 add %o1, CH_PC_DATA, %o1 792 clr %o3 793 2: 794 ldxa [%o0 + %o3]ASI_PC_DATA, %o2 795 stx %o2, [%o1 + %o3] 796 cmp %o3, CH_PC_DATA_REG_SIZE - 8 797 blt 2b 798 add %o3, 8, %o3 799 800 retl 801 wrpr %g0, %o5, %pstate 802 SET_SIZE(get_pcache_dtag) 803 804 #endif /* CPU_IMP_L1_CACHE_PARITY */ 805 806 /* 807 * re-enable the i$, d$, w$, and p$ according to bootup cache state. 808 * Turn on WE, HPE, SPE, PE, IC, and DC bits defined as DCU_CACHE. 809 * %o0 - 64 bit constant 810 */ 811 ENTRY(set_dcu) 812 stxa %o0, [%g0]ASI_DCU ! Store to DCU 813 flush %g0 /* flush required after changing the IC bit */ 814 retl 815 nop 816 SET_SIZE(set_dcu) 817 818 819 /* 820 * Return DCU register. 821 */ 822 ENTRY(get_dcu) 823 ldxa [%g0]ASI_DCU, %o0 /* DCU control register */ 824 retl 825 nop 826 SET_SIZE(get_dcu) 827 828 /* 829 * Cheetah/Cheetah+ level 15 interrupt handler trap table entry. 830 * 831 * This handler is used to check for softints generated by error trap 832 * handlers to report errors. On Cheetah, this mechanism is used by the 833 * Fast ECC at TL>0 error trap handler and, on Cheetah+, by both the Fast 834 * ECC at TL>0 error and the I$/D$ parity error at TL>0 trap handlers. 835 * NB: Must be 8 instructions or less to fit in trap table and code must 836 * be relocatable. 837 */ 838 839 ENTRY_NP(ch_pil15_interrupt_instr) 840 ASM_JMP(%g1, ch_pil15_interrupt) 841 SET_SIZE(ch_pil15_interrupt_instr) 842 843 844 ENTRY_NP(ch_pil15_interrupt) 845 846 /* 847 * Since pil_interrupt is hacked to assume that every level 15 848 * interrupt is generated by the CPU to indicate a performance 849 * counter overflow this gets ugly. Before calling pil_interrupt 850 * the Error at TL>0 pending status is inspected. If it is 851 * non-zero, then an error has occurred and it is handled. 852 * Otherwise control is transfered to pil_interrupt. Note that if 853 * an error is detected pil_interrupt will not be called and 854 * overflow interrupts may be lost causing erroneous performance 855 * measurements. However, error-recovery will have a detrimental 856 * effect on performance anyway. 857 */ 858 CPU_INDEX(%g1, %g4) 859 set ch_err_tl1_pending, %g4 860 ldub [%g1 + %g4], %g2 861 brz %g2, 1f 862 nop 863 864 /* 865 * We have a pending TL>0 error, clear the TL>0 pending status. 866 */ 867 stb %g0, [%g1 + %g4] 868 869 /* 870 * Clear the softint. 871 */ 872 mov 1, %g5 873 sll %g5, PIL_15, %g5 874 wr %g5, CLEAR_SOFTINT 875 876 /* 877 * For Cheetah*, call cpu_tl1_error via systrap at PIL 15 878 * to process the Fast ECC/Cache Parity at TL>0 error. Clear 879 * panic flag (%g2). 880 */ 881 set cpu_tl1_error, %g1 882 clr %g2 883 ba sys_trap 884 mov PIL_15, %g4 885 886 1: 887 /* 888 * The logout is invalid. 889 * 890 * Call the default interrupt handler. 891 */ 892 sethi %hi(pil_interrupt), %g1 893 jmp %g1 + %lo(pil_interrupt) 894 mov PIL_15, %g4 895 896 SET_SIZE(ch_pil15_interrupt) 897 898 899 /* 900 * Error Handling 901 * 902 * Cheetah provides error checking for all memory access paths between 903 * the CPU, External Cache, Cheetah Data Switch and system bus. Error 904 * information is logged in the AFSR, (also AFSR_EXT for Panther) and 905 * AFAR and one of the following traps is generated (provided that it 906 * is enabled in External Cache Error Enable Register) to handle that 907 * error: 908 * 1. trap 0x70: Precise trap 909 * tt0_fecc for errors at trap level(TL)>=0 910 * 2. trap 0x0A and 0x32: Deferred trap 911 * async_err for errors at TL>=0 912 * 3. trap 0x63: Disrupting trap 913 * ce_err for errors at TL=0 914 * (Note that trap 0x63 cannot happen at trap level > 0) 915 * 916 * Trap level one handlers panic the system except for the fast ecc 917 * error handler which tries to recover from certain errors. 918 */ 919 920 /* 921 * FAST ECC TRAP STRATEGY: 922 * 923 * Software must handle single and multi bit errors which occur due to data 924 * or instruction cache reads from the external cache. A single or multi bit 925 * error occuring in one of these situations results in a precise trap. 926 * 927 * The basic flow of this trap handler is as follows: 928 * 929 * 1) Record the state and then turn off the Dcache and Icache. The Dcache 930 * is disabled because bad data could have been installed. The Icache is 931 * turned off because we want to capture the Icache line related to the 932 * AFAR. 933 * 2) Disable trapping on CEEN/NCCEN errors during TL=0 processing. 934 * 3) Park sibling core if caches are shared (to avoid race condition while 935 * accessing shared resources such as L3 data staging register during 936 * CPU logout. 937 * 4) Read the AFAR and AFSR. 938 * 5) If CPU logout structure is not being used, then: 939 * 6) Clear all errors from the AFSR. 940 * 7) Capture Ecache, Dcache and Icache lines in "CPU log out" structure. 941 * 8) Flush Ecache then Flush Dcache and Icache and restore to previous 942 * state. 943 * 9) Unpark sibling core if we parked it earlier. 944 * 10) call cpu_fast_ecc_error via systrap at PIL 14 unless we're already 945 * running at PIL 15. 946 * 6) Otherwise, if CPU logout structure is being used: 947 * 7) Incriment the "logout busy count". 948 * 8) Flush Ecache then Flush Dcache and Icache and restore to previous 949 * state. 950 * 9) Unpark sibling core if we parked it earlier. 951 * 10) Issue a retry since the other CPU error logging code will end up 952 * finding this error bit and logging information about it later. 953 * 7) Alternatively (to 5 and 6 above), if the cpu_private struct is not 954 * yet initialized such that we can't even check the logout struct, then 955 * we place the clo_flags data into %g2 (sys_trap->have_win arg #1) and 956 * call cpu_fast_ecc_error via systrap. The clo_flags parameter is used 957 * to determine information such as TL, TT, CEEN and NCEEN settings, etc 958 * in the high level trap handler since we don't have access to detailed 959 * logout information in cases where the cpu_private struct is not yet 960 * initialized. 961 * 962 * We flush the E$ and D$ here on TL=1 code to prevent getting nested 963 * Fast ECC traps in the TL=0 code. If we get a Fast ECC event here in 964 * the TL=1 code, we will go to the Fast ECC at TL>0 handler which, 965 * since it is uses different code/data from this handler, has a better 966 * chance of fixing things up than simply recursing through this code 967 * again (this would probably cause an eventual kernel stack overflow). 968 * If the Fast ECC at TL>0 handler encounters a Fast ECC error before it 969 * can flush the E$ (or the error is a stuck-at bit), we will recurse in 970 * the Fast ECC at TL>0 handler and eventually Red Mode. 971 * 972 * Note that for Cheetah (and only Cheetah), we use alias addresses for 973 * flushing rather than ASI accesses (which don't exist on Cheetah). 974 * Should we encounter a Fast ECC error within this handler on Cheetah, 975 * there's a good chance it's within the ecache_flushaddr buffer (since 976 * it's the largest piece of memory we touch in the handler and it is 977 * usually kernel text/data). For that reason the Fast ECC at TL>0 978 * handler for Cheetah uses an alternate buffer: ecache_tl1_flushaddr. 979 */ 980 981 /* 982 * Cheetah ecc-protected E$ trap (Trap 70) at TL=0 983 * tt0_fecc is replaced by fecc_err_instr in cpu_init_trap of the various 984 * architecture-specific files. 985 * NB: Must be 8 instructions or less to fit in trap table and code must 986 * be relocatable. 987 */ 988 989 ENTRY_NP(fecc_err_instr) 990 membar #Sync ! Cheetah requires membar #Sync 991 992 /* 993 * Save current DCU state. Turn off the Dcache and Icache. 994 */ 995 ldxa [%g0]ASI_DCU, %g1 ! save DCU in %g1 996 andn %g1, DCU_DC + DCU_IC, %g4 997 stxa %g4, [%g0]ASI_DCU 998 flush %g0 /* flush required after changing the IC bit */ 999 1000 ASM_JMP(%g4, fast_ecc_err) 1001 SET_SIZE(fecc_err_instr) 1002 1003 1004 #if !(defined(JALAPENO) || defined(SERRANO)) 1005 1006 .section ".text" 1007 .align 64 1008 ENTRY_NP(fast_ecc_err) 1009 1010 /* 1011 * Turn off CEEN and NCEEN. 1012 */ 1013 ldxa [%g0]ASI_ESTATE_ERR, %g3 1014 andn %g3, EN_REG_NCEEN + EN_REG_CEEN, %g4 1015 stxa %g4, [%g0]ASI_ESTATE_ERR 1016 membar #Sync ! membar sync required 1017 1018 /* 1019 * Check to see whether we need to park our sibling core 1020 * before recording diagnostic information from caches 1021 * which may be shared by both cores. 1022 * We use %g1 to store information about whether or not 1023 * we had to park the core (%g1 holds our DCUCR value and 1024 * we only use bits from that register which are "reserved" 1025 * to keep track of core parking) so that we know whether 1026 * or not to unpark later. %g5 and %g4 are scratch registers. 1027 */ 1028 PARK_SIBLING_CORE(%g1, %g5, %g4) 1029 1030 /* 1031 * Do the CPU log out capture. 1032 * %g3 = "failed?" return value. 1033 * %g2 = Input = AFAR. Output the clo_flags info which is passed 1034 * into this macro via %g4. Output only valid if cpu_private 1035 * struct has not been initialized. 1036 * CHPR_FECCTL0_LOGOUT = cpu logout structure offset input 1037 * %g4 = Trap information stored in the cpu logout flags field 1038 * %g5 = scr1 1039 * %g6 = scr2 1040 * %g3 = scr3 1041 * %g4 = scr4 1042 */ 1043 /* store the CEEN and NCEEN values, TL=0 */ 1044 and %g3, EN_REG_CEEN + EN_REG_NCEEN, %g4 1045 set CHPR_FECCTL0_LOGOUT, %g6 1046 DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4) 1047 1048 /* 1049 * Flush the Ecache (and L2 cache for Panther) to get the error out 1050 * of the Ecache. If the UCC or UCU is on a dirty line, then the 1051 * following flush will turn that into a WDC or WDU, respectively. 1052 */ 1053 PN_L2_FLUSHALL(%g4, %g5, %g6) 1054 1055 CPU_INDEX(%g4, %g5) 1056 mulx %g4, CPU_NODE_SIZE, %g4 1057 set cpunodes, %g5 1058 add %g4, %g5, %g4 1059 ld [%g4 + ECACHE_LINESIZE], %g5 1060 ld [%g4 + ECACHE_SIZE], %g4 1061 1062 ASM_LDX(%g6, ecache_flushaddr) 1063 ECACHE_FLUSHALL(%g4, %g5, %g6, %g7) 1064 1065 /* 1066 * Flush the Dcache. Since bad data could have been installed in 1067 * the Dcache we must flush it before re-enabling it. 1068 */ 1069 ASM_LD(%g5, dcache_size) 1070 ASM_LD(%g6, dcache_linesize) 1071 CH_DCACHE_FLUSHALL(%g5, %g6, %g7) 1072 1073 /* 1074 * Flush the Icache. Since we turned off the Icache to capture the 1075 * Icache line it is now stale or corrupted and we must flush it 1076 * before re-enabling it. 1077 */ 1078 GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, fast_ecc_err_5); 1079 ld [%g5 + CHPR_ICACHE_LINESIZE], %g6 1080 ba,pt %icc, 6f 1081 ld [%g5 + CHPR_ICACHE_SIZE], %g5 1082 fast_ecc_err_5: 1083 ASM_LD(%g5, icache_size) 1084 ASM_LD(%g6, icache_linesize) 1085 6: 1086 CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4) 1087 1088 /* 1089 * check to see whether we parked our sibling core at the start 1090 * of this handler. If so, we need to unpark it here. 1091 * We use DCUCR reserved bits (stored in %g1) to keep track of 1092 * whether or not we need to unpark. %g5 and %g4 are scratch registers. 1093 */ 1094 UNPARK_SIBLING_CORE(%g1, %g5, %g4) 1095 1096 /* 1097 * Restore the Dcache and Icache to the previous state. 1098 */ 1099 stxa %g1, [%g0]ASI_DCU 1100 flush %g0 /* flush required after changing the IC bit */ 1101 1102 /* 1103 * Make sure our CPU logout operation was successful. 1104 */ 1105 cmp %g3, %g0 1106 be 8f 1107 nop 1108 1109 /* 1110 * If the logout structure had been busy, how many times have 1111 * we tried to use it and failed (nesting count)? If we have 1112 * already recursed a substantial number of times, then we can 1113 * assume things are not going to get better by themselves and 1114 * so it would be best to panic. 1115 */ 1116 cmp %g3, CLO_NESTING_MAX 1117 blt 7f 1118 nop 1119 1120 call ptl1_panic 1121 mov PTL1_BAD_ECC, %g1 1122 1123 7: 1124 /* 1125 * Otherwise, if the logout structure was busy but we have not 1126 * nested more times than our maximum value, then we simply 1127 * issue a retry. Our TL=0 trap handler code will check and 1128 * clear the AFSR after it is done logging what is currently 1129 * in the logout struct and handle this event at that time. 1130 */ 1131 retry 1132 8: 1133 /* 1134 * Call cpu_fast_ecc_error via systrap at PIL 14 unless we're 1135 * already at PIL 15. 1136 */ 1137 set cpu_fast_ecc_error, %g1 1138 rdpr %pil, %g4 1139 cmp %g4, PIL_14 1140 ba sys_trap 1141 movl %icc, PIL_14, %g4 1142 1143 SET_SIZE(fast_ecc_err) 1144 1145 #endif /* !(JALAPENO || SERRANO) */ 1146 1147 1148 /* 1149 * Cheetah/Cheetah+ Fast ECC at TL>0 trap strategy: 1150 * 1151 * The basic flow of this trap handler is as follows: 1152 * 1153 * 1) In the "trap 70" trap table code (fecc_err_tl1_instr), generate a 1154 * software trap 0 ("ta 0") to buy an extra set of %tpc, etc. which we 1155 * will use to save %g1 and %g2. 1156 * 2) At the software trap 0 at TL>0 trap table code (fecc_err_tl1_cont_instr), 1157 * we save %g1+%g2 using %tpc, %tnpc + %tstate and jump to the fast ecc 1158 * handler (using the just saved %g1). 1159 * 3) Turn off the Dcache if it was on and save the state of the Dcache 1160 * (whether on or off) in Bit2 (CH_ERR_TSTATE_DC_ON) of %tstate. 1161 * NB: we don't turn off the Icache because bad data is not installed nor 1162 * will we be doing any diagnostic accesses. 1163 * 4) compute physical address of the per-cpu/per-tl save area using %g1+%g2 1164 * 5) Save %g1-%g7 into the per-cpu/per-tl save area (%g1 + %g2 from the 1165 * %tpc, %tnpc, %tstate values previously saved). 1166 * 6) set %tl to %tl - 1. 1167 * 7) Save the appropriate flags and TPC in the ch_err_tl1_data structure. 1168 * 8) Save the value of CH_ERR_TSTATE_DC_ON in the ch_err_tl1_tmp field. 1169 * 9) For Cheetah and Jalapeno, read the AFAR and AFSR and clear. For 1170 * Cheetah+ (and later), read the shadow AFAR and AFSR but don't clear. 1171 * Save the values in ch_err_tl1_data. For Panther, read the shadow 1172 * AFSR_EXT and save the value in ch_err_tl1_data. 1173 * 10) Disable CEEN/NCEEN to prevent any disrupting/deferred errors from 1174 * being queued. We'll report them via the AFSR/AFAR capture in step 13. 1175 * 11) Flush the Ecache. 1176 * NB: the Ecache is flushed assuming the largest possible size with 1177 * the smallest possible line size since access to the cpu_nodes may 1178 * cause an unrecoverable DTLB miss. 1179 * 12) Reenable CEEN/NCEEN with the value saved from step 10. 1180 * 13) For Cheetah and Jalapeno, read the AFAR and AFSR and clear again. 1181 * For Cheetah+ (and later), read the primary AFAR and AFSR and now clear. 1182 * Save the read AFSR/AFAR values in ch_err_tl1_data. For Panther, 1183 * read and clear the primary AFSR_EXT and save it in ch_err_tl1_data. 1184 * 14) Flush and re-enable the Dcache if it was on at step 3. 1185 * 15) Do TRAPTRACE if enabled. 1186 * 16) Check if a UCU->WDU (or L3_UCU->WDU for Panther) happened, panic if so. 1187 * 17) Set the event pending flag in ch_err_tl1_pending[CPU] 1188 * 18) Cause a softint 15. The pil15_interrupt handler will inspect the 1189 * event pending flag and call cpu_tl1_error via systrap if set. 1190 * 19) Restore the registers from step 5 and issue retry. 1191 */ 1192 1193 /* 1194 * Cheetah ecc-protected E$ trap (Trap 70) at TL>0 1195 * tt1_fecc is replaced by fecc_err_tl1_instr in cpu_init_trap of the various 1196 * architecture-specific files. This generates a "Software Trap 0" at TL>0, 1197 * which goes to fecc_err_tl1_cont_instr, and we continue the handling there. 1198 * NB: Must be 8 instructions or less to fit in trap table and code must 1199 * be relocatable. 1200 */ 1201 1202 ENTRY_NP(fecc_err_tl1_instr) 1203 CH_ERR_TL1_TRAPENTRY(SWTRAP_0); 1204 SET_SIZE(fecc_err_tl1_instr) 1205 1206 /* 1207 * Software trap 0 at TL>0. 1208 * tt1_swtrap0 is replaced by fecc_err_tl1_cont_instr in cpu_init_trap of 1209 * the various architecture-specific files. This is used as a continuation 1210 * of the fast ecc handling where we've bought an extra TL level, so we can 1211 * use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1 1212 * and %g2. Note that %tstate has bits 0-2 and then bits 8-19 as r/w, 1213 * there's a reserved hole from 3-7. We only use bits 0-1 and 8-9 (the low 1214 * order two bits from %g1 and %g2 respectively). 1215 * NB: Must be 8 instructions or less to fit in trap table and code must 1216 * be relocatable. 1217 */ 1218 1219 ENTRY_NP(fecc_err_tl1_cont_instr) 1220 CH_ERR_TL1_SWTRAPENTRY(fast_ecc_tl1_err) 1221 SET_SIZE(fecc_err_tl1_cont_instr) 1222 1223 1224 /* 1225 * The ce_err function handles disrupting trap type 0x63 at TL=0. 1226 * 1227 * AFSR errors bits which cause this trap are: 1228 * CE, EMC, EDU:ST, EDC, WDU, WDC, CPU, CPC, IVU, IVC 1229 * 1230 * NCEEN Bit of Cheetah External Cache Error Enable Register enables 1231 * the following AFSR disrupting traps: EDU:ST, WDU, CPU, IVU 1232 * 1233 * CEEN Bit of Cheetah External Cache Error Enable Register enables 1234 * the following AFSR disrupting traps: CE, EMC, EDC, WDC, CPC, IVC 1235 * 1236 * Cheetah+ also handles (No additional processing required): 1237 * DUE, DTO, DBERR (NCEEN controlled) 1238 * THCE (CEEN and ET_ECC_en controlled) 1239 * TUE (ET_ECC_en controlled) 1240 * 1241 * Panther further adds: 1242 * IMU, L3_EDU, L3_WDU, L3_CPU (NCEEN controlled) 1243 * IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE (CEEN controlled) 1244 * TUE_SH, TUE (NCEEN and L2_tag_ECC_en controlled) 1245 * L3_TUE, L3_TUE_SH (NCEEN and ET_ECC_en controlled) 1246 * THCE (CEEN and L2_tag_ECC_en controlled) 1247 * L3_THCE (CEEN and ET_ECC_en controlled) 1248 * 1249 * Steps: 1250 * 1. Disable hardware corrected disrupting errors only (CEEN) 1251 * 2. Park sibling core if caches are shared (to avoid race 1252 * condition while accessing shared resources such as L3 1253 * data staging register during CPU logout. 1254 * 3. If the CPU logout structure is not currently being used: 1255 * 4. Clear AFSR error bits 1256 * 5. Capture Ecache, Dcache and Icache lines associated 1257 * with AFAR. 1258 * 6. Unpark sibling core if we parked it earlier. 1259 * 7. call cpu_disrupting_error via sys_trap at PIL 14 1260 * unless we're already running at PIL 15. 1261 * 4. Otherwise, if the CPU logout structure is busy: 1262 * 5. Incriment "logout busy count" and place into %g3 1263 * 6. Unpark sibling core if we parked it earlier. 1264 * 7. Issue a retry since the other CPU error logging 1265 * code will end up finding this error bit and logging 1266 * information about it later. 1267 * 5. Alternatively (to 3 and 4 above), if the cpu_private struct is 1268 * not yet initialized such that we can't even check the logout 1269 * struct, then we place the clo_flags data into %g2 1270 * (sys_trap->have_win arg #1) and call cpu_disrupting_error via 1271 * systrap. The clo_flags parameter is used to determine information 1272 * such as TL, TT, CEEN settings, etc in the high level trap 1273 * handler since we don't have access to detailed logout information 1274 * in cases where the cpu_private struct is not yet initialized. 1275 * 1276 * %g3: [ logout busy count ] - arg #2 1277 * %g2: [ clo_flags if cpu_private unavailable ] - sys_trap->have_win: arg #1 1278 */ 1279 1280 .align 128 1281 ENTRY_NP(ce_err) 1282 membar #Sync ! Cheetah requires membar #Sync 1283 1284 /* 1285 * Disable trap on hardware corrected errors (CEEN) while at TL=0 1286 * to prevent recursion. 1287 */ 1288 ldxa [%g0]ASI_ESTATE_ERR, %g1 1289 bclr EN_REG_CEEN, %g1 1290 stxa %g1, [%g0]ASI_ESTATE_ERR 1291 membar #Sync ! membar sync required 1292 1293 /* 1294 * Save current DCU state. Turn off Icache to allow capture of 1295 * Icache data by DO_CPU_LOGOUT. 1296 */ 1297 ldxa [%g0]ASI_DCU, %g1 ! save DCU in %g1 1298 andn %g1, DCU_IC, %g4 1299 stxa %g4, [%g0]ASI_DCU 1300 flush %g0 /* flush required after changing the IC bit */ 1301 1302 /* 1303 * Check to see whether we need to park our sibling core 1304 * before recording diagnostic information from caches 1305 * which may be shared by both cores. 1306 * We use %g1 to store information about whether or not 1307 * we had to park the core (%g1 holds our DCUCR value and 1308 * we only use bits from that register which are "reserved" 1309 * to keep track of core parking) so that we know whether 1310 * or not to unpark later. %g5 and %g4 are scratch registers. 1311 */ 1312 PARK_SIBLING_CORE(%g1, %g5, %g4) 1313 1314 /* 1315 * Do the CPU log out capture. 1316 * %g3 = "failed?" return value. 1317 * %g2 = Input = AFAR. Output the clo_flags info which is passed 1318 * into this macro via %g4. Output only valid if cpu_private 1319 * struct has not been initialized. 1320 * CHPR_CECC_LOGOUT = cpu logout structure offset input 1321 * %g4 = Trap information stored in the cpu logout flags field 1322 * %g5 = scr1 1323 * %g6 = scr2 1324 * %g3 = scr3 1325 * %g4 = scr4 1326 */ 1327 clr %g4 ! TL=0 bit in afsr 1328 set CHPR_CECC_LOGOUT, %g6 1329 DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4) 1330 1331 /* 1332 * Flush the Icache. Since we turned off the Icache to capture the 1333 * Icache line it is now stale or corrupted and we must flush it 1334 * before re-enabling it. 1335 */ 1336 GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, ce_err_1); 1337 ld [%g5 + CHPR_ICACHE_LINESIZE], %g6 1338 ba,pt %icc, 2f 1339 ld [%g5 + CHPR_ICACHE_SIZE], %g5 1340 ce_err_1: 1341 ASM_LD(%g5, icache_size) 1342 ASM_LD(%g6, icache_linesize) 1343 2: 1344 CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4) 1345 1346 /* 1347 * check to see whether we parked our sibling core at the start 1348 * of this handler. If so, we need to unpark it here. 1349 * We use DCUCR reserved bits (stored in %g1) to keep track of 1350 * whether or not we need to unpark. %g5 and %g4 are scratch registers. 1351 */ 1352 UNPARK_SIBLING_CORE(%g1, %g5, %g4) 1353 1354 /* 1355 * Restore Icache to previous state. 1356 */ 1357 stxa %g1, [%g0]ASI_DCU 1358 flush %g0 /* flush required after changing the IC bit */ 1359 1360 /* 1361 * Make sure our CPU logout operation was successful. 1362 */ 1363 cmp %g3, %g0 1364 be 4f 1365 nop 1366 1367 /* 1368 * If the logout structure had been busy, how many times have 1369 * we tried to use it and failed (nesting count)? If we have 1370 * already recursed a substantial number of times, then we can 1371 * assume things are not going to get better by themselves and 1372 * so it would be best to panic. 1373 */ 1374 cmp %g3, CLO_NESTING_MAX 1375 blt 3f 1376 nop 1377 1378 call ptl1_panic 1379 mov PTL1_BAD_ECC, %g1 1380 1381 3: 1382 /* 1383 * Otherwise, if the logout structure was busy but we have not 1384 * nested more times than our maximum value, then we simply 1385 * issue a retry. Our TL=0 trap handler code will check and 1386 * clear the AFSR after it is done logging what is currently 1387 * in the logout struct and handle this event at that time. 1388 */ 1389 retry 1390 4: 1391 /* 1392 * Call cpu_disrupting_error via systrap at PIL 14 unless we're 1393 * already at PIL 15. 1394 */ 1395 set cpu_disrupting_error, %g1 1396 rdpr %pil, %g4 1397 cmp %g4, PIL_14 1398 ba sys_trap 1399 movl %icc, PIL_14, %g4 1400 SET_SIZE(ce_err) 1401 1402 1403 /* 1404 * This trap cannot happen at TL>0 which means this routine will never 1405 * actually be called and so we treat this like a BAD TRAP panic. 1406 */ 1407 .align 64 1408 ENTRY_NP(ce_err_tl1) 1409 1410 call ptl1_panic 1411 mov PTL1_BAD_TRAP, %g1 1412 1413 SET_SIZE(ce_err_tl1) 1414 1415 1416 /* 1417 * The async_err function handles deferred trap types 0xA 1418 * (instruction_access_error) and 0x32 (data_access_error) at TL>=0. 1419 * 1420 * AFSR errors bits which cause this trap are: 1421 * UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR 1422 * On some platforms, EMU may causes cheetah to pull the error pin 1423 * never giving Solaris a chance to take a trap. 1424 * 1425 * NCEEN Bit of Cheetah External Cache Error Enable Register enables 1426 * the following AFSR deferred traps: UE, EMU, EDU:BLD, TO, BERR 1427 * 1428 * Steps: 1429 * 1. Disable CEEN and NCEEN errors to prevent recursive errors. 1430 * 2. Turn D$ off per Cheetah PRM P.5 Note 6, turn I$ off to capture 1431 * I$ line in DO_CPU_LOGOUT. 1432 * 3. Park sibling core if caches are shared (to avoid race 1433 * condition while accessing shared resources such as L3 1434 * data staging register during CPU logout. 1435 * 4. If the CPU logout structure is not currently being used: 1436 * 5. Clear AFSR error bits 1437 * 6. Capture Ecache, Dcache and Icache lines associated 1438 * with AFAR. 1439 * 7. Unpark sibling core if we parked it earlier. 1440 * 8. call cpu_deferred_error via sys_trap. 1441 * 5. Otherwise, if the CPU logout structure is busy: 1442 * 6. Incriment "logout busy count" 1443 * 7. Unpark sibling core if we parked it earlier. 1444 * 8) Issue a retry since the other CPU error logging 1445 * code will end up finding this error bit and logging 1446 * information about it later. 1447 * 6. Alternatively (to 4 and 5 above), if the cpu_private struct is 1448 * not yet initialized such that we can't even check the logout 1449 * struct, then we place the clo_flags data into %g2 1450 * (sys_trap->have_win arg #1) and call cpu_deferred_error via 1451 * systrap. The clo_flags parameter is used to determine information 1452 * such as TL, TT, CEEN settings, etc in the high level trap handler 1453 * since we don't have access to detailed logout information in cases 1454 * where the cpu_private struct is not yet initialized. 1455 * 1456 * %g2: [ clo_flags if cpu_private unavailable ] - sys_trap->have_win: arg #1 1457 * %g3: [ logout busy count ] - arg #2 1458 */ 1459 1460 ENTRY_NP(async_err) 1461 membar #Sync ! Cheetah requires membar #Sync 1462 1463 /* 1464 * Disable CEEN and NCEEN. 1465 */ 1466 ldxa [%g0]ASI_ESTATE_ERR, %g3 1467 andn %g3, EN_REG_NCEEN + EN_REG_CEEN, %g4 1468 stxa %g4, [%g0]ASI_ESTATE_ERR 1469 membar #Sync ! membar sync required 1470 1471 /* 1472 * Save current DCU state. 1473 * Disable Icache to allow capture of Icache data by DO_CPU_LOGOUT. 1474 * Do this regardless of whether this is a Data Access Error or 1475 * Instruction Access Error Trap. 1476 * Disable Dcache for both Data Access Error and Instruction Access 1477 * Error per Cheetah PRM P.5 Note 6. 1478 */ 1479 ldxa [%g0]ASI_DCU, %g1 ! save DCU in %g1 1480 andn %g1, DCU_IC + DCU_DC, %g4 1481 stxa %g4, [%g0]ASI_DCU 1482 flush %g0 /* flush required after changing the IC bit */ 1483 1484 /* 1485 * Check to see whether we need to park our sibling core 1486 * before recording diagnostic information from caches 1487 * which may be shared by both cores. 1488 * We use %g1 to store information about whether or not 1489 * we had to park the core (%g1 holds our DCUCR value and 1490 * we only use bits from that register which are "reserved" 1491 * to keep track of core parking) so that we know whether 1492 * or not to unpark later. %g6 and %g4 are scratch registers. 1493 */ 1494 PARK_SIBLING_CORE(%g1, %g6, %g4) 1495 1496 /* 1497 * Do the CPU logout capture. 1498 * 1499 * %g3 = "failed?" return value. 1500 * %g2 = Input = AFAR. Output the clo_flags info which is passed 1501 * into this macro via %g4. Output only valid if cpu_private 1502 * struct has not been initialized. 1503 * CHPR_ASYNC_LOGOUT = cpu logout structure offset input 1504 * %g4 = Trap information stored in the cpu logout flags field 1505 * %g5 = scr1 1506 * %g6 = scr2 1507 * %g3 = scr3 1508 * %g4 = scr4 1509 */ 1510 andcc %g5, T_TL1, %g0 1511 clr %g6 1512 movnz %xcc, 1, %g6 ! set %g6 if T_TL1 set 1513 sllx %g6, CLO_FLAGS_TL_SHIFT, %g6 1514 sllx %g5, CLO_FLAGS_TT_SHIFT, %g4 1515 set CLO_FLAGS_TT_MASK, %g2 1516 and %g4, %g2, %g4 ! ttype 1517 or %g6, %g4, %g4 ! TT and TL 1518 and %g3, EN_REG_CEEN, %g3 ! CEEN value 1519 or %g3, %g4, %g4 ! TT and TL and CEEN 1520 set CHPR_ASYNC_LOGOUT, %g6 1521 DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4) 1522 1523 /* 1524 * If the logout struct was busy, we may need to pass the 1525 * TT, TL, and CEEN information to the TL=0 handler via 1526 * systrap parameter so save it off here. 1527 */ 1528 cmp %g3, %g0 1529 be 1f 1530 nop 1531 sllx %g4, 32, %g4 1532 or %g4, %g3, %g3 1533 1: 1534 /* 1535 * Flush the Icache. Since we turned off the Icache to capture the 1536 * Icache line it is now stale or corrupted and we must flush it 1537 * before re-enabling it. 1538 */ 1539 GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, async_err_1); 1540 ld [%g5 + CHPR_ICACHE_LINESIZE], %g6 1541 ba,pt %icc, 2f 1542 ld [%g5 + CHPR_ICACHE_SIZE], %g5 1543 async_err_1: 1544 ASM_LD(%g5, icache_size) 1545 ASM_LD(%g6, icache_linesize) 1546 2: 1547 CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4) 1548 1549 /* 1550 * XXX - Don't we need to flush the Dcache before turning it back 1551 * on to avoid stale or corrupt data? Was this broken? 1552 */ 1553 /* 1554 * Flush the Dcache before turning it back on since it may now 1555 * contain stale or corrupt data. 1556 */ 1557 ASM_LD(%g5, dcache_size) 1558 ASM_LD(%g6, dcache_linesize) 1559 CH_DCACHE_FLUSHALL(%g5, %g6, %g7) 1560 1561 /* 1562 * check to see whether we parked our sibling core at the start 1563 * of this handler. If so, we need to unpark it here. 1564 * We use DCUCR reserved bits (stored in %g1) to keep track of 1565 * whether or not we need to unpark. %g5 and %g7 are scratch registers. 1566 */ 1567 UNPARK_SIBLING_CORE(%g1, %g5, %g7) 1568 1569 /* 1570 * Restore Icache and Dcache to previous state. 1571 */ 1572 stxa %g1, [%g0]ASI_DCU 1573 flush %g0 /* flush required after changing the IC bit */ 1574 1575 /* 1576 * Make sure our CPU logout operation was successful. 1577 */ 1578 cmp %g3, %g0 1579 be 4f 1580 nop 1581 1582 /* 1583 * If the logout structure had been busy, how many times have 1584 * we tried to use it and failed (nesting count)? If we have 1585 * already recursed a substantial number of times, then we can 1586 * assume things are not going to get better by themselves and 1587 * so it would be best to panic. 1588 */ 1589 cmp %g3, CLO_NESTING_MAX 1590 blt 3f 1591 nop 1592 1593 call ptl1_panic 1594 mov PTL1_BAD_ECC, %g1 1595 1596 3: 1597 /* 1598 * Otherwise, if the logout structure was busy but we have not 1599 * nested more times than our maximum value, then we simply 1600 * issue a retry. Our TL=0 trap handler code will check and 1601 * clear the AFSR after it is done logging what is currently 1602 * in the logout struct and handle this event at that time. 1603 */ 1604 retry 1605 4: 1606 RESET_USER_RTT_REGS(%g4, %g5, async_err_resetskip) 1607 async_err_resetskip: 1608 set cpu_deferred_error, %g1 1609 ba sys_trap 1610 mov PIL_15, %g4 ! run at pil 15 1611 SET_SIZE(async_err) 1612 1613 #if defined(CPU_IMP_L1_CACHE_PARITY) 1614 1615 /* 1616 * D$ parity error trap (trap 71) at TL=0. 1617 * tt0_dperr is replaced by dcache_parity_instr in cpu_init_trap of 1618 * the various architecture-specific files. This merely sets up the 1619 * arguments for cpu_parity_error and calls it via sys_trap. 1620 * NB: Must be 8 instructions or less to fit in trap table and code must 1621 * be relocatable. 1622 */ 1623 ENTRY_NP(dcache_parity_instr) 1624 membar #Sync ! Cheetah+ requires membar #Sync 1625 set cpu_parity_error, %g1 1626 or %g0, CH_ERR_DPE, %g2 1627 rdpr %tpc, %g3 1628 sethi %hi(sys_trap), %g7 1629 jmp %g7 + %lo(sys_trap) 1630 mov PIL_15, %g4 ! run at pil 15 1631 SET_SIZE(dcache_parity_instr) 1632 1633 1634 /* 1635 * D$ parity error trap (trap 71) at TL>0. 1636 * tt1_dperr is replaced by dcache_parity_tl1_instr in cpu_init_trap of 1637 * the various architecture-specific files. This generates a "Software 1638 * Trap 1" at TL>0, which goes to dcache_parity_tl1_cont_instr, and we 1639 * continue the handling there. 1640 * NB: Must be 8 instructions or less to fit in trap table and code must 1641 * be relocatable. 1642 */ 1643 ENTRY_NP(dcache_parity_tl1_instr) 1644 CH_ERR_TL1_TRAPENTRY(SWTRAP_1); 1645 SET_SIZE(dcache_parity_tl1_instr) 1646 1647 1648 /* 1649 * Software trap 1 at TL>0. 1650 * tt1_swtrap1 is replaced by dcache_parity_tl1_cont_instr in cpu_init_trap 1651 * of the various architecture-specific files. This is used as a continuation 1652 * of the dcache parity handling where we've bought an extra TL level, so we 1653 * can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1 1654 * and %g2. Note that %tstate has bits 0-2 and then bits 8-19 as r/w, 1655 * there's a reserved hole from 3-7. We only use bits 0-1 and 8-9 (the low 1656 * order two bits from %g1 and %g2 respectively). 1657 * NB: Must be 8 instructions or less to fit in trap table and code must 1658 * be relocatable. 1659 */ 1660 ENTRY_NP(dcache_parity_tl1_cont_instr) 1661 CH_ERR_TL1_SWTRAPENTRY(dcache_parity_tl1_err); 1662 SET_SIZE(dcache_parity_tl1_cont_instr) 1663 1664 /* 1665 * D$ parity error at TL>0 handler 1666 * We get here via trap 71 at TL>0->Software trap 1 at TL>0. We enter 1667 * this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate. 1668 */ 1669 1670 ENTRY_NP(dcache_parity_tl1_err) 1671 1672 /* 1673 * This macro saves all the %g registers in the ch_err_tl1_data 1674 * structure, updates the ch_err_tl1_flags and saves the %tpc in 1675 * ch_err_tl1_tpc. At the end of this macro, %g1 will point to 1676 * the ch_err_tl1_data structure and %g2 will have the original 1677 * flags in the ch_err_tl1_data structure. All %g registers 1678 * except for %g1 and %g2 will be available. 1679 */ 1680 CH_ERR_TL1_ENTER(CH_ERR_DPE); 1681 1682 #ifdef TRAPTRACE 1683 /* 1684 * Get current trap trace entry physical pointer. 1685 */ 1686 CPU_INDEX(%g6, %g5) 1687 sll %g6, TRAPTR_SIZE_SHIFT, %g6 1688 set trap_trace_ctl, %g5 1689 add %g6, %g5, %g6 1690 ld [%g6 + TRAPTR_LIMIT], %g5 1691 tst %g5 1692 be %icc, dpe_tl1_skip_tt 1693 nop 1694 ldx [%g6 + TRAPTR_PBASE], %g5 1695 ld [%g6 + TRAPTR_OFFSET], %g4 1696 add %g5, %g4, %g5 1697 1698 /* 1699 * Create trap trace entry. 1700 */ 1701 rd %asi, %g7 1702 wr %g0, TRAPTR_ASI, %asi 1703 rd STICK, %g4 1704 stxa %g4, [%g5 + TRAP_ENT_TICK]%asi 1705 rdpr %tl, %g4 1706 stha %g4, [%g5 + TRAP_ENT_TL]%asi 1707 rdpr %tt, %g4 1708 stha %g4, [%g5 + TRAP_ENT_TT]%asi 1709 rdpr %tpc, %g4 1710 stna %g4, [%g5 + TRAP_ENT_TPC]%asi 1711 rdpr %tstate, %g4 1712 stxa %g4, [%g5 + TRAP_ENT_TSTATE]%asi 1713 stna %sp, [%g5 + TRAP_ENT_SP]%asi 1714 stna %g0, [%g5 + TRAP_ENT_TR]%asi 1715 stna %g0, [%g5 + TRAP_ENT_F1]%asi 1716 stna %g0, [%g5 + TRAP_ENT_F2]%asi 1717 stna %g0, [%g5 + TRAP_ENT_F3]%asi 1718 stna %g0, [%g5 + TRAP_ENT_F4]%asi 1719 wr %g0, %g7, %asi 1720 1721 /* 1722 * Advance trap trace pointer. 1723 */ 1724 ld [%g6 + TRAPTR_OFFSET], %g5 1725 ld [%g6 + TRAPTR_LIMIT], %g4 1726 st %g5, [%g6 + TRAPTR_LAST_OFFSET] 1727 add %g5, TRAP_ENT_SIZE, %g5 1728 sub %g4, TRAP_ENT_SIZE, %g4 1729 cmp %g5, %g4 1730 movge %icc, 0, %g5 1731 st %g5, [%g6 + TRAPTR_OFFSET] 1732 dpe_tl1_skip_tt: 1733 #endif /* TRAPTRACE */ 1734 1735 /* 1736 * I$ and D$ are automatically turned off by HW when the CPU hits 1737 * a dcache or icache parity error so we will just leave those two 1738 * off for now to avoid repeating this trap. 1739 * For Panther, however, since we trap on P$ data parity errors 1740 * and HW does not automatically disable P$, we need to disable it 1741 * here so that we don't encounter any recursive traps when we 1742 * issue the retry. 1743 */ 1744 ldxa [%g0]ASI_DCU, %g3 1745 mov 1, %g4 1746 sllx %g4, DCU_PE_SHIFT, %g4 1747 andn %g3, %g4, %g3 1748 stxa %g3, [%g0]ASI_DCU 1749 membar #Sync 1750 1751 /* 1752 * We fall into this macro if we've successfully logged the error in 1753 * the ch_err_tl1_data structure and want the PIL15 softint to pick 1754 * it up and log it. %g1 must point to the ch_err_tl1_data structure. 1755 * Restores the %g registers and issues retry. 1756 */ 1757 CH_ERR_TL1_EXIT; 1758 SET_SIZE(dcache_parity_tl1_err) 1759 1760 /* 1761 * I$ parity error trap (trap 72) at TL=0. 1762 * tt0_iperr is replaced by icache_parity_instr in cpu_init_trap of 1763 * the various architecture-specific files. This merely sets up the 1764 * arguments for cpu_parity_error and calls it via sys_trap. 1765 * NB: Must be 8 instructions or less to fit in trap table and code must 1766 * be relocatable. 1767 */ 1768 1769 ENTRY_NP(icache_parity_instr) 1770 membar #Sync ! Cheetah+ requires membar #Sync 1771 set cpu_parity_error, %g1 1772 or %g0, CH_ERR_IPE, %g2 1773 rdpr %tpc, %g3 1774 sethi %hi(sys_trap), %g7 1775 jmp %g7 + %lo(sys_trap) 1776 mov PIL_15, %g4 ! run at pil 15 1777 SET_SIZE(icache_parity_instr) 1778 1779 /* 1780 * I$ parity error trap (trap 72) at TL>0. 1781 * tt1_iperr is replaced by icache_parity_tl1_instr in cpu_init_trap of 1782 * the various architecture-specific files. This generates a "Software 1783 * Trap 2" at TL>0, which goes to icache_parity_tl1_cont_instr, and we 1784 * continue the handling there. 1785 * NB: Must be 8 instructions or less to fit in trap table and code must 1786 * be relocatable. 1787 */ 1788 ENTRY_NP(icache_parity_tl1_instr) 1789 CH_ERR_TL1_TRAPENTRY(SWTRAP_2); 1790 SET_SIZE(icache_parity_tl1_instr) 1791 1792 /* 1793 * Software trap 2 at TL>0. 1794 * tt1_swtrap2 is replaced by icache_parity_tl1_cont_instr in cpu_init_trap 1795 * of the various architecture-specific files. This is used as a continuation 1796 * of the icache parity handling where we've bought an extra TL level, so we 1797 * can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1 1798 * and %g2. Note that %tstate has bits 0-2 and then bits 8-19 as r/w, 1799 * there's a reserved hole from 3-7. We only use bits 0-1 and 8-9 (the low 1800 * order two bits from %g1 and %g2 respectively). 1801 * NB: Must be 8 instructions or less to fit in trap table and code must 1802 * be relocatable. 1803 */ 1804 ENTRY_NP(icache_parity_tl1_cont_instr) 1805 CH_ERR_TL1_SWTRAPENTRY(icache_parity_tl1_err); 1806 SET_SIZE(icache_parity_tl1_cont_instr) 1807 1808 1809 /* 1810 * I$ parity error at TL>0 handler 1811 * We get here via trap 72 at TL>0->Software trap 2 at TL>0. We enter 1812 * this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate. 1813 */ 1814 1815 ENTRY_NP(icache_parity_tl1_err) 1816 1817 /* 1818 * This macro saves all the %g registers in the ch_err_tl1_data 1819 * structure, updates the ch_err_tl1_flags and saves the %tpc in 1820 * ch_err_tl1_tpc. At the end of this macro, %g1 will point to 1821 * the ch_err_tl1_data structure and %g2 will have the original 1822 * flags in the ch_err_tl1_data structure. All %g registers 1823 * except for %g1 and %g2 will be available. 1824 */ 1825 CH_ERR_TL1_ENTER(CH_ERR_IPE); 1826 1827 #ifdef TRAPTRACE 1828 /* 1829 * Get current trap trace entry physical pointer. 1830 */ 1831 CPU_INDEX(%g6, %g5) 1832 sll %g6, TRAPTR_SIZE_SHIFT, %g6 1833 set trap_trace_ctl, %g5 1834 add %g6, %g5, %g6 1835 ld [%g6 + TRAPTR_LIMIT], %g5 1836 tst %g5 1837 be %icc, ipe_tl1_skip_tt 1838 nop 1839 ldx [%g6 + TRAPTR_PBASE], %g5 1840 ld [%g6 + TRAPTR_OFFSET], %g4 1841 add %g5, %g4, %g5 1842 1843 /* 1844 * Create trap trace entry. 1845 */ 1846 rd %asi, %g7 1847 wr %g0, TRAPTR_ASI, %asi 1848 rd STICK, %g4 1849 stxa %g4, [%g5 + TRAP_ENT_TICK]%asi 1850 rdpr %tl, %g4 1851 stha %g4, [%g5 + TRAP_ENT_TL]%asi 1852 rdpr %tt, %g4 1853 stha %g4, [%g5 + TRAP_ENT_TT]%asi 1854 rdpr %tpc, %g4 1855 stna %g4, [%g5 + TRAP_ENT_TPC]%asi 1856 rdpr %tstate, %g4 1857 stxa %g4, [%g5 + TRAP_ENT_TSTATE]%asi 1858 stna %sp, [%g5 + TRAP_ENT_SP]%asi 1859 stna %g0, [%g5 + TRAP_ENT_TR]%asi 1860 stna %g0, [%g5 + TRAP_ENT_F1]%asi 1861 stna %g0, [%g5 + TRAP_ENT_F2]%asi 1862 stna %g0, [%g5 + TRAP_ENT_F3]%asi 1863 stna %g0, [%g5 + TRAP_ENT_F4]%asi 1864 wr %g0, %g7, %asi 1865 1866 /* 1867 * Advance trap trace pointer. 1868 */ 1869 ld [%g6 + TRAPTR_OFFSET], %g5 1870 ld [%g6 + TRAPTR_LIMIT], %g4 1871 st %g5, [%g6 + TRAPTR_LAST_OFFSET] 1872 add %g5, TRAP_ENT_SIZE, %g5 1873 sub %g4, TRAP_ENT_SIZE, %g4 1874 cmp %g5, %g4 1875 movge %icc, 0, %g5 1876 st %g5, [%g6 + TRAPTR_OFFSET] 1877 ipe_tl1_skip_tt: 1878 #endif /* TRAPTRACE */ 1879 1880 /* 1881 * We fall into this macro if we've successfully logged the error in 1882 * the ch_err_tl1_data structure and want the PIL15 softint to pick 1883 * it up and log it. %g1 must point to the ch_err_tl1_data structure. 1884 * Restores the %g registers and issues retry. 1885 */ 1886 CH_ERR_TL1_EXIT; 1887 1888 SET_SIZE(icache_parity_tl1_err) 1889 1890 #endif /* CPU_IMP_L1_CACHE_PARITY */ 1891 1892 1893 /* 1894 * The itlb_rd_entry and dtlb_rd_entry functions return the tag portion of the 1895 * tte, the virtual address, and the ctxnum of the specified tlb entry. They 1896 * should only be used in places where you have no choice but to look at the 1897 * tlb itself. 1898 * 1899 * Note: These two routines are required by the Estar "cpr" loadable module. 1900 */ 1901 1902 ENTRY_NP(itlb_rd_entry) 1903 sllx %o0, 3, %o0 1904 ldxa [%o0]ASI_ITLB_ACCESS, %g1 1905 stx %g1, [%o1] 1906 ldxa [%o0]ASI_ITLB_TAGREAD, %g2 1907 set TAGREAD_CTX_MASK, %o4 1908 andn %g2, %o4, %o5 1909 retl 1910 stx %o5, [%o2] 1911 SET_SIZE(itlb_rd_entry) 1912 1913 1914 ENTRY_NP(dtlb_rd_entry) 1915 sllx %o0, 3, %o0 1916 ldxa [%o0]ASI_DTLB_ACCESS, %g1 1917 stx %g1, [%o1] 1918 ldxa [%o0]ASI_DTLB_TAGREAD, %g2 1919 set TAGREAD_CTX_MASK, %o4 1920 andn %g2, %o4, %o5 1921 retl 1922 stx %o5, [%o2] 1923 SET_SIZE(dtlb_rd_entry) 1924 1925 1926 #if !(defined(JALAPENO) || defined(SERRANO)) 1927 1928 ENTRY(get_safari_config) 1929 ldxa [%g0]ASI_SAFARI_CONFIG, %o0 1930 retl 1931 nop 1932 SET_SIZE(get_safari_config) 1933 1934 1935 ENTRY(set_safari_config) 1936 stxa %o0, [%g0]ASI_SAFARI_CONFIG 1937 membar #Sync 1938 retl 1939 nop 1940 SET_SIZE(set_safari_config) 1941 1942 #endif /* !(JALAPENO || SERRANO) */ 1943 1944 1945 /* 1946 * Clear the NPT (non-privileged trap) bit in the %tick/%stick 1947 * registers. In an effort to make the change in the 1948 * tick/stick counter as consistent as possible, we disable 1949 * all interrupts while we're changing the registers. We also 1950 * ensure that the read and write instructions are in the same 1951 * line in the instruction cache. 1952 */ 1953 ENTRY_NP(cpu_clearticknpt) 1954 rdpr %pstate, %g1 /* save processor state */ 1955 andn %g1, PSTATE_IE, %g3 /* turn off */ 1956 wrpr %g0, %g3, %pstate /* interrupts */ 1957 rdpr %tick, %g2 /* get tick register */ 1958 brgez,pn %g2, 1f /* if NPT bit off, we're done */ 1959 mov 1, %g3 /* create mask */ 1960 sllx %g3, 63, %g3 /* for NPT bit */ 1961 ba,a,pt %xcc, 2f 1962 .align 8 /* Ensure rd/wr in same i$ line */ 1963 2: 1964 rdpr %tick, %g2 /* get tick register */ 1965 wrpr %g3, %g2, %tick /* write tick register, */ 1966 /* clearing NPT bit */ 1967 1: 1968 rd STICK, %g2 /* get stick register */ 1969 brgez,pn %g2, 3f /* if NPT bit off, we're done */ 1970 mov 1, %g3 /* create mask */ 1971 sllx %g3, 63, %g3 /* for NPT bit */ 1972 ba,a,pt %xcc, 4f 1973 .align 8 /* Ensure rd/wr in same i$ line */ 1974 4: 1975 rd STICK, %g2 /* get stick register */ 1976 wr %g3, %g2, STICK /* write stick register, */ 1977 /* clearing NPT bit */ 1978 3: 1979 jmp %g4 + 4 1980 wrpr %g0, %g1, %pstate /* restore processor state */ 1981 1982 SET_SIZE(cpu_clearticknpt) 1983 1984 1985 #if defined(CPU_IMP_L1_CACHE_PARITY) 1986 1987 /* 1988 * correct_dcache_parity(size_t size, size_t linesize) 1989 * 1990 * Correct D$ data parity by zeroing the data and initializing microtag 1991 * for all indexes and all ways of the D$. 1992 * 1993 */ 1994 ENTRY(correct_dcache_parity) 1995 /* 1996 * Register Usage: 1997 * 1998 * %o0 = input D$ size 1999 * %o1 = input D$ line size 2000 * %o2 = scratch 2001 * %o3 = scratch 2002 * %o4 = scratch 2003 */ 2004 2005 sub %o0, %o1, %o0 ! init cache line address 2006 2007 /* 2008 * For Panther CPUs, we also need to clear the data parity bits 2009 * using DC_data_parity bit of the ASI_DCACHE_DATA register. 2010 */ 2011 GET_CPU_IMPL(%o3) 2012 cmp %o3, PANTHER_IMPL 2013 bne 1f 2014 clr %o3 ! zero for non-Panther 2015 mov 1, %o3 2016 sll %o3, PN_DC_DATA_PARITY_BIT_SHIFT, %o3 2017 2018 1: 2019 /* 2020 * Set utag = way since it must be unique within an index. 2021 */ 2022 srl %o0, 14, %o2 ! get cache way (DC_way) 2023 membar #Sync ! required before ASI_DC_UTAG 2024 stxa %o2, [%o0]ASI_DC_UTAG ! set D$ utag = cache way 2025 membar #Sync ! required after ASI_DC_UTAG 2026 2027 /* 2028 * Zero line of D$ data (and data parity bits for Panther) 2029 */ 2030 sub %o1, 8, %o2 2031 or %o0, %o3, %o4 ! same address + DC_data_parity 2032 2: 2033 membar #Sync ! required before ASI_DC_DATA 2034 stxa %g0, [%o0 + %o2]ASI_DC_DATA ! zero 8 bytes of D$ data 2035 membar #Sync ! required after ASI_DC_DATA 2036 /* 2037 * We also clear the parity bits if this is a panther. For non-Panther 2038 * CPUs, we simply end up clearing the $data register twice. 2039 */ 2040 stxa %g0, [%o4 + %o2]ASI_DC_DATA 2041 membar #Sync 2042 2043 subcc %o2, 8, %o2 2044 bge 2b 2045 nop 2046 2047 subcc %o0, %o1, %o0 2048 bge 1b 2049 nop 2050 2051 retl 2052 nop 2053 SET_SIZE(correct_dcache_parity) 2054 2055 #endif /* CPU_IMP_L1_CACHE_PARITY */ 2056 2057 2058 ENTRY_NP(stick_timestamp) 2059 rd STICK, %g1 ! read stick reg 2060 sllx %g1, 1, %g1 2061 srlx %g1, 1, %g1 ! clear npt bit 2062 2063 retl 2064 stx %g1, [%o0] ! store the timestamp 2065 SET_SIZE(stick_timestamp) 2066 2067 2068 ENTRY_NP(stick_adj) 2069 rdpr %pstate, %g1 ! save processor state 2070 andn %g1, PSTATE_IE, %g3 2071 ba 1f ! cache align stick adj 2072 wrpr %g0, %g3, %pstate ! turn off interrupts 2073 2074 .align 16 2075 1: nop 2076 2077 rd STICK, %g4 ! read stick reg 2078 add %g4, %o0, %o1 ! adjust stick with skew 2079 wr %o1, %g0, STICK ! write stick reg 2080 2081 retl 2082 wrpr %g1, %pstate ! restore processor state 2083 SET_SIZE(stick_adj) 2084 2085 ENTRY_NP(kdi_get_stick) 2086 rd STICK, %g1 2087 stx %g1, [%o0] 2088 retl 2089 mov %g0, %o0 2090 SET_SIZE(kdi_get_stick) 2091 2092 /* 2093 * Invalidate the specified line from the D$. 2094 * 2095 * Register usage: 2096 * %o0 - index for the invalidation, specifies DC_way and DC_addr 2097 * 2098 * ASI_DC_TAG, 0x47, is used in the following manner. A 64-bit value is 2099 * stored to a particular DC_way and DC_addr in ASI_DC_TAG. 2100 * 2101 * The format of the stored 64-bit value is: 2102 * 2103 * +----------+--------+----------+ 2104 * | Reserved | DC_tag | DC_valid | 2105 * +----------+--------+----------+ 2106 * 63 31 30 1 0 2107 * 2108 * DC_tag is the 30-bit physical tag of the associated line. 2109 * DC_valid is the 1-bit valid field for both the physical and snoop tags. 2110 * 2111 * The format of the 64-bit DC_way and DC_addr into ASI_DC_TAG is: 2112 * 2113 * +----------+--------+----------+----------+ 2114 * | Reserved | DC_way | DC_addr | Reserved | 2115 * +----------+--------+----------+----------+ 2116 * 63 16 15 14 13 5 4 0 2117 * 2118 * DC_way is a 2-bit index that selects one of the 4 ways. 2119 * DC_addr is a 9-bit index that selects one of 512 tag/valid fields. 2120 * 2121 * Setting the DC_valid bit to zero for the specified DC_way and 2122 * DC_addr index into the D$ results in an invalidation of a D$ line. 2123 */ 2124 ENTRY(dcache_inval_line) 2125 sll %o0, 5, %o0 ! shift index into DC_way and DC_addr 2126 stxa %g0, [%o0]ASI_DC_TAG ! zero the DC_valid and DC_tag bits 2127 membar #Sync 2128 retl 2129 nop 2130 SET_SIZE(dcache_inval_line) 2131 2132 /* 2133 * Invalidate the entire I$ 2134 * 2135 * Register usage: 2136 * %o0 - specifies IC_way, IC_addr, IC_tag 2137 * %o1 - scratch 2138 * %o2 - used to save and restore DCU value 2139 * %o3 - scratch 2140 * %o5 - used to save and restore PSTATE 2141 * 2142 * Due to the behavior of the I$ control logic when accessing ASI_IC_TAG, 2143 * the I$ should be turned off. Accesses to ASI_IC_TAG may collide and 2144 * block out snoops and invalidates to the I$, causing I$ consistency 2145 * to be broken. Before turning on the I$, all I$ lines must be invalidated. 2146 * 2147 * ASI_IC_TAG, 0x67, is used in the following manner. A 64-bit value is 2148 * stored to a particular IC_way, IC_addr, IC_tag in ASI_IC_TAG. The 2149 * info below describes store (write) use of ASI_IC_TAG. Note that read 2150 * use of ASI_IC_TAG behaves differently. 2151 * 2152 * The format of the stored 64-bit value is: 2153 * 2154 * +----------+--------+---------------+-----------+ 2155 * | Reserved | Valid | IC_vpred<7:0> | Undefined | 2156 * +----------+--------+---------------+-----------+ 2157 * 63 55 54 53 46 45 0 2158 * 2159 * Valid is the 1-bit valid field for both the physical and snoop tags. 2160 * IC_vpred is the 8-bit LPB bits for 8 instructions starting at 2161 * the 32-byte boundary aligned address specified by IC_addr. 2162 * 2163 * The format of the 64-bit IC_way, IC_addr, IC_tag into ASI_IC_TAG is: 2164 * 2165 * +----------+--------+---------+--------+---------+ 2166 * | Reserved | IC_way | IC_addr | IC_tag |Reserved | 2167 * +----------+--------+---------+--------+---------+ 2168 * 63 16 15 14 13 5 4 3 2 0 2169 * 2170 * IC_way is a 2-bit index that selects one of the 4 ways. 2171 * IC_addr[13:6] is an 8-bit index that selects one of 256 valid fields. 2172 * IC_addr[5] is a "don't care" for a store. 2173 * IC_tag set to 2 specifies that the stored value is to be interpreted 2174 * as containing Valid and IC_vpred as described above. 2175 * 2176 * Setting the Valid bit to zero for the specified IC_way and 2177 * IC_addr index into the I$ results in an invalidation of an I$ line. 2178 */ 2179 ENTRY(icache_inval_all) 2180 rdpr %pstate, %o5 2181 andn %o5, PSTATE_IE, %o3 2182 wrpr %g0, %o3, %pstate ! clear IE bit 2183 2184 GET_CPU_PRIVATE_PTR(%g0, %o0, %o2, icache_inval_all_1); 2185 ld [%o0 + CHPR_ICACHE_LINESIZE], %o1 2186 ba,pt %icc, 2f 2187 ld [%o0 + CHPR_ICACHE_SIZE], %o0 2188 icache_inval_all_1: 2189 ASM_LD(%o0, icache_size) 2190 ASM_LD(%o1, icache_linesize) 2191 2: 2192 CH_ICACHE_FLUSHALL(%o0, %o1, %o2, %o4) 2193 2194 retl 2195 wrpr %g0, %o5, %pstate ! restore earlier pstate 2196 SET_SIZE(icache_inval_all) 2197 2198 2199 /* 2200 * cache_scrubreq_tl1 is the crosstrap handler called on offlined cpus via a 2201 * crosstrap. It atomically increments the outstanding request counter and, 2202 * if there was not already an outstanding request, branches to setsoftint_tl1 2203 * to enqueue an intr_vec for the given inum. 2204 */ 2205 2206 ! Register usage: 2207 ! 2208 ! Arguments: 2209 ! %g1 - inum 2210 ! %g2 - index into chsm_outstanding array 2211 ! 2212 ! Internal: 2213 ! %g2, %g3, %g5 - scratch 2214 ! %g4 - ptr. to scrub_misc chsm_outstanding[index]. 2215 ! %g6 - setsoftint_tl1 address 2216 2217 ENTRY_NP(cache_scrubreq_tl1) 2218 mulx %g2, CHSM_OUTSTANDING_INCR, %g2 2219 set CHPR_SCRUB_MISC + CHSM_OUTSTANDING, %g3 2220 add %g2, %g3, %g2 2221 GET_CPU_PRIVATE_PTR(%g2, %g4, %g5, 1f); 2222 ld [%g4], %g2 ! cpu's chsm_outstanding[index] 2223 ! 2224 ! no need to use atomic instructions for the following 2225 ! increment - we're at tl1 2226 ! 2227 add %g2, 0x1, %g3 2228 brnz,pn %g2, 1f ! no need to enqueue more intr_vec 2229 st %g3, [%g4] ! delay - store incremented counter 2230 ASM_JMP(%g6, setsoftint_tl1) 2231 ! not reached 2232 1: 2233 retry 2234 SET_SIZE(cache_scrubreq_tl1) 2235 2236 2237 /* 2238 * Get the error state for the processor. 2239 * Note that this must not be used at TL>0 2240 */ 2241 ENTRY(get_cpu_error_state) 2242 #if defined(CHEETAH_PLUS) 2243 set ASI_SHADOW_REG_VA, %o2 2244 ldxa [%o2]ASI_AFSR, %o1 ! shadow afsr reg 2245 stx %o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR] 2246 ldxa [%o2]ASI_AFAR, %o1 ! shadow afar reg 2247 stx %o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFAR] 2248 GET_CPU_IMPL(%o3) ! Only panther has AFSR_EXT registers 2249 cmp %o3, PANTHER_IMPL 2250 bne,a 1f 2251 stx %g0, [%o0 + CH_CPU_ERRORS_AFSR_EXT] ! zero for non-PN 2252 set ASI_AFSR_EXT_VA, %o2 2253 ldxa [%o2]ASI_AFSR, %o1 ! afsr_ext reg 2254 stx %o1, [%o0 + CH_CPU_ERRORS_AFSR_EXT] 2255 set ASI_SHADOW_AFSR_EXT_VA, %o2 2256 ldxa [%o2]ASI_AFSR, %o1 ! shadow afsr_ext reg 2257 stx %o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT] 2258 b 2f 2259 nop 2260 1: 2261 stx %g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT] ! zero for non-PN 2262 2: 2263 #else /* CHEETAH_PLUS */ 2264 stx %g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR] 2265 stx %g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFAR] 2266 stx %g0, [%o0 + CH_CPU_ERRORS_AFSR_EXT] 2267 stx %g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT] 2268 #endif /* CHEETAH_PLUS */ 2269 #if defined(SERRANO) 2270 /* 2271 * Serrano has an afar2 which captures the address on FRC/FRU errors. 2272 * We save this in the afar2 of the register save area. 2273 */ 2274 set ASI_MCU_AFAR2_VA, %o2 2275 ldxa [%o2]ASI_MCU_CTRL, %o1 2276 stx %o1, [%o0 + CH_CPU_ERRORS_AFAR2] 2277 #endif /* SERRANO */ 2278 ldxa [%g0]ASI_AFSR, %o1 ! primary afsr reg 2279 stx %o1, [%o0 + CH_CPU_ERRORS_AFSR] 2280 ldxa [%g0]ASI_AFAR, %o1 ! primary afar reg 2281 retl 2282 stx %o1, [%o0 + CH_CPU_ERRORS_AFAR] 2283 SET_SIZE(get_cpu_error_state) 2284 2285 /* 2286 * Check a page of memory for errors. 2287 * 2288 * Load each 64 byte block from physical memory. 2289 * Check AFSR after each load to see if an error 2290 * was caused. If so, log/scrub that error. 2291 * 2292 * Used to determine if a page contains 2293 * CEs when CEEN is disabled. 2294 */ 2295 ENTRY(cpu_check_block) 2296 ! 2297 ! get a new window with room for the error regs 2298 ! 2299 save %sp, -SA(MINFRAME + CH_CPU_ERROR_SIZE), %sp 2300 srl %i1, 6, %l4 ! clear top bits of psz 2301 ! and divide by 64 2302 rd %fprs, %l2 ! store FP 2303 wr %g0, FPRS_FEF, %fprs ! enable FP 2304 1: 2305 ldda [%i0]ASI_BLK_P, %d0 ! load a block 2306 membar #Sync 2307 ldxa [%g0]ASI_AFSR, %l3 ! read afsr reg 2308 brz,a,pt %l3, 2f ! check for error 2309 nop 2310 2311 ! 2312 ! if error, read the error regs and log it 2313 ! 2314 call get_cpu_error_state 2315 add %fp, STACK_BIAS - CH_CPU_ERROR_SIZE, %o0 2316 2317 ! 2318 ! cpu_ce_detected(ch_cpu_errors_t *, flag) 2319 ! 2320 call cpu_ce_detected ! log the error 2321 mov CE_CEEN_TIMEOUT, %o1 2322 2: 2323 dec %l4 ! next 64-byte block 2324 brnz,a,pt %l4, 1b 2325 add %i0, 64, %i0 ! increment block addr 2326 2327 wr %l2, %g0, %fprs ! restore FP 2328 ret 2329 restore 2330 2331 SET_SIZE(cpu_check_block) 2332 2333 /* 2334 * Perform a cpu logout called from C. This is used where we did not trap 2335 * for the error but still want to gather "what we can". Caller must make 2336 * sure cpu private area exists and that the indicated logout area is free 2337 * for use, and that we are unable to migrate cpus. 2338 */ 2339 ENTRY(cpu_delayed_logout) 2340 rdpr %pstate, %o2 2341 andn %o2, PSTATE_IE, %o2 2342 wrpr %g0, %o2, %pstate ! disable interrupts 2343 PARK_SIBLING_CORE(%o2, %o3, %o4) ! %o2 has DCU value 2344 add %o1, CH_CLO_DATA + CH_CHD_EC_DATA, %o1 2345 rd %asi, %g1 2346 wr %g0, ASI_P, %asi 2347 GET_ECACHE_DTAGS(%o0, %o1, %o3, %o4, %o5) 2348 wr %g1, %asi 2349 UNPARK_SIBLING_CORE(%o2, %o3, %o4) ! can use %o2 again 2350 rdpr %pstate, %o2 2351 or %o2, PSTATE_IE, %o2 2352 wrpr %g0, %o2, %pstate 2353 retl 2354 nop 2355 SET_SIZE(cpu_delayed_logout) 2356 2357 ENTRY(dtrace_blksuword32) 2358 save %sp, -SA(MINFRAME + 4), %sp 2359 2360 rdpr %pstate, %l1 2361 andn %l1, PSTATE_IE, %l2 ! disable interrupts to 2362 wrpr %g0, %l2, %pstate ! protect our FPU diddling 2363 2364 rd %fprs, %l0 2365 andcc %l0, FPRS_FEF, %g0 2366 bz,a,pt %xcc, 1f ! if the fpu is disabled 2367 wr %g0, FPRS_FEF, %fprs ! ... enable the fpu 2368 2369 st %f0, [%fp + STACK_BIAS - 4] ! save %f0 to the stack 2370 1: 2371 set 0f, %l5 2372 /* 2373 * We're about to write a block full or either total garbage 2374 * (not kernel data, don't worry) or user floating-point data 2375 * (so it only _looks_ like garbage). 2376 */ 2377 ld [%i1], %f0 ! modify the block 2378 membar #Sync 2379 stn %l5, [THREAD_REG + T_LOFAULT] ! set up the lofault handler 2380 stda %d0, [%i0]ASI_BLK_COMMIT_S ! store the modified block 2381 membar #Sync 2382 stn %g0, [THREAD_REG + T_LOFAULT] ! remove the lofault handler 2383 2384 bz,a,pt %xcc, 1f 2385 wr %g0, %l0, %fprs ! restore %fprs 2386 2387 ld [%fp + STACK_BIAS - 4], %f0 ! restore %f0 2388 1: 2389 2390 wrpr %g0, %l1, %pstate ! restore interrupts 2391 2392 ret 2393 restore %g0, %g0, %o0 2394 2395 0: 2396 membar #Sync 2397 stn %g0, [THREAD_REG + T_LOFAULT] ! remove the lofault handler 2398 2399 bz,a,pt %xcc, 1f 2400 wr %g0, %l0, %fprs ! restore %fprs 2401 2402 ld [%fp + STACK_BIAS - 4], %f0 ! restore %f0 2403 1: 2404 2405 wrpr %g0, %l1, %pstate ! restore interrupts 2406 2407 /* 2408 * If tryagain is set (%i2) we tail-call dtrace_blksuword32_err() 2409 * which deals with watchpoints. Otherwise, just return -1. 2410 */ 2411 brnz,pt %i2, 1f 2412 nop 2413 ret 2414 restore %g0, -1, %o0 2415 1: 2416 call dtrace_blksuword32_err 2417 restore 2418 2419 SET_SIZE(dtrace_blksuword32) 2420 2421 #ifdef CHEETAHPLUS_ERRATUM_25 2422 2423 ENTRY(claimlines) 2424 1: 2425 subcc %o1, %o2, %o1 2426 add %o0, %o1, %o3 2427 bgeu,a,pt %xcc, 1b 2428 casxa [%o3]ASI_MEM, %g0, %g0 2429 membar #Sync 2430 retl 2431 nop 2432 SET_SIZE(claimlines) 2433 2434 ENTRY(cpu_feature_init) 2435 save %sp, -SA(MINFRAME), %sp 2436 sethi %hi(cheetah_bpe_off), %o0 2437 ld [%o0 + %lo(cheetah_bpe_off)], %o0 2438 brz %o0, 1f 2439 nop 2440 rd ASR_DISPATCH_CONTROL, %o0 2441 andn %o0, ASR_DISPATCH_CONTROL_BPE, %o0 2442 wr %o0, 0, ASR_DISPATCH_CONTROL 2443 1: 2444 ! 2445 ! get the device_id and store the device_id 2446 ! in the appropriate cpunodes structure 2447 ! given the cpus index 2448 ! 2449 CPU_INDEX(%o0, %o1) 2450 mulx %o0, CPU_NODE_SIZE, %o0 2451 set cpunodes + DEVICE_ID, %o1 2452 ldxa [%g0] ASI_DEVICE_SERIAL_ID, %o2 2453 stx %o2, [%o0 + %o1] 2454 #ifdef CHEETAHPLUS_ERRATUM_34 2455 ! 2456 ! apply Cheetah+ erratum 34 workaround 2457 ! 2458 call itlb_erratum34_fixup 2459 nop 2460 call dtlb_erratum34_fixup 2461 nop 2462 #endif /* CHEETAHPLUS_ERRATUM_34 */ 2463 ret 2464 restore 2465 SET_SIZE(cpu_feature_init) 2466 2467 /* 2468 * Copy a tsb entry atomically, from src to dest. 2469 * src must be 128 bit aligned. 2470 */ 2471 ENTRY(copy_tsb_entry) 2472 ldda [%o0]ASI_NQUAD_LD, %o2 ! %o2 = tag, %o3 = data 2473 stx %o2, [%o1] 2474 stx %o3, [%o1 + 8 ] 2475 retl 2476 nop 2477 SET_SIZE(copy_tsb_entry) 2478 2479 #endif /* CHEETAHPLUS_ERRATUM_25 */ 2480 2481 #ifdef CHEETAHPLUS_ERRATUM_34 2482 2483 ! 2484 ! In Cheetah+ erratum 34, under certain conditions an ITLB locked 2485 ! index 0 TTE will erroneously be displaced when a new TTE is 2486 ! loaded via ASI_ITLB_IN. In order to avoid cheetah+ erratum 34, 2487 ! locked index 0 TTEs must be relocated. 2488 ! 2489 ! NOTE: Care must be taken to avoid an ITLB miss in this routine. 2490 ! 2491 ENTRY_NP(itlb_erratum34_fixup) 2492 rdpr %pstate, %o3 2493 #ifdef DEBUG 2494 PANIC_IF_INTR_DISABLED_PSTR(%o3, u3_di_label1, %g1) 2495 #endif /* DEBUG */ 2496 wrpr %o3, PSTATE_IE, %pstate ! Disable interrupts 2497 ldxa [%g0]ASI_ITLB_ACCESS, %o1 ! %o1 = entry 0 data 2498 ldxa [%g0]ASI_ITLB_TAGREAD, %o2 ! %o2 = entry 0 tag 2499 2500 cmp %o1, %g0 ! Is this entry valid? 2501 bge %xcc, 1f 2502 andcc %o1, TTE_LCK_INT, %g0 ! Is this entry locked? 2503 bnz %icc, 2f 2504 nop 2505 1: 2506 retl ! Nope, outta here... 2507 wrpr %g0, %o3, %pstate ! Enable interrupts 2508 2: 2509 sethi %hi(FLUSH_ADDR), %o4 2510 stxa %g0, [%o2]ASI_ITLB_DEMAP ! Flush this mapping 2511 flush %o4 ! Flush required for I-MMU 2512 ! 2513 ! Start search from index 1 up. This is because the kernel force 2514 ! loads its text page at index 15 in sfmmu_kernel_remap() and we 2515 ! don't want our relocated entry evicted later. 2516 ! 2517 ! NOTE: We assume that we'll be successful in finding an unlocked 2518 ! or invalid entry. If that isn't the case there are bound to 2519 ! bigger problems. 2520 ! 2521 set (1 << 3), %g3 2522 3: 2523 ldxa [%g3]ASI_ITLB_ACCESS, %o4 ! Load TTE from t16 2524 ! 2525 ! If this entry isn't valid, we'll choose to displace it (regardless 2526 ! of the lock bit). 2527 ! 2528 cmp %o4, %g0 ! TTE is > 0 iff not valid 2529 bge %xcc, 4f ! If invalid, go displace 2530 andcc %o4, TTE_LCK_INT, %g0 ! Check for lock bit 2531 bnz,a %icc, 3b ! If locked, look at next 2532 add %g3, (1 << 3), %g3 ! entry 2533 4: 2534 ! 2535 ! We found an unlocked or invalid entry; we'll explicitly load 2536 ! the former index 0 entry here. 2537 ! 2538 sethi %hi(FLUSH_ADDR), %o4 2539 set MMU_TAG_ACCESS, %g4 2540 stxa %o2, [%g4]ASI_IMMU 2541 stxa %o1, [%g3]ASI_ITLB_ACCESS 2542 flush %o4 ! Flush required for I-MMU 2543 retl 2544 wrpr %g0, %o3, %pstate ! Enable interrupts 2545 SET_SIZE(itlb_erratum34_fixup) 2546 2547 ! 2548 ! In Cheetah+ erratum 34, under certain conditions a DTLB locked 2549 ! index 0 TTE will erroneously be displaced when a new TTE is 2550 ! loaded. In order to avoid cheetah+ erratum 34, locked index 0 2551 ! TTEs must be relocated. 2552 ! 2553 ENTRY_NP(dtlb_erratum34_fixup) 2554 rdpr %pstate, %o3 2555 #ifdef DEBUG 2556 PANIC_IF_INTR_DISABLED_PSTR(%o3, u3_di_label2, %g1) 2557 #endif /* DEBUG */ 2558 wrpr %o3, PSTATE_IE, %pstate ! Disable interrupts 2559 ldxa [%g0]ASI_DTLB_ACCESS, %o1 ! %o1 = entry 0 data 2560 ldxa [%g0]ASI_DTLB_TAGREAD, %o2 ! %o2 = entry 0 tag 2561 2562 cmp %o1, %g0 ! Is this entry valid? 2563 bge %xcc, 1f 2564 andcc %o1, TTE_LCK_INT, %g0 ! Is this entry locked? 2565 bnz %icc, 2f 2566 nop 2567 1: 2568 retl ! Nope, outta here... 2569 wrpr %g0, %o3, %pstate ! Enable interrupts 2570 2: 2571 stxa %g0, [%o2]ASI_DTLB_DEMAP ! Flush this mapping 2572 membar #Sync 2573 ! 2574 ! Start search from index 1 up. 2575 ! 2576 ! NOTE: We assume that we'll be successful in finding an unlocked 2577 ! or invalid entry. If that isn't the case there are bound to 2578 ! bigger problems. 2579 ! 2580 set (1 << 3), %g3 2581 3: 2582 ldxa [%g3]ASI_DTLB_ACCESS, %o4 ! Load TTE from t16 2583 ! 2584 ! If this entry isn't valid, we'll choose to displace it (regardless 2585 ! of the lock bit). 2586 ! 2587 cmp %o4, %g0 ! TTE is > 0 iff not valid 2588 bge %xcc, 4f ! If invalid, go displace 2589 andcc %o4, TTE_LCK_INT, %g0 ! Check for lock bit 2590 bnz,a %icc, 3b ! If locked, look at next 2591 add %g3, (1 << 3), %g3 ! entry 2592 4: 2593 ! 2594 ! We found an unlocked or invalid entry; we'll explicitly load 2595 ! the former index 0 entry here. 2596 ! 2597 set MMU_TAG_ACCESS, %g4 2598 stxa %o2, [%g4]ASI_DMMU 2599 stxa %o1, [%g3]ASI_DTLB_ACCESS 2600 membar #Sync 2601 retl 2602 wrpr %g0, %o3, %pstate ! Enable interrupts 2603 SET_SIZE(dtlb_erratum34_fixup) 2604 2605 #endif /* CHEETAHPLUS_ERRATUM_34 */ 2606