/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

# ident	"%Z%%M% %I% %E% SMI"

#include <sys/param.h>
#include <sys/errno.h>
#include <sys/asm_linkage.h>
#include <sys/vtrace.h>
#include <sys/machthread.h>
#include <sys/clock.h>
#include <sys/asi.h>
#include <sys/fsr.h>
#include <sys/privregs.h>

#include "assym.h"


/*
 * At or below this number of bytes we always copy byte-for-byte.
 */
#define	SMALL_LIMIT	7

/*
 * LOFAULT_SET: flag set by kzero and kcopy to indicate that a t_lofault
 * handler was set.
 */
#define	LOFAULT_SET 2


/*
 * Copy a block of storage, returning an error code if `from' or
 * `to' takes a kernel pagefault which cannot be resolved.
 * Returns errno value on pagefault error, 0 if all ok.
 */

	.seg	".text"
	.align	4

	ENTRY(kcopy)

	save	%sp, -SA(MINFRAME), %sp
	set	.copyerr, %l7		! copyerr is lofault value
	ldn	[THREAD_REG + T_LOFAULT], %o5	! save existing handler
	or	%o5, LOFAULT_SET, %o5
	membar	#Sync			! sync error barrier
	b	.do_copy		! common code
	stn	%l7, [THREAD_REG + T_LOFAULT]	! set t_lofault

/*
 * We got here because of a fault during kcopy.
 * Errno value is in %g1.
 */
.copyerr:
	! kcopy() *always* sets a t_lofault handler and ORs LOFAULT_SET
	! into %o5 to indicate that it has done so.  Clear the LOFAULT_SET
	! flag before restoring the previous error handler.
	andn	%o5, LOFAULT_SET, %o5
	membar	#Sync			! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	ret
	restore	%g1, 0, %o0

	SET_SIZE(kcopy)


/*
 * Copy a block of storage - must not overlap (from + len <= to).
 */

	ENTRY(bcopy)

	save	%sp, -SA(MINFRAME), %sp
	clr	%o5			! flag LOFAULT_SET is not set for bcopy

.do_copy:
	mov	%i1, %g5		! save dest addr start

	mov	%i2, %l6		! save size

	cmp	%i2, 12			! for small counts
	blu	%ncc, .bytecp		! just copy bytes
	.empty

	!
	! use aligned transfers where possible
	!
	xor	%i0, %i1, %o4		! xor from and to address
	btst	7, %o4			! if lower three bits zero
	bz	.aldoubcp		! can align on double boundary
	.empty				! assembler complains about label

	xor	%i0, %i1, %o4		! xor from and to address
	btst	3, %o4			! if lower two bits zero
	bz	.alwordcp		! can align on word boundary
	btst	3, %i0			! delay slot, from address unaligned?
	!
	! use aligned reads and writes where possible
	! this differs from wordcp in that it copes
	! with odd alignment between source and destination
	! using word reads and writes with the proper shifts
	! in between to align transfers to and from memory
	! i0 - src address, i1 - dest address, i2 - count
	! i3, i4 - tmps used for generating complete word
	! i5 (word to write)
	! l0 size in bits of upper part of source word (US)
	! l1 size in bits of lower part of source word (LS = 32 - US)
	! l2 size in bits of upper part of destination word (UD)
	! l3 size in bits of lower part of destination word (LD = 32 - UD)
	! l4 number of bytes leftover after aligned transfers complete
	! l5 the number 32
	!
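	!
	! Illustrative sketch only (C-like, not built; names are
	! assumptions for the sketch): the steady-state loop below keeps
	! leftover source bits in a holding register and merges them with
	! each new aligned word, where US = %l0 and LS = %l1 = 32 - US:
	!
	!	uint32_t word = *src++;		/* aligned 32-bit read */
	!	*dst++ = hold | (word >> US);	/* old bits + new bits */
	!	hold = word << LS;		/* save leftover */
	!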
	mov	32, %l5			! load an oft-needed constant
	bz	.align_dst_only
	btst	3, %i1			! is destination address aligned?
	clr	%i4			! clear registers used in either case
	bz	.align_src_only
	clr	%l0
	!
	! both source and destination addresses are unaligned
	!
1:					! align source
	ldub	[%i0], %i3		! read a byte from source address
	add	%i0, 1, %i0		! increment source address
	or	%i4, %i3, %i4		! or in with previous bytes (if any)
	btst	3, %i0			! is source aligned?
	add	%l0, 8, %l0		! increment size of upper source (US)
	bnz,a	1b
	sll	%i4, 8, %i4		! make room for next byte

	sub	%l5, %l0, %l1		! generate shift left count (LS)
	sll	%i4, %l1, %i4		! prepare to get rest
	ld	[%i0], %i3		! read a word
	add	%i0, 4, %i0		! increment source address
	srl	%i3, %l0, %i5		! upper src bits into lower dst bits
	or	%i4, %i5, %i5		! merge
	mov	24, %l3			! align destination
1:
	srl	%i5, %l3, %i4		! prepare to write a single byte
	stb	%i4, [%i1]		! write a byte
	add	%i1, 1, %i1		! increment destination address
	sub	%i2, 1, %i2		! decrement count
	btst	3, %i1			! is destination aligned?
	bnz,a	1b
	sub	%l3, 8, %l3		! delay slot, decrement shift count (LD)
	sub	%l5, %l3, %l2		! generate shift left count (UD)
	sll	%i5, %l2, %i5		! move leftover into upper bytes
	cmp	%l2, %l0		! cmp # reqd to fill dst w old src left
	bgu	%ncc, .more_needed	! need more to fill than we have
	nop

	sll	%i3, %l1, %i3		! clear upper used byte(s)
	srl	%i3, %l1, %i3
	! get the odd bytes between alignments
	sub	%l0, %l2, %l0		! regenerate shift count
	sub	%l5, %l0, %l1		! generate new shift left count (LS)
	and	%i2, 3, %l4		! must do remaining bytes if count%4 > 0
	andn	%i2, 3, %i2		! # of aligned bytes that can be moved
	srl	%i3, %l0, %i4
	or	%i5, %i4, %i5
	st	%i5, [%i1]		! write a word
	subcc	%i2, 4, %i2		! decrement count
	bz	%ncc, .unalign_out
	add	%i1, 4, %i1		! increment destination address

	b	2f
	sll	%i3, %l1, %i5		! get leftover into upper bits
.more_needed:
	sll	%i3, %l0, %i3		! save remaining byte(s)
	srl	%i3, %l0, %i3
	sub	%l2, %l0, %l1		! regenerate shift count
	sub	%l5, %l1, %l0		! generate new shift left count
	sll	%i3, %l1, %i4		! move to fill empty space
	b	3f
	or	%i5, %i4, %i5		! merge to complete word
	!
	! the source address is aligned and destination is not
	!
.align_dst_only:
	ld	[%i0], %i4		! read a word
	add	%i0, 4, %i0		! increment source address
	mov	24, %l0			! initial shift alignment count
1:
	srl	%i4, %l0, %i3		! prepare to write a single byte
	stb	%i3, [%i1]		! write a byte
	add	%i1, 1, %i1		! increment destination address
	sub	%i2, 1, %i2		! decrement count
	btst	3, %i1			! is destination aligned?
	bnz,a	1b
	sub	%l0, 8, %l0		! delay slot, decrement shift count
.xfer:
	sub	%l5, %l0, %l1		! generate shift left count
	sll	%i4, %l1, %i5		! get leftover
3:
	and	%i2, 3, %l4		! must do remaining bytes if count%4 > 0
	andn	%i2, 3, %i2		! # of aligned bytes that can be moved
2:
	ld	[%i0], %i3		! read a source word
	add	%i0, 4, %i0		! increment source address
	srl	%i3, %l0, %i4		! upper src bits into lower dst bits
	or	%i5, %i4, %i5		! merge with upper dest bits (leftover)
	st	%i5, [%i1]		! write a destination word
	subcc	%i2, 4, %i2		! decrement count
	bz	%ncc, .unalign_out	! check if done
	add	%i1, 4, %i1		! increment destination address
	b	2b			! loop
	sll	%i3, %l1, %i5		! get leftover
.unalign_out:
	tst	%l4			! any bytes leftover?
	bz	%ncc, .cpdone
	.empty				! allow next instruction in delay slot
1:
	sub	%l0, 8, %l0		! decrement shift
	srl	%i3, %l0, %i4		! upper src byte into lower dst byte
	stb	%i4, [%i1]		! write a byte
	subcc	%l4, 1, %l4		! decrement count
	bz	%ncc, .cpdone		! done?
	add	%i1, 1, %i1		! increment destination
	tst	%l0			! any more previously read bytes
	bnz	%ncc, 1b		! we have leftover bytes
	mov	%l4, %i2		! delay slot, mv cnt where dbytecp wants
	b	.dbytecp		! let dbytecp do the rest
	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst
	!
	! the destination address is aligned and the source is not
	!
.align_src_only:
	ldub	[%i0], %i3		! read a byte from source address
	add	%i0, 1, %i0		! increment source address
	or	%i4, %i3, %i4		! or in with previous bytes (if any)
	btst	3, %i0			! is source aligned?
	add	%l0, 8, %l0		! increment shift count (US)
	bnz,a	.align_src_only
	sll	%i4, 8, %i4		! make room for next byte
	b,a	.xfer
	!
	! if from address unaligned for double-word moves,
	! move bytes till it is, if count is < 56 it could take
	! longer to align the thing than to do the transfer
	! in word size chunks right away
	!
.aldoubcp:
	cmp	%i2, 56			! if count < 56, use wordcp, it takes
	blu,a	%ncc, .alwordcp		! longer to align doubles than words
	mov	3, %o0			! mask for word alignment
	call	.alignit		! copy bytes until aligned
	mov	7, %o0			! mask for double alignment
	!
	! source and destination are now double-word aligned
	! i3 has aligned count returned by alignit
	!
	and	%i2, 7, %i2		! unaligned leftover count
	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst
5:
	ldx	[%i0+%i1], %o4		! read from address
	stx	%o4, [%i1]		! write at destination address
	subcc	%i3, 8, %i3		! dec count
	bgu	%ncc, 5b
	add	%i1, 8, %i1		! delay slot, inc to address
	cmp	%i2, 4			! see if we can copy a word
	blu	%ncc, .dbytecp		! if 3 or less bytes use bytecp
	.empty
	!
	! for leftover bytes we fall into wordcp, if needed
	!
.wordcp:
	and	%i2, 3, %i2		! unaligned leftover count
5:
	ld	[%i0+%i1], %o4		! read from address
	st	%o4, [%i1]		! write at destination address
	subcc	%i3, 4, %i3		! dec count
	bgu	%ncc, 5b
	add	%i1, 4, %i1		! delay slot, inc to address
	b,a	.dbytecp

	! we come here to align copies on word boundaries
.alwordcp:
	call	.alignit		! go word-align it
	mov	3, %o0			! bits that must be zero to be aligned
	b	.wordcp
	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst

	!
	! byte copy, works with any alignment
	!
.bytecp:
	b	.dbytecp
	sub	%i0, %i1, %i0		! i0 gets difference of src and dst

	!
	! differenced byte copy, works with any alignment
	! assumes dest in %i1 and (source - dest) in %i0
	!
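	!
	! Illustrative sketch only (C-like, not built): keeping the
	! difference (src - dst) in one register lets the loop advance a
	! single pointer.  Assuming "diff = src - dst":
	!
	!	while (count-- > 0) {
	!		*dst = *(dst + diff);	/* dst + diff == src */
	!		dst++;
	!	}
	!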
1:
	stb	%o4, [%i1]		! write to address
	inc	%i1			! inc to address
.dbytecp:
	deccc	%i2			! dec count
	bgeu,a	%ncc, 1b		! loop till done
	ldub	[%i0+%i1], %o4		! read from address
.cpdone:
	membar	#Sync			! sync error barrier
	! Restore t_lofault handler, if came here from kcopy().
	tst	%o5
	bz	%ncc, 1f
	andn	%o5, LOFAULT_SET, %o5
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
1:
	mov	%g5, %o0		! copy dest address
	call	sync_icache
	mov	%l6, %o1		! saved size
	ret
	restore	%g0, 0, %o0		! return (0)

/*
 * Common code used to align transfers on word and doubleword
 * boundaries.  Aligns source and destination and returns a count
 * of aligned bytes to transfer in %i3
 */
1:
	inc	%i0			! inc from
	stb	%o4, [%i1]		! write a byte
	inc	%i1			! inc to
	dec	%i2			! dec count
.alignit:
	btst	%o0, %i0		! %o0 is bit mask to check for alignment
	bnz,a	1b
	ldub	[%i0], %o4		! read next byte

	retl
	andn	%i2, %o0, %i3		! return size of aligned bytes
	SET_SIZE(bcopy)

/*
 * Block copy with possibly overlapped operands.
 */

	ENTRY(ovbcopy)
	tst	%o2			! check count
	bgu,a	%ncc, 1f		! branch if there is work to do
	subcc	%o0, %o1, %o3		! delay: difference of from and to address

	retl				! nothing to do or bad arguments
	nop
1:
	bneg,a	%ncc, 2f
	neg	%o3			! if < 0, make it positive
2:	cmp	%o2, %o3		! cmp size and abs(from - to)
	bleu	%ncc, bcopy		! if size <= abs(diff): use bcopy,
	.empty				! no overlap
	cmp	%o0, %o1		! compare from and to addresses
	blu	%ncc, .ov_bkwd		! if from < to, copy backwards
	nop
	!
	! Copy forwards.
	!
.ov_fwd:
	ldub	[%o0], %o3		! read from address
	inc	%o0			! inc from address
	stb	%o3, [%o1]		! write to address
	deccc	%o2			! dec count
	bgu	%ncc, .ov_fwd		! loop till done
	inc	%o1			! inc to address

	retl				! return
	nop
	!
	! Copy backwards.
	!
.ov_bkwd:
	deccc	%o2			! dec count
	ldub	[%o0 + %o2], %o3	! get byte at end of src
	bgu	%ncc, .ov_bkwd		! loop till done
	stb	%o3, [%o1 + %o2]	! delay slot, store at end of dst

	retl				! return
	nop
	SET_SIZE(ovbcopy)

/*
 * hwblkpagecopy()
 *
 * Copies exactly one page.  This routine assumes the caller (ppcopy)
 * has already disabled kernel preemption and has checked
 * use_hw_bcopy.
 */
	ENTRY(hwblkpagecopy)
	save	%sp, -SA(MINFRAME), %sp

	! %i0 - source address (arg)
	! %i1 - destination address (arg)
	! %i2 - length of region (not arg)

	set	PAGESIZE, %i2
	mov	%i1, %o0		! store destination address for flushing

	/*
	 * Copying exactly one page; PAGESIZE is a multiple of 0x80.
	 */
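	/*
	 * Illustrative sketch only (C-like, not built): the unrolled
	 * loop below moves the page as 64-bit words, 0x80 bytes per
	 * iteration.  Types and names here are assumptions for the
	 * sketch:
	 *
	 *	uint64_t *s = src, *d = dst;
	 *	for (size_t n = PAGESIZE; n != 0; n -= 0x80) {
	 *		for (int i = 0; i < 16; i++)
	 *			d[i] = s[i];	// 16 x 8 = 0x80 bytes
	 *		s += 16; d += 16;
	 *	}
	 */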
1:
	ldx	[%i0+0x0], %l0
	ldx	[%i0+0x8], %l1
	ldx	[%i0+0x10], %l2
	ldx	[%i0+0x18], %l3
	ldx	[%i0+0x20], %l4
	ldx	[%i0+0x28], %l5
	ldx	[%i0+0x30], %l6
	ldx	[%i0+0x38], %l7
	stx	%l0, [%i1+0x0]
	stx	%l1, [%i1+0x8]
	stx	%l2, [%i1+0x10]
	stx	%l3, [%i1+0x18]
	stx	%l4, [%i1+0x20]
	stx	%l5, [%i1+0x28]
	stx	%l6, [%i1+0x30]
	stx	%l7, [%i1+0x38]

	ldx	[%i0+0x40], %l0
	ldx	[%i0+0x48], %l1
	ldx	[%i0+0x50], %l2
	ldx	[%i0+0x58], %l3
	ldx	[%i0+0x60], %l4
	ldx	[%i0+0x68], %l5
	ldx	[%i0+0x70], %l6
	ldx	[%i0+0x78], %l7
	stx	%l0, [%i1+0x40]
	stx	%l1, [%i1+0x48]
	stx	%l2, [%i1+0x50]
	stx	%l3, [%i1+0x58]
	stx	%l4, [%i1+0x60]
	stx	%l5, [%i1+0x68]
	stx	%l6, [%i1+0x70]
	stx	%l7, [%i1+0x78]

	add	%i0, 0x80, %i0
	subcc	%i2, 0x80, %i2
	bgu,pt	%xcc, 1b
	add	%i1, 0x80, %i1

	! %o0 contains the dest. address
	set	PAGESIZE, %o1
	call	sync_icache
	nop

	membar	#Sync
	ret
	restore	%g0, 0, %o0
	SET_SIZE(hwblkpagecopy)


/*
 * Transfer data to and from user space -
 * Note that these routines can cause faults.
 * It is assumed that the kernel has nothing mapped
 * below KERNELBASE in the virtual address space.
 *
 * Note that copyin(9F) and copyout(9F) are part of the
 * DDI/DKI which specifies that they return '-1' on "errors."
 *
 * Sigh.
 *
 * So there are two extremely similar routines - xcopyin() and
 * xcopyout() - which return the errno that we've faithfully computed.
 * This allows other callers (e.g. uiomove(9F)) to work correctly.
 * Given that these are used pretty heavily, we expand the calling
 * sequences inline for all flavours (rather than making wrappers).
 *
 * There are also stub routines for xcopyout_little and xcopyin_little,
 * which currently are intended to handle requests of <= 16 bytes from
 * do_unaligned.  Future enhancement to make them handle 8k pages
 * efficiently is left as an exercise...
 */

/*
 * Copy user data to kernel space (copyOP/xcopyOP/copyOP_noerr)
 *
 * General theory of operation:
 *
 * None of the copyops routines grab a window.
 *
 * Flow:
 *
 * If count == zero return zero.
 *
 * Store the previous lo_fault handler into %g6.
 * Place our secondary lofault handler into %g5.
 * Place the address of our fault handler into %o3.
 *
 * If count is less than or equal to SMALL_LIMIT (7) we
 * always do a byte for byte copy.
 *
 * If count is > SMALL_LIMIT, we check the alignment of the input
 * and output pointers.  We store -count in %o3, and store the number
 * of chunks (8, 4, 2 or 1 byte) operated on in our basic copy loop
 * in %o2.  Following this we branch to the appropriate copy loop and
 * copy that many chunks.  Since we've been adding the chunk size
 * to %o3 each time through as well as decrementing %o2, we can tell
 * if any data is left to be copied by examining %o3.  If that is
 * zero, we're done and can go home.  If not, we figure out what the
 * largest chunk size left to be copied is and branch to that copy
 * loop unless there's only one byte left.  We load that as we're
 * branching to code that stores it just before we return.
 *
 * Fault handlers are invoked if we reference memory that has no
 * current mapping.  All forms share the same copyio_fault handler.
 * This routine handles fixing up the stack and general housecleaning.
 * Each copy operation has a simple fault handler that is then called
 * to do the work specific to the individual operation.  The handlers
 * for copyOP and xcopyOP are found at the end of the individual
 * functions.  The handlers for xcopyOP_little are found at the end of
 * xcopyin_little.  The handlers for copyOP_noerr are found at the end
 * of copyin_noerr.
 */
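/*
 * Illustrative sketch only (C-like, not built) of the -count trick
 * described above; names are assumptions for the sketch.  With both
 * pointers advanced to their ends and off = -count, a single increment
 * walks both buffers:
 *
 *	const char *se = src + count;	// end of source
 *	char *de = dst + count;		// end of destination
 *	ssize_t off = -(ssize_t)count;
 *	while (off != 0) {
 *		de[off] = se[off];
 *		off++;			// reaches 0 exactly at the end
 *	}
 */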
/*
 * Copy kernel data to user space (copyout/xcopyout/xcopyout_little).
 */

/*
 * We save the arguments in the following registers in case of a fault:
 *	kaddr - %g2
 *	uaddr - %g3
 *	count - %g4
 */
#define	SAVE_SRC	%g2
#define	SAVE_DST	%g3
#define	SAVE_COUNT	%g4

#define	REAL_LOFAULT	%g5
#define	SAVED_LOFAULT	%g6

/*
 * Generic copyio fault handler.  This is the first line of defense when a
 * fault occurs in (x)copyin/(x)copyout.  In order for this to function
 * properly, the value of the 'real' lofault handler should be in
 * REAL_LOFAULT.  This allows us to share common code for all the flavors
 * of the copy operations, including the _noerr versions.
 *
 * Note that this function will restore the original input parameters before
 * calling REAL_LOFAULT.  So the real handler can vector to the appropriate
 * member of the t_copyop structure, if needed.
 */
	ENTRY(copyio_fault)
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault

	mov	SAVE_SRC, %o0
	mov	SAVE_DST, %o1
	jmp	REAL_LOFAULT
	mov	SAVE_COUNT, %o2
	SET_SIZE(copyio_fault)

	ENTRY(copyout)
	sethi	%hi(.copyout_err), REAL_LOFAULT
	or	REAL_LOFAULT, %lo(.copyout_err), REAL_LOFAULT

.do_copyout:
	!
	! Check the length and bail if zero.
	!
	tst	%o2
	bnz,pt	%ncc, 1f
	nop
	retl
	clr	%o0
1:
	sethi	%hi(copyio_fault), %o3
	ldn	[THREAD_REG + T_LOFAULT], SAVED_LOFAULT
	or	%o3, %lo(copyio_fault), %o3
	membar	#Sync
	stn	%o3, [THREAD_REG + T_LOFAULT]

	mov	%o0, SAVE_SRC
	mov	%o1, SAVE_DST
	mov	%o2, SAVE_COUNT

	!
	! Check to see if we're more than SMALL_LIMIT (7 bytes).
	! Run in leaf mode, using the %o regs as our input regs.
	!
	subcc	%o2, SMALL_LIMIT, %o3
	bgu,a,pt %ncc, .dco_ns
	or	%o0, %o1, %o3

.dcobcp:
	sub	%g0, %o2, %o3		! negate count
	add	%o0, %o2, %o0		! make %o0 point at the end
	add	%o1, %o2, %o1		! make %o1 point at the end
	ba,pt	%ncc, .dcocl
	ldub	[%o0 + %o3], %o4	! load first byte
	!
	! %o0 and %o1 point at the end and remain pointing at the end
	! of their buffers.  We pull things out by adding %o3 (which is
	! the negation of the length) to the buffer end which gives us
	! the current location in the buffers.  By incrementing %o3 we
	! walk through both buffers without having to bump each buffer's
	! pointer.  A very fast 4 instruction loop.
	!
	.align	16
.dcocl:
	stba	%o4, [%o1 + %o3]ASI_USER
	inccc	%o3
	bl,a,pt	%ncc, .dcocl
	ldub	[%o0 + %o3], %o4
	!
	! We're done.  Go home.
	!
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
	retl
	clr	%o0
	!
	! Try aligned copies from here.
	!
.dco_ns:
	! %o0 = kernel addr (to be copied from)
	! %o1 = user addr (to be copied to)
	! %o2 = length
	! %o3 = %o0 | %o1 (used for alignment checking)
	! copyio_fault is already installed in t_lofault;
	! the original handler is saved in SAVED_LOFAULT (%g6)
	!
	! See if we're single byte aligned.  If we are, check the
	! limit for single byte copies.  If we're smaller or equal,
	! bounce to the byte for byte copy loop.  Otherwise do it in
	! HW (if enabled).
	!
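	!
	! Illustrative sketch only (C-like, not built) of the dispatch
	! below, where bits = src | dst:
	!
	!	if (bits & 1)			/* odd address: bytes */
	!		goto byte_loop;
	!	else if ((bits & 7) == 0)	/* 8-byte aligned */
	!		goto loop8;
	!	else if ((bits & 3) == 0)	/* 4-byte aligned */
	!		goto loop4;
	!	else				/* must be 2-byte aligned */
	!		goto loop2;
	!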
	btst	1, %o3
	bz,pt	%icc, .dcoh8
	btst	7, %o3

	ba	.dcobcp
	nop
.dcoh8:
	!
	! 8 byte aligned?
	!
	bnz,a	%ncc, .dcoh4
	btst	3, %o3
.dcos8:
	!
	! Housekeeping for copy loops.  Uses same idea as in the byte for
	! byte copy loop above.
	!
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .dodebc
	srl	%o2, 3, %o2		! Number of 8 byte chunks to copy
	!
	! 4 byte aligned?
	!
.dcoh4:
	bnz,pn	%ncc, .dcoh2
	nop
.dcos4:
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .dodfbc
	srl	%o2, 2, %o2		! Number of 4 byte chunks to copy
	!
	! We must be 2 byte aligned.  Off we go.
	! The check for small copies was done in the
	! delay at .dcoh4
	!
.dcoh2:
.dcos2:
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .dodtbc
	srl	%o2, 1, %o2		! Number of 2 byte chunks to copy

.dodebc:
	ldx	[%o0 + %o3], %o4
	deccc	%o2
	stxa	%o4, [%o1 + %o3]ASI_USER
	bg,pt	%ncc, .dodebc
	addcc	%o3, 8, %o3
	!
	! End of copy loop.  Check to see if we're done.  Most
	! eight byte aligned copies end here.
	!
	bz,pt	%ncc, .dcofh
	nop
	!
	! Something is left - do it byte for byte.
	!
	ba,pt	%ncc, .dcocl
	ldub	[%o0 + %o3], %o4	! load next byte
	!
	! Four byte copy loop.  %o2 is the number of 4 byte chunks to copy.
	!
	.align	32
.dodfbc:
	lduw	[%o0 + %o3], %o4
	deccc	%o2
	sta	%o4, [%o1 + %o3]ASI_USER
	bg,pt	%ncc, .dodfbc
	addcc	%o3, 4, %o3
	!
	! End of copy loop.  Check to see if we're done.  Most
	! four byte aligned copies end here.
	!
	bz,pt	%ncc, .dcofh
	nop
	!
	! Something is left.  Do it byte for byte.
	!
	ba,pt	%ncc, .dcocl
	ldub	[%o0 + %o3], %o4	! load next byte
	!
	! two byte aligned copy loop.  %o2 is the number of 2 byte chunks to
	! copy.
	!
	.align	32
.dodtbc:
	lduh	[%o0 + %o3], %o4
	deccc	%o2
	stha	%o4, [%o1 + %o3]ASI_USER
	bg,pt	%ncc, .dodtbc
	addcc	%o3, 2, %o3
	!
	! End of copy loop.  Anything left?
	!
	bz,pt	%ncc, .dcofh
	nop
	!
	! Deal with the last byte
	!
	ldub	[%o0 + %o3], %o4
	stba	%o4, [%o1 + %o3]ASI_USER
.dcofh:
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	clr	%o0

.copyout_err:
	ldn	[THREAD_REG + T_COPYOPS], %o4
	brz	%o4, 2f
	nop
	ldn	[%o4 + CP_COPYOUT], %g2
	jmp	%g2
	nop
2:
	retl
	mov	-1, %o0
	SET_SIZE(copyout)


	ENTRY(xcopyout)
	sethi	%hi(.xcopyout_err), REAL_LOFAULT
	b	.do_copyout
	or	REAL_LOFAULT, %lo(.xcopyout_err), REAL_LOFAULT
.xcopyout_err:
	ldn	[THREAD_REG + T_COPYOPS], %o4
	brz	%o4, 2f
	nop
	ldn	[%o4 + CP_XCOPYOUT], %g2
	jmp	%g2
	nop
2:
	retl
	mov	%g1, %o0
	SET_SIZE(xcopyout)

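/*
 * Illustrative sketch only (C-like, not built): xcopyout_little walks
 * the source backwards and the destination forwards, so the bytes land
 * in reversed order (the little-endian view of the data); the user-side
 * stores go through ASI_AIUSL.  Roughly:
 *
 *	for (size_t i = 0; i < count; i++)
 *		udst[i] = ksrc[count - 1 - i];
 */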
	ENTRY(xcopyout_little)
	sethi	%hi(.little_err), %o4
	ldn	[THREAD_REG + T_LOFAULT], %o5
	or	%o4, %lo(.little_err), %o4
	membar	#Sync			! sync error barrier
	stn	%o4, [THREAD_REG + T_LOFAULT]

	subcc	%g0, %o2, %o3
	add	%o0, %o2, %o0
	bz,pn	%ncc, 2f		! check for zero bytes
	sub	%o2, 1, %o4
	add	%o0, %o4, %o0		! start w/last byte
	add	%o1, %o2, %o1
	ldub	[%o0+%o3], %o4

1:	stba	%o4, [%o1+%o3]ASI_AIUSL
	inccc	%o3
	sub	%o0, 2, %o0		! get next byte
	bcc,a,pt %ncc, 1b
	ldub	[%o0+%o3], %o4

2:	membar	#Sync			! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	mov	%g0, %o0		! return (0)
	SET_SIZE(xcopyout_little)

/*
 * Copy user data to kernel space (copyin/xcopyin/xcopyin_little)
 */

	ENTRY(copyin)
	sethi	%hi(.copyin_err), REAL_LOFAULT
	or	REAL_LOFAULT, %lo(.copyin_err), REAL_LOFAULT

.do_copyin:
	!
	! Check the length and bail if zero.
	!
	tst	%o2
	bnz,pt	%ncc, 1f
	nop
	retl
	clr	%o0
1:
	sethi	%hi(copyio_fault), %o3
	ldn	[THREAD_REG + T_LOFAULT], SAVED_LOFAULT
	or	%o3, %lo(copyio_fault), %o3
	membar	#Sync
	stn	%o3, [THREAD_REG + T_LOFAULT]

	mov	%o0, SAVE_SRC
	mov	%o1, SAVE_DST
	mov	%o2, SAVE_COUNT

	!
	! Check to see if we're more than SMALL_LIMIT.
	!
	subcc	%o2, SMALL_LIMIT, %o3
	bgu,a,pt %ncc, .dci_ns
	or	%o0, %o1, %o3

.dcibcp:
	sub	%g0, %o2, %o3		! setup for copy loop
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	ba,pt	%ncc, .dcicl
	lduba	[%o0 + %o3]ASI_USER, %o4
	!
	! %o0 and %o1 point at the end and remain pointing at the end
	! of their buffers.  We pull things out by adding %o3 (which is
	! the negation of the length) to the buffer end which gives us
	! the current location in the buffers.  By incrementing %o3 we
	! walk through both buffers without having to bump each buffer's
	! pointer.  A very fast 4 instruction loop.
	!
	.align	16
.dcicl:
	stb	%o4, [%o1 + %o3]
	inccc	%o3
	bl,a,pt %ncc, .dcicl
	lduba	[%o0 + %o3]ASI_USER, %o4
	!
	! We're done.  Go home.
	!
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
	retl
	clr	%o0
	!
	! Try aligned copies from here.
	!
.dci_ns:
	!
	! See if we're single byte aligned.  If we are, check the
	! limit for single byte copies.  If we're smaller, or equal,
	! bounce to the byte for byte copy loop.  Otherwise do it in
	! HW (if enabled).
	!
	btst	1, %o3
	bz,a,pt	%icc, .dcih8
	btst	7, %o3
	ba	.dcibcp
	nop

.dcih8:
	!
	! 8 byte aligned?
	!
	bnz,a	%ncc, .dcih4
	btst	3, %o3
.dcis8:
	!
	! Housekeeping for copy loops.  Uses same idea as in the byte for
	! byte copy loop above.
	!
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .didebc
	srl	%o2, 3, %o2		! Number of 8 byte chunks to copy
	!
	! 4 byte aligned?
	!
.dcih4:
	bnz	%ncc, .dcih2
	nop
.dcis4:
	!
	! Housekeeping for copy loops.  Uses same idea as in the byte
	! for byte copy loop above.
	!
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .didfbc
	srl	%o2, 2, %o2		! Number of 4 byte chunks to copy
.dcih2:
.dcis2:
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .didtbc
	srl	%o2, 1, %o2		! Number of 2 byte chunks to copy

.didebc:
	ldxa	[%o0 + %o3]ASI_USER, %o4
	deccc	%o2
	stx	%o4, [%o1 + %o3]
	bg,pt	%ncc, .didebc
	addcc	%o3, 8, %o3
	!
	! End of copy loop.  Most 8 byte aligned copies end here.
	!
	bz,pt	%ncc, .dcifh
	nop
	!
	! Something is left.  Do it byte for byte.
	!
	ba,pt	%ncc, .dcicl
	lduba	[%o0 + %o3]ASI_USER, %o4
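	!
	! The branch above re-enters .dcicl with %o3 = -(count % 8), so
	! the byte loop finishes the remainder with the same
	! negative-offset walk.
	!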
	!
	! 4 byte copy loop.  %o2 is number of 4 byte chunks to copy.
	!
	.align	32
.didfbc:
	lduwa	[%o0 + %o3]ASI_USER, %o4
	deccc	%o2
	st	%o4, [%o1 + %o3]
	bg,pt	%ncc, .didfbc
	addcc	%o3, 4, %o3
	!
	! End of copy loop.  Most 4 byte aligned copies end here.
	!
	bz,pt	%ncc, .dcifh
	nop
	!
	! Something is left.  Do it byte for byte.
	!
	ba,pt	%ncc, .dcicl
	lduba	[%o0 + %o3]ASI_USER, %o4
	!
	! 2 byte aligned copy loop.  %o2 is number of 2 byte chunks to
	! copy.
	!
	.align	32
.didtbc:
	lduha	[%o0 + %o3]ASI_USER, %o4
	deccc	%o2
	sth	%o4, [%o1 + %o3]
	bg,pt	%ncc, .didtbc
	addcc	%o3, 2, %o3
	!
	! End of copy loop.  Most 2 byte aligned copies end here.
	!
	bz,pt	%ncc, .dcifh
	nop
	!
	! Deal with the last byte
	!
	lduba	[%o0 + %o3]ASI_USER, %o4
	stb	%o4, [%o1 + %o3]
.dcifh:
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	clr	%o0

.copyin_err:
	ldn	[THREAD_REG + T_COPYOPS], %o4
	brz	%o4, 2f
	nop
	ldn	[%o4 + CP_COPYIN], %g2
	jmp	%g2
	nop
2:
	retl
	mov	-1, %o0
	SET_SIZE(copyin)

	ENTRY(xcopyin)
	sethi	%hi(.xcopyin_err), REAL_LOFAULT
	b	.do_copyin
	or	REAL_LOFAULT, %lo(.xcopyin_err), REAL_LOFAULT
.xcopyin_err:
	ldn	[THREAD_REG + T_COPYOPS], %o4
	brz	%o4, 2f
	nop
	ldn	[%o4 + CP_XCOPYIN], %g2
	jmp	%g2
	nop
2:
	retl
	mov	%g1, %o0
	SET_SIZE(xcopyin)

	ENTRY(xcopyin_little)
	sethi	%hi(.little_err), %o4
	ldn	[THREAD_REG + T_LOFAULT], %o5
	or	%o4, %lo(.little_err), %o4
	membar	#Sync			! sync error barrier
	stn	%o4, [THREAD_REG + T_LOFAULT]

	subcc	%g0, %o2, %o3
	add	%o0, %o2, %o0
	bz,pn	%ncc, 2f		! check for zero bytes
	sub	%o2, 1, %o4
	add	%o0, %o4, %o0		! start w/last byte
	add	%o1, %o2, %o1
	lduba	[%o0+%o3]ASI_AIUSL, %o4

1:	stb	%o4, [%o1+%o3]
	inccc	%o3
	sub	%o0, 2, %o0		! get next byte
	bcc,a,pt %ncc, 1b
	lduba	[%o0+%o3]ASI_AIUSL, %o4

2:	membar	#Sync			! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	mov	%g0, %o0		! return (0)

.little_err:
	membar	#Sync			! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	mov	%g1, %o0
	SET_SIZE(xcopyin_little)


/*
 * Copy a block of storage - must not overlap (from + len <= to).
 * No fault handler installed (to be called under on_fault())
 */

	ENTRY(copyin_noerr)
	sethi	%hi(.copyio_noerr), REAL_LOFAULT
	b	.do_copyin
	or	REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
.copyio_noerr:
	jmp	SAVED_LOFAULT
	nop
	SET_SIZE(copyin_noerr)

/*
 * Copy a block of storage - must not overlap (from + len <= to).
 * No fault handler installed (to be called under on_fault())
 */

	ENTRY(copyout_noerr)
	sethi	%hi(.copyio_noerr), REAL_LOFAULT
	b	.do_copyout
	or	REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
	SET_SIZE(copyout_noerr)

	.align	4
	DGDEF(use_hw_bcopy)
	.word	1
	DGDEF(use_hw_bzero)
	.word	1

	.align	64
	.section ".text"


/*
 * hwblkclr - clears block-aligned, block-multiple-sized regions that are
 * longer than 256 bytes in length.  For the generic module we simply
 * call bzero and return 1, indicating that block operations were not
 * used and that the caller should flush the cached pages to ensure
 * integrity.
 * Caller is responsible for ensuring use_hw_bzero is true and that
 * kpreempt_disable() has been called.
 */
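/*
 * Illustrative sketch only (C-like, not built); the prototype is an
 * assumption for the sketch:
 *
 *	int
 *	hwblkclr(void *addr, size_t len)
 *	{
 *		bzero(addr, len);
 *		return (1);	// no block ops used; caller flushes
 *	}
 */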
	! %i0 - start address
	! %i1 - length of region (multiple of 64)

	ENTRY(hwblkclr)
	save	%sp, -SA(MINFRAME), %sp

	! Simply call bzero and notify the caller that bzero was used
	mov	%i0, %o0
	call	bzero
	mov	%i1, %o1
	ret
	restore	%g0, 1, %o0	! return (1) - did not use block operations

	SET_SIZE(hwblkclr)

/*
 * Copy 32 bytes of data from src (%o0) to dst (%o1)
 * using physical addresses.
 */
	ENTRY_NP(hw_pa_bcopy32)
	rdpr	%pstate, %g1
	andn	%g1, PSTATE_IE, %g2
	wrpr	%g0, %g2, %pstate

	ldxa	[%o0]ASI_MEM, %o2
	add	%o0, 8, %o0
	ldxa	[%o0]ASI_MEM, %o3
	add	%o0, 8, %o0
	ldxa	[%o0]ASI_MEM, %o4
	add	%o0, 8, %o0
	ldxa	[%o0]ASI_MEM, %o5
	stxa	%o2, [%o1]ASI_MEM
	add	%o1, 8, %o1
	stxa	%o3, [%o1]ASI_MEM
	add	%o1, 8, %o1
	stxa	%o4, [%o1]ASI_MEM
	add	%o1, 8, %o1
	stxa	%o5, [%o1]ASI_MEM

	membar	#Sync
	retl
	wrpr	%g0, %g1, %pstate
	SET_SIZE(hw_pa_bcopy32)

/*
 * Zero a block of storage.
 *
 * uzero is used by the kernel to zero a block in user address space.
 */

	ENTRY(uzero)
	!
	! Set a new lo_fault handler only if we came in with one
	! already specified.
	!
	wr	%g0, ASI_USER, %asi
	ldn	[THREAD_REG + T_LOFAULT], %o5
	tst	%o5
	bz,pt	%ncc, .do_zero
	sethi	%hi(.zeroerr), %o2
	or	%o2, %lo(.zeroerr), %o2
	membar	#Sync
	ba,pt	%ncc, .do_zero
	stn	%o2, [THREAD_REG + T_LOFAULT]

	ENTRY(kzero)
	!
	! Always set a lo_fault handler
	!
	wr	%g0, ASI_P, %asi
	ldn	[THREAD_REG + T_LOFAULT], %o5
	sethi	%hi(.zeroerr), %o2
	or	%o5, LOFAULT_SET, %o5
	or	%o2, %lo(.zeroerr), %o2
	membar	#Sync
	ba,pt	%ncc, .do_zero
	stn	%o2, [THREAD_REG + T_LOFAULT]

/*
 * We got here because of a fault during kzero or if
 * uzero or bzero was called with t_lofault non-zero.
 * Otherwise we've already run screaming from the room.
 * Errno value is in %g1.  Note that we're here iff
 * we did set t_lofault.
 */
.zeroerr:
	!
	! Undo asi register setting.  Just set it to be the
	! kernel default without checking.
	!
	wr	%g0, ASI_P, %asi

	!
	! We did set t_lofault.  It may well have been zero coming in.
	!
1:
	tst	%o5
	membar	#Sync
	bne,pn	%ncc, 3f
	andncc	%o5, LOFAULT_SET, %o5
2:
	!
	! Old handler was zero.  Just return the error.
	!
	retl				! return
	mov	%g1, %o0		! error code from %g1
3:
	!
	! We're here because %o5 was non-zero.  It was non-zero
	! because either LOFAULT_SET was present, a previous fault
	! handler was present or both.  In all cases we need to reset
	! T_LOFAULT to the value of %o5 after clearing LOFAULT_SET
	! before we either simply return the error or we invoke the
	! previously specified handler.
	!
	be	%ncc, 2b
	stn	%o5, [THREAD_REG + T_LOFAULT]
	jmp	%o5			! goto real handler
	nop
	SET_SIZE(kzero)
	SET_SIZE(uzero)
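/*
 * Illustrative sketch only (C-like, not built) of the t_lofault
 * protocol shared by kzero/uzero/bzero above; names are assumptions
 * for the sketch.  LOFAULT_SET tags the saved value so the fault path
 * can tell "kzero installed a handler" apart from "uzero/bzero
 * inherited one":
 *
 *	old = curthread->t_lofault;
 *	if (always_install || old != 0) {
 *		curthread->t_lofault = (uintptr_t)zeroerr_handler;
 *		if (always_install)		// the kzero() case
 *			old |= LOFAULT_SET;
 *	}
 *	... on fault, restore (old & ~LOFAULT_SET) and return errno ...
 */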
/*
 * Zero a block of storage.
 */

	ENTRY(bzero)
	wr	%g0, ASI_P, %asi

	ldn	[THREAD_REG + T_LOFAULT], %o5	! save old vector
	tst	%o5
	bz,pt	%ncc, .do_zero
	sethi	%hi(.zeroerr), %o2
	or	%o2, %lo(.zeroerr), %o2
	membar	#Sync			! sync error barrier
	stn	%o2, [THREAD_REG + T_LOFAULT]	! install new vector

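	/*
	 * Illustrative sketch only (C-like, not built) of the size
	 * dispatch below; the helper names are hypothetical and the
	 * label comments tie them to the real code:
	 *
	 *	if (len < 7)
	 *		byte_clear();		// .byteclr
	 *	else if (len < 15)
	 *		word_clear();		// .wdalign/.wdclr
	 *	else {
	 *		align_to_8_bytes();
	 *		if (len >= 0x80)
	 *			block_clear();	// .blkalign/.bzero_blk
	 *		double_clear();		// .bzero_small/.dbclr
	 *		byte_clear();		// leftover bytes
	 *	}
	 */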
.do_zero:
	cmp	%o1, 7
	blu,pn	%ncc, .byteclr
	nop

	cmp	%o1, 15
	blu,pn	%ncc, .wdalign
	nop

	andcc	%o0, 7, %o3		! is addr aligned on an 8 byte bound?
	bz,pt	%ncc, .blkalign		! already double aligned
	sub	%o3, 8, %o3		! -(bytes till double aligned)
	add	%o1, %o3, %o1		! update o1 with new count

1:
	stba	%g0, [%o0]%asi
	inccc	%o3
	bl,pt	%ncc, 1b
	inc	%o0

	! Now address is double aligned
.blkalign:
	cmp	%o1, 0x80		! check if there are 128 bytes to set
	blu,pn	%ncc, .bzero_small
	mov	%o1, %o3

	andcc	%o0, 0x3f, %o3		! is block aligned?
	bz,pt	%ncc, .bzero_blk
	sub	%o3, 0x40, %o3		! -(bytes till block aligned)
	add	%o1, %o3, %o1		! o1 is the remainder

	! Clear -(%o3) bytes till block aligned
1:
	stxa	%g0, [%o0]%asi
	addcc	%o3, 8, %o3
	bl,pt	%ncc, 1b
	add	%o0, 8, %o0

.bzero_blk:
	and	%o1, 0x3f, %o3		! calc bytes left after blk clear
	andn	%o1, 0x3f, %o4		! calc size of blocks in bytes

	cmp	%o4, 0x100		! 256 bytes or more
	blu,pn	%ncc, 3f
	nop

2:
	stxa	%g0, [%o0+0x0]%asi
	stxa	%g0, [%o0+0x40]%asi
	stxa	%g0, [%o0+0x80]%asi
	stxa	%g0, [%o0+0xc0]%asi

	stxa	%g0, [%o0+0x8]%asi
	stxa	%g0, [%o0+0x10]%asi
	stxa	%g0, [%o0+0x18]%asi
	stxa	%g0, [%o0+0x20]%asi
	stxa	%g0, [%o0+0x28]%asi
	stxa	%g0, [%o0+0x30]%asi
	stxa	%g0, [%o0+0x38]%asi

	stxa	%g0, [%o0+0x48]%asi
	stxa	%g0, [%o0+0x50]%asi
	stxa	%g0, [%o0+0x58]%asi
	stxa	%g0, [%o0+0x60]%asi
	stxa	%g0, [%o0+0x68]%asi
	stxa	%g0, [%o0+0x70]%asi
	stxa	%g0, [%o0+0x78]%asi

	stxa	%g0, [%o0+0x88]%asi
	stxa	%g0, [%o0+0x90]%asi
	stxa	%g0, [%o0+0x98]%asi
	stxa	%g0, [%o0+0xa0]%asi
	stxa	%g0, [%o0+0xa8]%asi
	stxa	%g0, [%o0+0xb0]%asi
	stxa	%g0, [%o0+0xb8]%asi

	stxa	%g0, [%o0+0xc8]%asi
	stxa	%g0, [%o0+0xd0]%asi
	stxa	%g0, [%o0+0xd8]%asi
	stxa	%g0, [%o0+0xe0]%asi
	stxa	%g0, [%o0+0xe8]%asi
	stxa	%g0, [%o0+0xf0]%asi
	stxa	%g0, [%o0+0xf8]%asi

	sub	%o4, 0x100, %o4
	cmp	%o4, 0x100
	bgu,pt	%ncc, 2b
	add	%o0, 0x100, %o0

3:
	! ... check if 64 bytes to set
	cmp	%o4, 0x40
	blu	%ncc, .bzero_blk_done
	nop

4:
	stxa	%g0, [%o0+0x0]%asi
	stxa	%g0, [%o0+0x8]%asi
	stxa	%g0, [%o0+0x10]%asi
	stxa	%g0, [%o0+0x18]%asi
	stxa	%g0, [%o0+0x20]%asi
	stxa	%g0, [%o0+0x28]%asi
	stxa	%g0, [%o0+0x30]%asi
	stxa	%g0, [%o0+0x38]%asi

	subcc	%o4, 0x40, %o4
	bgu,pt	%ncc, 3b
	add	%o0, 0x40, %o0

.bzero_blk_done:
	membar	#Sync

.bzero_small:
	! Set the remaining doubles
	subcc	%o3, 8, %o3		! Can we store any doubles?
	blu,pn	%ncc, .byteclr
	and	%o1, 7, %o1		! calc bytes left after doubles

.dbclr:
	stxa	%g0, [%o0]%asi		! Clear the doubles
	subcc	%o3, 8, %o3
	bgeu,pt	%ncc, .dbclr
	add	%o0, 8, %o0

	ba	.byteclr
	nop

.wdalign:
	andcc	%o0, 3, %o3		! is addr aligned on a word boundary?
	bz,pn	%ncc, .wdclr
	andn	%o1, 3, %o3		! create word sized count in %o3

	dec	%o1			! decrement count
	stba	%g0, [%o0]%asi		! clear a byte
	ba	.wdalign
	inc	%o0			! next byte

.wdclr:
	sta	%g0, [%o0]%asi		! 4-byte clearing loop
	subcc	%o3, 4, %o3
	bnz,pt	%ncc, .wdclr
	inc	4, %o0

	and	%o1, 3, %o1		! leftover count, if any

.byteclr:
	! Set the leftover bytes
	brz	%o1, .bzero_exit
	nop

7:
	deccc	%o1			! byte clearing loop
	stba	%g0, [%o0]%asi
	bgu,pt	%ncc, 7b
	inc	%o0

.bzero_exit:
	!
	! We're just concerned with whether t_lofault was set
	! when we came in.  We end up here from kzero(), uzero()
	! or bzero().  kzero() *always* sets a lofault handler.
	! It ors LOFAULT_SET into %o5 to indicate it has done
	! this even if the value of %o5 is otherwise zero.
	! uzero() and bzero() set a lofault handler *only* if one
	! was previously set.  Accordingly we need to examine
	! %o5 and if it is non-zero be sure to clear LOFAULT_SET
	! before resetting the error handler.
	!
	tst	%o5
	bz	%ncc, 1f
	andn	%o5, LOFAULT_SET, %o5
	membar	#Sync			! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
1:
	retl
	clr	%o0			! return (0)

	SET_SIZE(bzero)