1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 # ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/param.h> 29 #include <sys/errno.h> 30 #include <sys/asm_linkage.h> 31 #include <sys/vtrace.h> 32 #include <sys/machthread.h> 33 #include <sys/clock.h> 34 #include <sys/asi.h> 35 #include <sys/fsr.h> 36 #include <sys/privregs.h> 37 38 #if !defined(lint) 39 #include "assym.h" 40 #endif /* lint */ 41 42 43 /* 44 * Less then or equal this number of bytes we will always copy byte-for-byte 45 */ 46 #define SMALL_LIMIT 7 47 48 /* 49 * LOFAULT_SET : Flag set by kzero and kcopy to indicate that t_lofault 50 * handler was set 51 */ 52 #define LOFAULT_SET 2 53 54 55 /* 56 * Copy a block of storage, returning an error code if `from' or 57 * `to' takes a kernel pagefault which cannot be resolved. 
 * Returns errno value on pagefault error, 0 if all ok
 */

#if defined(lint)

/* ARGSUSED */
int
kcopy(const void *from, void *to, size_t count)
{ return(0); }

#else /* lint */

	.seg	".text"
	.align	4

/*
 * kcopy(from %o0, to %o1, count %o2)
 *
 * Installs .copyerr as the t_lofault handler, tags the saved previous
 * handler in %o5 with LOFAULT_SET, and falls into the common bcopy
 * path at .do_copy.  On a fault, .copyerr clears the tag, restores the
 * previous handler and returns the errno delivered in %g1.
 */
	ENTRY(kcopy)

	save	%sp, -SA(MINFRAME), %sp
	set	.copyerr, %l7			! copyerr is lofault value
	ldn	[THREAD_REG + T_LOFAULT], %o5	! save existing handler
	or	%o5, LOFAULT_SET, %o5		! tag: we installed a handler
	membar	#Sync				! sync error barrier
	b	.do_copy			! common code
	stn	%l7, [THREAD_REG + T_LOFAULT]	! delay slot, set t_lofault

/*
 * We got here because of a fault during kcopy.
 * Errno value is in %g1.
 */
.copyerr:
	! The kcopy() *always* sets a t_lofault handler and it ORs LOFAULT_SET
	! into %o5 to indicate it has set t_lofault handler. Need to clear
	! LOFAULT_SET flag before restoring the error handler.
	andn	%o5, LOFAULT_SET, %o5
	membar	#Sync				! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	ret
	restore	%g1, 0, %o0			! delay slot, return errno

	SET_SIZE(kcopy)
#endif	/* lint */


/*
 * Copy a block of storage - must not overlap (from + len <= to).
 */
#if defined(lint)

/* ARGSUSED */
void
bcopy(const void *from, void *to, size_t count)
{}

#else /* lint */

/*
 * bcopy(from %i0, to %i1, count %i2)
 *
 * %o5 == 0 tells the common exit path (.cpdone) that no lofault
 * handler was installed; kcopy enters at .do_copy with %o5 non-zero.
 * %g5/%l6 carry the destination start and size to the sync_icache
 * call at .cpdone.
 */
	ENTRY(bcopy)

	save	%sp, -SA(MINFRAME), %sp
	clr	%o5			! flag LOFAULT_SET is not set for bcopy

.do_copy:
	mov	%i1, %g5		! save dest addr start

	mov	%i2, %l6		! save size

	cmp	%i2, 12			! for small counts
	blu	%ncc, .bytecp		! just copy bytes
	.empty

	!
	! use aligned transfers where possible
	!
	xor	%i0, %i1, %o4		! xor from and to address
	btst	7, %o4			! if lower three bits zero
	bz	.aldoubcp		! can align on double boundary
	.empty				! assembler complaints about label

	xor	%i0, %i1, %o4		! xor from and to address
	btst	3, %o4			! if lower two bits zero
	bz	.alwordcp		! can align on word boundary
	btst	3, %i0			! delay slot, from address unaligned?
	!
	! use aligned reads and writes where possible
	! this differs from wordcp in that it copes
	! with odd alignment between source and destination
	! using word reads and writes with the proper shifts
	! in between to align transfers to and from memory
	! i0 - src address, i1 - dest address, i2 - count
	! i3, i4 - tmps used for generating complete word
	! i5 (word to write)
	! l0 size in bits of upper part of source word (US)
	! l1 size in bits of lower part of source word (LS = 32 - US)
	! l2 size in bits of upper part of destination word (UD)
	! l3 size in bits of lower part of destination word (LD = 32 - UD)
	! l4 number of bytes leftover after aligned transfers complete
	! l5 the number 32
	!
	mov	32, %l5			! load an oft-needed constant
	bz	.align_dst_only
	btst	3, %i1			! is destination address aligned?
	clr	%i4			! clear registers used in either case
	bz	.align_src_only
	clr	%l0
	!
	! both source and destination addresses are unaligned
	!
1:					! align source
	ldub	[%i0], %i3		! read a byte from source address
	add	%i0, 1, %i0		! increment source address
	or	%i4, %i3, %i4		! or in with previous bytes (if any)
	btst	3, %i0			! is source aligned?
	add	%l0, 8, %l0		! increment size of upper source (US)
	bnz,a	1b			! annulled: delay slot only if looping
	sll	%i4, 8, %i4		! make room for next byte

	sub	%l5, %l0, %l1		! generate shift left count (LS)
	sll	%i4, %l1, %i4		! prepare to get rest
	ld	[%i0], %i3		! read a word
	add	%i0, 4, %i0		! increment source address
	srl	%i3, %l0, %i5		! upper src bits into lower dst bits
	or	%i4, %i5, %i5		! merge
	mov	24, %l3			! align destination
1:
	srl	%i5, %l3, %i4		! prepare to write a single byte
	stb	%i4, [%i1]		! write a byte
	add	%i1, 1, %i1		! increment destination address
	sub	%i2, 1, %i2		! decrement count
	btst	3, %i1			! is destination aligned?
	bnz,a	1b
	sub	%l3, 8, %l3		! delay slot, decrement shift count (LD)
	sub	%l5, %l3, %l2		! generate shift left count (UD)
	sll	%i5, %l2, %i5		! move leftover into upper bytes
	cmp	%l2, %l0		! cmp # reqd to fill dst w old src left
	bgu	%ncc, .more_needed	! need more to fill than we have
	nop

	sll	%i3, %l1, %i3		! clear upper used byte(s)
	srl	%i3, %l1, %i3
	! get the odd bytes between alignments
	sub	%l0, %l2, %l0		! regenerate shift count
	sub	%l5, %l0, %l1		! generate new shift left count (LS)
	and	%i2, 3, %l4		! must do remaining bytes if count%4 > 0
	andn	%i2, 3, %i2		! # of aligned bytes that can be moved
	srl	%i3, %l0, %i4
	or	%i5, %i4, %i5
	st	%i5, [%i1]		! write a word
	subcc	%i2, 4, %i2		! decrement count
	bz	%ncc, .unalign_out
	add	%i1, 4, %i1		! increment destination address

	b	2f
	sll	%i3, %l1, %i5		! get leftover into upper bits
.more_needed:
	sll	%i3, %l0, %i3		! save remaining byte(s)
	srl	%i3, %l0, %i3
	sub	%l2, %l0, %l1		! regenerate shift count
	sub	%l5, %l1, %l0		! generate new shift left count
	sll	%i3, %l1, %i4		! move to fill empty space
	b	3f
	or	%i5, %i4, %i5		! merge to complete word
	!
	! the source address is aligned and destination is not
	!
.align_dst_only:
	ld	[%i0], %i4		! read a word
	add	%i0, 4, %i0		! increment source address
	mov	24, %l0			! initial shift alignment count
1:
	srl	%i4, %l0, %i3		! prepare to write a single byte
	stb	%i3, [%i1]		! write a byte
	add	%i1, 1, %i1		! increment destination address
	sub	%i2, 1, %i2		! decrement count
	btst	3, %i1			! is destination aligned?
	bnz,a	1b
	sub	%l0, 8, %l0		! delay slot, decrement shift count
.xfer:
	sub	%l5, %l0, %l1		! generate shift left count
	sll	%i4, %l1, %i5		! get leftover
3:
	and	%i2, 3, %l4		! must do remaining bytes if count%4 > 0
	andn	%i2, 3, %i2		! # of aligned bytes that can be moved
2:
	ld	[%i0], %i3		! read a source word
	add	%i0, 4, %i0		! increment source address
	srl	%i3, %l0, %i4		! upper src bits into lower dst bits
	or	%i5, %i4, %i5		! merge with upper dest bits (leftover)
	st	%i5, [%i1]		! write a destination word
	subcc	%i2, 4, %i2		! decrement count
	bz	%ncc, .unalign_out	! check if done
	add	%i1, 4, %i1		! increment destination address
	b	2b			! loop
	sll	%i3, %l1, %i5		! get leftover
.unalign_out:
	tst	%l4			! any bytes leftover?
	bz	%ncc, .cpdone
	.empty				! allow next instruction in delay slot
1:
	sub	%l0, 8, %l0		! decrement shift
	srl	%i3, %l0, %i4		! upper src byte into lower dst byte
	stb	%i4, [%i1]		! write a byte
	subcc	%l4, 1, %l4		! decrement count
	bz	%ncc, .cpdone		! done?
	add	%i1, 1, %i1		! increment destination
	tst	%l0			! any more previously read bytes
	bnz	%ncc, 1b		! we have leftover bytes
	mov	%l4, %i2		! delay slot, mv cnt where dbytecp wants
	b	.dbytecp		! let dbytecp do the rest
	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst
	!
	! the destination address is aligned and the source is not
	!
.align_src_only:
	ldub	[%i0], %i3		! read a byte from source address
	add	%i0, 1, %i0		! increment source address
	or	%i4, %i3, %i4		! or in with previous bytes (if any)
	btst	3, %i0			! is source aligned?
	add	%l0, 8, %l0		! increment shift count (US)
	bnz,a	.align_src_only
	sll	%i4, 8, %i4		! make room for next byte
	b,a	.xfer
	!
	! if from address unaligned for double-word moves,
	! move bytes till it is, if count is < 56 it could take
	! longer to align the thing than to do the transfer
	! in word size chunks right away
	!
.aldoubcp:
	cmp	%i2, 56			! if count < 56, use wordcp, it takes
	blu,a	%ncc, .alwordcp		! longer to align doubles than words
	mov	3, %o0			! mask for word alignment
	call	.alignit		! copy bytes until aligned
	mov	7, %o0			! mask for double alignment
	!
	! source and destination are now double-word aligned
	! i3 has aligned count returned by alignit
	!
	and	%i2, 7, %i2		! unaligned leftover count
	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst
5:
	ldx	[%i0+%i1], %o4		! read from address
	stx	%o4, [%i1]		! write at destination address
	subcc	%i3, 8, %i3		! dec count
	bgu	%ncc, 5b
	add	%i1, 8, %i1		! delay slot, inc to address
	cmp	%i2, 4			! see if we can copy a word
	blu	%ncc, .dbytecp		! if 3 or less bytes use bytecp
	.empty
	!
	! for leftover bytes we fall into wordcp, if needed
	!
.wordcp:
	and	%i2, 3, %i2		! unaligned leftover count
5:
	ld	[%i0+%i1], %o4		! read from address
	st	%o4, [%i1]		! write at destination address
	subcc	%i3, 4, %i3		! dec count
	bgu	%ncc, 5b
	add	%i1, 4, %i1		! delay slot, inc to address
	b,a	.dbytecp

	! we come here to align copies on word boundaries
.alwordcp:
	call	.alignit		! go word-align it
	mov	3, %o0			! bits that must be zero to be aligned
	b	.wordcp
	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst

	!
	! byte copy, works with any alignment
	!
.bytecp:
	b	.dbytecp
	sub	%i0, %i1, %i0		! i0 gets difference of src and dst

	!
	! differenced byte copy, works with any alignment
	! assumes dest in %i1 and (source - dest) in %i0
	!
1:
	stb	%o4, [%i1]		! write to address
	inc	%i1			! inc to address
.dbytecp:
	deccc	%i2			! dec count
	bgeu,a	%ncc, 1b		! loop till done
	ldub	[%i0+%i1], %o4		! delay slot, read from address
.cpdone:
	membar	#Sync			! sync error barrier
	! Restore t_lofault handler, if came here from kcopy().
	! %o5 is zero for plain bcopy (nothing to restore).
	tst	%o5
	bz	%ncc, 1f
	andn	%o5, LOFAULT_SET, %o5	! delay slot, strip LOFAULT_SET tag
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
1:
	mov	%g5, %o0		! copy dest address
	call	sync_icache
	mov	%l6, %o1		! delay slot, saved size
	ret
	restore	%g0, 0, %o0		! return (0)

/*
 * Common code used to align transfers on word and doubleword
 * boundaries.  Aligns source and destination and returns a count
 * of aligned bytes to transfer in %i3
 */
1:
	inc	%i0			! inc from
	stb	%o4, [%i1]		! write a byte
	inc	%i1			! inc to
	dec	%i2			! dec count
.alignit:
	btst	%o0, %i0		! %o0 is bit mask to check for alignment
	bnz,a	1b
	ldub	[%i0], %o4		! delay slot, read next byte

	retl
	andn	%i2, %o0, %i3		! return size of aligned bytes
	SET_SIZE(bcopy)

#endif	/* lint */

/*
 * Block copy with possibly overlapped operands.
 */

#if defined(lint)

/*ARGSUSED*/
void
ovbcopy(const void *from, void *to, size_t count)
{}

#else	/* lint */

/*
 * ovbcopy(from %o0, to %o1, count %o2)
 *
 * Non-overlapping regions are handed straight to bcopy; otherwise a
 * byte-at-a-time copy runs forwards or backwards depending on whether
 * from < to, so the overlapping bytes are read before being clobbered.
 */
	ENTRY(ovbcopy)
	tst	%o2			! check count
	bgu,a	%ncc, 1f		! nothing to do or bad arguments
	subcc	%o0, %o1, %o3		! delay slot, difference of addresses

	retl				! return
	nop
1:
	bneg,a	%ncc, 2f
	neg	%o3			! if < 0, make it positive
2:	cmp	%o2, %o3		! cmp size and abs(from - to)
	bleu	%ncc, bcopy		! if size <= abs(diff): use bcopy,
	.empty				!   no overlap
	cmp	%o0, %o1		! compare from and to addresses
	blu	%ncc, .ov_bkwd		! if from < to, copy backwards
	nop
	!
	! Copy forwards.
	!
.ov_fwd:
	ldub	[%o0], %o3		! read from address
	inc	%o0			! inc from address
	stb	%o3, [%o1]		! write to address
	deccc	%o2			! dec count
	bgu	%ncc, .ov_fwd		! loop till done
	inc	%o1			! delay slot, inc to address

	retl				! return
	nop
	!
	! Copy backwards.
	!
.ov_bkwd:
	deccc	%o2			! dec count
	ldub	[%o0 + %o2], %o3	! get byte at end of src
	bgu	%ncc, .ov_bkwd		! loop till done
	stb	%o3, [%o1 + %o2]	! delay slot, store at end of dst

	retl				! return
	nop
	SET_SIZE(ovbcopy)

#endif	/* lint */

/*
 * hwblkpagecopy()
 *
 * Copies exactly one page.  This routine assumes the caller (ppcopy)
 * has already disabled kernel preemption and has checked
 * use_hw_bcopy.
 */
#ifdef lint
/*ARGSUSED*/
void
hwblkpagecopy(const void *src, void *dst)
{ }
#else /* lint */
/*
 * hwblkpagecopy(src %i0, dst %i1)
 *
 * Copies PAGESIZE bytes in 0x80-byte chunks, each chunk moved as two
 * groups of eight 8-byte ldx/stx pairs, then flushes the destination
 * range from the I-cache via sync_icache.  No lofault handler is set;
 * both pages are assumed mapped and resident.
 */
	ENTRY(hwblkpagecopy)
	save	%sp, -SA(MINFRAME), %sp

	! %i0 - source address (arg)
	! %i1 - destination address (arg)
	! %i2 - length of region (not arg)

	set	PAGESIZE, %i2
	mov	%i1, %o0	! store destination address for flushing

	/*
	 * Copying exactly one page and PAGESIZE is in multiple of 0x80.
	 */
1:
	ldx	[%i0+0x0], %l0
	ldx	[%i0+0x8], %l1
	ldx	[%i0+0x10], %l2
	ldx	[%i0+0x18], %l3
	ldx	[%i0+0x20], %l4
	ldx	[%i0+0x28], %l5
	ldx	[%i0+0x30], %l6
	ldx	[%i0+0x38], %l7
	stx	%l0, [%i1+0x0]
	stx	%l1, [%i1+0x8]
	stx	%l2, [%i1+0x10]
	stx	%l3, [%i1+0x18]
	stx	%l4, [%i1+0x20]
	stx	%l5, [%i1+0x28]
	stx	%l6, [%i1+0x30]
	stx	%l7, [%i1+0x38]

	ldx	[%i0+0x40], %l0
	ldx	[%i0+0x48], %l1
	ldx	[%i0+0x50], %l2
	ldx	[%i0+0x58], %l3
	ldx	[%i0+0x60], %l4
	ldx	[%i0+0x68], %l5
	ldx	[%i0+0x70], %l6
	ldx	[%i0+0x78], %l7
	stx	%l0, [%i1+0x40]
	stx	%l1, [%i1+0x48]
	stx	%l2, [%i1+0x50]
	stx	%l3, [%i1+0x58]
	stx	%l4, [%i1+0x60]
	stx	%l5, [%i1+0x68]
	stx	%l6, [%i1+0x70]
	stx	%l7, [%i1+0x78]

	add	%i0, 0x80, %i0
	subcc	%i2, 0x80, %i2
	bgu,pt	%xcc, 1b
	add	%i1, 0x80, %i1		! delay slot, advance destination

	! %o0 contains the dest. address
	set	PAGESIZE, %o1
	call	sync_icache
	nop

	membar	#Sync
	ret
	restore	%g0, 0, %o0
	SET_SIZE(hwblkpagecopy)
#endif	/* lint */


/*
 * Transfer data to and from user space -
 * Note that these routines can cause faults
 * It is assumed that the kernel has nothing at
 * less than KERNELBASE in the virtual address space.
 *
 * Note that copyin(9F) and copyout(9F) are part of the
 * DDI/DKI which specifies that they return '-1' on "errors."
 *
 * Sigh.
525 * 526 * So there's two extremely similar routines - xcopyin() and xcopyout() 527 * which return the errno that we've faithfully computed. This 528 * allows other callers (e.g. uiomove(9F)) to work correctly. 529 * Given that these are used pretty heavily, we expand the calling 530 * sequences inline for all flavours (rather than making wrappers). 531 * 532 * There are also stub routines for xcopyout_little and xcopyin_little, 533 * which currently are intended to handle requests of <= 16 bytes from 534 * do_unaligned. Future enhancement to make them handle 8k pages efficiently 535 * is left as an exercise... 536 */ 537 538 /* 539 * Copy user data to kernel space (copyOP/xcopyOP/copyOP_noerr) 540 * 541 * General theory of operation: 542 * 543 * None of the copyops routines grab a window. 544 * 545 * Flow: 546 * 547 * If count == zero return zero. 548 * 549 * Store the previous lo_fault handler into %g6. 550 * Place our secondary lofault handler into %g5. 551 * Place the address of our fault handler into %o3. 552 * 553 * If count is less than or equal to SMALL_LIMIT (7) we 554 * always do a byte for byte copy. 555 * 556 * If count is > SMALL_LIMIT, we check the alignment of the input 557 * and output pointers. We store -count in %o3, we store the number 558 * of chunks (8, 4, 2 or 1 byte) operated on in our basic copy loop 559 * in %o2. Following this we branch to the appropriate copy loop and 560 * copy that many chunks. Since we've been adding the chunk size 561 * to %o3 each time through as well as decrementing %o2, we can tell 562 * if any data is is left to be copied by examining %o3. If that is 563 * zero, we're done and can go home. If not, we figure out what the 564 * largest chunk size left to be copied is and branch to that copy 565 * loop unless there's only one byte left. We load that as we're 566 * branching to code that stores it just before we return. 567 * 568 * Fault handlers are invoked if we reference memory that has no 569 * current mapping. 
All forms share the same copyio_fault handler.
 * This routine handles fixing up the stack and general housecleaning.
 * Each copy operation has a simple fault handler that is then called
 * to do the work specific to the individual operation.  The handler
 * for copyOP and xcopyOP are found at the end of individual function.
 * The handlers for xcopyOP_little are found at the end of xcopyin_little.
 * The handlers for copyOP_noerr are found at the end of copyin_noerr.
 */

/*
 * Copy kernel data to user space (copyout/xcopyout/xcopyout_little).
 */

#if defined(lint)

/*ARGSUSED*/
int
copyout(const void *kaddr, void *uaddr, size_t count)
{ return (0); }

#else	/* lint */

/*
 * We save the arguments in the following registers in case of a fault:
 *	kaddr - %g2
 *	uaddr - %g3
 *	count - %g4
 */
#define	SAVE_SRC	%g2
#define	SAVE_DST	%g3
#define	SAVE_COUNT	%g4

#define	REAL_LOFAULT	%g5
#define	SAVED_LOFAULT	%g6

/*
 * Generic copyio fault handler.  This is the first line of defense when a
 * fault occurs in (x)copyin/(x)copyout.  In order for this to function
 * properly, the value of the 'real' lofault handler should be in REAL_LOFAULT.
 * This allows us to share common code for all the flavors of the copy
 * operations, including the _noerr versions.
 *
 * Note that this function will restore the original input parameters before
 * calling REAL_LOFAULT.  So the real handler can vector to the appropriate
 * member of the t_copyop structure, if needed.
 */
	ENTRY(copyio_fault)
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault

	mov	SAVE_SRC, %o0
	mov	SAVE_DST, %o1
	jmp	REAL_LOFAULT
	mov	SAVE_COUNT, %o2		! delay slot, restore count
	SET_SIZE(copyio_fault)

	ENTRY(copyout)
	sethi	%hi(.copyout_err), REAL_LOFAULT
	or	REAL_LOFAULT, %lo(.copyout_err), REAL_LOFAULT

.do_copyout:
	!
	! Check the length and bail if zero.
	!
	tst	%o2
	bnz,pt	%ncc, 1f
	nop
	retl
	clr	%o0
1:
	sethi	%hi(copyio_fault), %o3
	ldn	[THREAD_REG + T_LOFAULT], SAVED_LOFAULT
	or	%o3, %lo(copyio_fault), %o3
	membar	#Sync
	stn	%o3, [THREAD_REG + T_LOFAULT]

	mov	%o0, SAVE_SRC
	mov	%o1, SAVE_DST
	mov	%o2, SAVE_COUNT

	!
	! Check to see if we're more than SMALL_LIMIT (7 bytes).
	! Run in leaf mode, using the %o regs as our input regs.
	!
	subcc	%o2, SMALL_LIMIT, %o3
	bgu,a,pt %ncc, .dco_ns
	or	%o0, %o1, %o3		! delay slot, fold addrs for align test

.dcobcp:
	sub	%g0, %o2, %o3		! negate count
	add	%o0, %o2, %o0		! make %o0 point at the end
	add	%o1, %o2, %o1		! make %o1 point at the end
	ba,pt	%ncc, .dcocl
	ldub	[%o0 + %o3], %o4	! load first byte
	!
	! %o0 and %o1 point at the end and remain pointing at the end
	! of their buffers.  We pull things out by adding %o3 (which is
	! the negation of the length) to the buffer end which gives us
	! the current location in the buffers.  By incrementing %o3 we walk
	! through both buffers without having to bump each buffer's
	! pointer.  A very fast 4 instruction loop.
	!
	.align	16
.dcocl:
	stba	%o4, [%o1 + %o3]ASI_USER
	inccc	%o3
	bl,a,pt	%ncc, .dcocl
	ldub	[%o0 + %o3], %o4
	!
	! We're done.  Go home.
	!
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
	retl
	clr	%o0
	!
	! Try aligned copies from here.
	!
.dco_ns:
	! %o0 = kernel addr (to be copied from)
	! %o1 = user addr (to be copied to)
	! %o2 = length
	! %o3 = %o1 | %o2 (used for alignment checking)
	! %o4 is alternate lo_fault
	! %o5 is original lo_fault
	!
	! See if we're single byte aligned.  If we are, check the
	! limit for single byte copies.  If we're smaller or equal,
	! bounce to the byte for byte copy loop.  Otherwise do it in
	! HW (if enabled).
	!
	btst	1, %o3
	bz,pt	%icc, .dcoh8
	btst	7, %o3			! delay slot, test 8-byte alignment

	ba	.dcobcp
	nop
.dcoh8:
	!
	! 8 byte aligned?
	!
	bnz,a	%ncc, .dcoh4
	btst	3, %o3			! delay slot, test 4-byte alignment
.dcos8:
	!
	! Housekeeping for copy loops.  Uses same idea as in the byte for
	! byte copy loop above.
	!
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .dodebc
	srl	%o2, 3, %o2		! Number of 8 byte chunks to copy
	!
	! 4 byte aligned?
	!
.dcoh4:
	bnz,pn	%ncc, .dcoh2
	nop
.dcos4:
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .dodfbc
	srl	%o2, 2, %o2		! Number of 4 byte chunks to copy
	!
	! We must be 2 byte aligned.  Off we go.
	! The check for small copies was done in the
	! delay at .dcoh4
	!
.dcoh2:
.dcos2:
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .dodtbc
	srl	%o2, 1, %o2		! Number of 2 byte chunks to copy

.dodebc:
	ldx	[%o0 + %o3], %o4
	deccc	%o2
	stxa	%o4, [%o1 + %o3]ASI_USER
	bg,pt	%ncc, .dodebc
	addcc	%o3, 8, %o3		! delay slot, advance offset
	!
	! End of copy loop.  Check to see if we're done.  Most
	! eight byte aligned copies end here.
	!
	bz,pt	%ncc, .dcofh
	nop
	!
	! Something is left - do it byte for byte.
	!
	ba,pt	%ncc, .dcocl
	ldub	[%o0 + %o3], %o4	! load next byte
	!
	! Four byte copy loop.  %o2 is the number of 4 byte chunks to copy.
	!
	.align	32
.dodfbc:
	lduw	[%o0 + %o3], %o4
	deccc	%o2
	sta	%o4, [%o1 + %o3]ASI_USER
	bg,pt	%ncc, .dodfbc
	addcc	%o3, 4, %o3		! delay slot, advance offset
	!
	! End of copy loop.  Check to see if we're done.  Most
	! four byte aligned copies end here.
	!
	bz,pt	%ncc, .dcofh
	nop
	!
	! Something is left.  Do it byte for byte.
	!
	ba,pt	%ncc, .dcocl
	ldub	[%o0 + %o3], %o4	! load next byte
	!
	! two byte aligned copy loop.  %o2 is the number of 2 byte chunks to
	! copy.
	!
	.align	32
.dodtbc:
	lduh	[%o0 + %o3], %o4
	deccc	%o2
	stha	%o4, [%o1 + %o3]ASI_USER
	bg,pt	%ncc, .dodtbc
	addcc	%o3, 2, %o3		! delay slot, advance offset
	!
	! End of copy loop.  Anything left?
	!
	bz,pt	%ncc, .dcofh
	nop
	!
	! Deal with the last byte
	!
	ldub	[%o0 + %o3], %o4
	stba	%o4, [%o1 + %o3]ASI_USER
.dcofh:
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	clr	%o0

.copyout_err:
	! Fault path: vector through t_copyop if present, else return -1.
	ldn	[THREAD_REG + T_COPYOPS], %o4
	brz	%o4, 2f
	nop
	ldn	[%o4 + CP_COPYOUT], %g2
	jmp	%g2
	nop
2:
	retl
	mov	-1, %o0
	SET_SIZE(copyout)

#endif	/* lint */


#ifdef lint

/*ARGSUSED*/
int
xcopyout(const void *kaddr, void *uaddr, size_t count)
{ return (0); }

#else	/* lint */

/*
 * Same as copyout but the fault path returns the errno from %g1
 * (via t_copyop's CP_XCOPYOUT if installed).
 */
	ENTRY(xcopyout)
	sethi	%hi(.xcopyout_err), REAL_LOFAULT
	b	.do_copyout
	or	REAL_LOFAULT, %lo(.xcopyout_err), REAL_LOFAULT
.xcopyout_err:
	ldn	[THREAD_REG + T_COPYOPS], %o4
	brz	%o4, 2f
	nop
	ldn	[%o4 + CP_XCOPYOUT], %g2
	jmp	%g2
	nop
2:
	retl
	mov	%g1, %o0		! return errno from %g1
	SET_SIZE(xcopyout)

#endif	/* lint */

#ifdef lint

/*ARGSUSED*/
int
xcopyout_little(const void *kaddr, void *uaddr, size_t count)
{ return (0); }

#else	/* lint */

/*
 * Byte-for-byte copy to user space through the little-endian secondary
 * ASI (ASI_AIUSL).  Uses the .little_err handler (defined at the end of
 * xcopyin_little) to return errno on fault.
 */
	ENTRY(xcopyout_little)
	sethi	%hi(.little_err), %o4
	ldn	[THREAD_REG + T_LOFAULT], %o5
	or	%o4, %lo(.little_err), %o4
	membar	#Sync			! sync error barrier
	stn	%o4, [THREAD_REG + T_LOFAULT]

	subcc	%g0, %o2, %o3
	add	%o0, %o2, %o0
	bz,pn	%ncc, 2f		! check for zero bytes
	sub	%o2, 1, %o4
	add	%o0, %o4, %o0		! start w/last byte
	add	%o1, %o2, %o1
	ldub	[%o0+%o3], %o4

1:	stba	%o4, [%o1+%o3]ASI_AIUSL
	inccc	%o3
	sub	%o0, 2, %o0		! get next byte
	bcc,a,pt %ncc, 1b
	ldub	[%o0+%o3], %o4

2:	membar	#Sync			! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	mov	%g0, %o0		! return (0)
	SET_SIZE(xcopyout_little)

#endif	/* lint */

/*
 * Copy user data to kernel space (copyin/xcopyin/xcopyin_little)
 */

#if defined(lint)

/*ARGSUSED*/
int
copyin(const void *uaddr, void *kaddr, size_t count)
{ return (0); }

#else	/* lint */

	ENTRY(copyin)
	sethi	%hi(.copyin_err), REAL_LOFAULT
	or	REAL_LOFAULT, %lo(.copyin_err), REAL_LOFAULT

.do_copyin:
	!
	! Check the length and bail if zero.
	!
	tst	%o2
	bnz,pt	%ncc, 1f
	nop
	retl
	clr	%o0
1:
	sethi	%hi(copyio_fault), %o3
	ldn	[THREAD_REG + T_LOFAULT], SAVED_LOFAULT
	or	%o3, %lo(copyio_fault), %o3
	membar	#Sync
	stn	%o3, [THREAD_REG + T_LOFAULT]

	mov	%o0, SAVE_SRC
	mov	%o1, SAVE_DST
	mov	%o2, SAVE_COUNT

	!
	! Check to see if we're more than SMALL_LIMIT.
	!
	subcc	%o2, SMALL_LIMIT, %o3
	bgu,a,pt %ncc, .dci_ns
	or	%o0, %o1, %o3		! delay slot, fold addrs for align test

.dcibcp:
	sub	%g0, %o2, %o3		! setup for copy loop
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	ba,pt	%ncc, .dcicl
	lduba	[%o0 + %o3]ASI_USER, %o4
	!
	! %o0 and %o1 point at the end and remain pointing at the end
	! of their buffers.  We pull things out by adding %o3 (which is
	! the negation of the length) to the buffer end which gives us
	! the current location in the buffers.  By incrementing %o3 we walk
	! through both buffers without having to bump each buffer's
	! pointer.  A very fast 4 instruction loop.
	!
	.align	16
.dcicl:
	stb	%o4, [%o1 + %o3]
	inccc	%o3
	bl,a,pt	%ncc, .dcicl
	lduba	[%o0 + %o3]ASI_USER, %o4
	!
	! We're done.  Go home.
	!
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
	retl
	clr	%o0
	!
	! Try aligned copies from here.
	!
.dci_ns:
	!
	! See if we're single byte aligned.  If we are, check the
	! limit for single byte copies.  If we're smaller, or equal,
	! bounce to the byte for byte copy loop.  Otherwise do it in
	! HW (if enabled).
	!
	btst	1, %o3
	bz,a,pt	%icc, .dcih8
	btst	7, %o3			! delay slot, test 8-byte alignment
	ba	.dcibcp
	nop

.dcih8:
	!
	! 8 byte aligned?
	!
	bnz,a	%ncc, .dcih4
	btst	3, %o3			! delay slot, test 4-byte alignment
.dcis8:
	!
	! Housekeeping for copy loops.  Uses same idea as in the byte for
	! byte copy loop above.
	!
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .didebc
	srl	%o2, 3, %o2		! Number of 8 byte chunks to copy
	!
	! 4 byte aligned?
	!
.dcih4:
	bnz	%ncc, .dcih2
	nop
.dcis4:
	!
	! Housekeeping for copy loops.  Uses same idea as in the byte
	! for byte copy loop above.
	!
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .didfbc
	srl	%o2, 2, %o2		! Number of 4 byte chunks to copy
.dcih2:
.dcis2:
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
	sub	%g0, %o2, %o3
	ba,pt	%ncc, .didtbc
	srl	%o2, 1, %o2		! Number of 2 byte chunks to copy

.didebc:
	ldxa	[%o0 + %o3]ASI_USER, %o4
	deccc	%o2
	stx	%o4, [%o1 + %o3]
	bg,pt	%ncc, .didebc
	addcc	%o3, 8, %o3		! delay slot, advance offset
	!
	! End of copy loop.  Most 8 byte aligned copies end here.
	!
	bz,pt	%ncc, .dcifh
	nop
	!
	! Something is left.  Do it byte for byte.
	!
	ba,pt	%ncc, .dcicl
	lduba	[%o0 + %o3]ASI_USER, %o4
	!
	! 4 byte copy loop.  %o2 is number of 4 byte chunks to copy.
	!
	.align	32
.didfbc:
	lduwa	[%o0 + %o3]ASI_USER, %o4
	deccc	%o2
	st	%o4, [%o1 + %o3]
	bg,pt	%ncc, .didfbc
	addcc	%o3, 4, %o3		! delay slot, advance offset
	!
	! End of copy loop.  Most 4 byte aligned copies end here.
	!
	bz,pt	%ncc, .dcifh
	nop
	!
	! Something is left.  Do it byte for byte.
	!
	ba,pt	%ncc, .dcicl
	lduba	[%o0 + %o3]ASI_USER, %o4
	!
	! 2 byte aligned copy loop.  %o2 is number of 2 byte chunks to
	! copy.
	!
	.align	32
.didtbc:
	lduha	[%o0 + %o3]ASI_USER, %o4
	deccc	%o2
	sth	%o4, [%o1 + %o3]
	bg,pt	%ncc, .didtbc
	addcc	%o3, 2, %o3		! delay slot, advance offset
	!
	! End of copy loop.  Most 2 byte aligned copies end here.
	!
	bz,pt	%ncc, .dcifh
	nop
	!
	! Deal with the last byte
	!
	lduba	[%o0 + %o3]ASI_USER, %o4
	stb	%o4, [%o1 + %o3]
.dcifh:
	membar	#Sync
	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	clr	%o0

.copyin_err:
	! Fault path: vector through t_copyop if present, else return -1.
	ldn	[THREAD_REG + T_COPYOPS], %o4
	brz	%o4, 2f
	nop
	ldn	[%o4 + CP_COPYIN], %g2
	jmp	%g2
	nop
2:
	retl
	mov	-1, %o0
	SET_SIZE(copyin)

#endif	/* lint */

#ifdef lint

/*ARGSUSED*/
int
xcopyin(const void *uaddr, void *kaddr, size_t count)
{ return (0); }

#else	/* lint */

/*
 * Same as copyin but the fault path returns the errno from %g1
 * (via t_copyop's CP_XCOPYIN if installed).
 */
	ENTRY(xcopyin)
	sethi	%hi(.xcopyin_err), REAL_LOFAULT
	b	.do_copyin
	or	REAL_LOFAULT, %lo(.xcopyin_err), REAL_LOFAULT
.xcopyin_err:
	ldn	[THREAD_REG + T_COPYOPS], %o4
	brz	%o4, 2f
	nop
	ldn	[%o4 + CP_XCOPYIN], %g2
	jmp	%g2
	nop
2:
	retl
	mov	%g1, %o0		! return errno from %g1
	SET_SIZE(xcopyin)

#endif	/* lint */

#ifdef lint

/*ARGSUSED*/
int
xcopyin_little(const void *uaddr, void *kaddr, size_t count)
{ return (0); }

#else	/* lint */

/*
 * Byte-for-byte copy from user space through the little-endian
 * secondary ASI (ASI_AIUSL).  The shared .little_err fault handler
 * (also used by xcopyout_little) restores t_lofault and returns the
 * errno from %g1.
 */
	ENTRY(xcopyin_little)
	sethi	%hi(.little_err), %o4
	ldn	[THREAD_REG + T_LOFAULT], %o5
	or	%o4, %lo(.little_err), %o4
	membar	#Sync			! sync error barrier
	stn	%o4, [THREAD_REG + T_LOFAULT]

	subcc	%g0, %o2, %o3
	add	%o0, %o2, %o0
	bz,pn	%ncc, 2f		! check for zero bytes
	sub	%o2, 1, %o4
	add	%o0, %o4, %o0		! start w/last byte
	add	%o1, %o2, %o1
	lduba	[%o0+%o3]ASI_AIUSL, %o4

1:	stb	%o4, [%o1+%o3]
	inccc	%o3
	sub	%o0, 2, %o0		! get next byte
	bcc,a,pt %ncc, 1b
	lduba	[%o0+%o3]ASI_AIUSL, %o4

2:	membar	#Sync			! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	mov	%g0, %o0		! return (0)

.little_err:
	membar	#Sync			! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	mov	%g1, %o0		! return errno from %g1
	SET_SIZE(xcopyin_little)

#endif	/* lint */


/*
 * Copy a block of storage - must not overlap (from + len <= to).
 * No fault handler installed (to be called under on_fault())
 */
#if defined(lint)

/* ARGSUSED */
void
copyin_noerr(const void *ufrom, void *kto, size_t count)
{}

#else	/* lint */

	ENTRY(copyin_noerr)
	sethi	%hi(.copyio_noerr), REAL_LOFAULT
	b	.do_copyin
	or	REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
.copyio_noerr:
	! On fault, chain to the caller's on_fault() handler saved in
	! SAVED_LOFAULT (shared by copyout_noerr).
	jmp	SAVED_LOFAULT
	nop
	SET_SIZE(copyin_noerr)

#endif	/* lint */

/*
 * Copy a block of storage - must not overlap (from + len <= to).
 * No fault handler installed (to be called under on_fault())
 */

#if defined(lint)

/* ARGSUSED */
void
copyout_noerr(const void *kfrom, void *uto, size_t count)
{}

#else	/* lint */

	ENTRY(copyout_noerr)
	sethi	%hi(.copyio_noerr), REAL_LOFAULT
	b	.do_copyout
	or	REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
	SET_SIZE(copyout_noerr)

#endif	/* lint */

#if defined(lint)

int use_hw_bcopy = 1;
int use_hw_bzero = 1;

#else /* !lint */

	.align	4
	DGDEF(use_hw_bcopy)
	.word	1
	DGDEF(use_hw_bzero)
	.word	1

	.align	64
	.section ".text"
#endif /* !lint */


/*
 * hwblkclr - clears block-aligned, block-multiple-sized regions that are
 * longer than 256 bytes in length.
 * For the generic module we will simply
 * call bzero and return 1 to ensure that the pages in cache should be
 * flushed to ensure integrity.
 * Caller is responsible for ensuring use_hw_bzero is true and that
 * kpreempt_disable() has been called.
 */
#ifdef lint
/*ARGSUSED*/
int
hwblkclr(void *addr, size_t len)
{
	return(0);
}
#else /* lint */
	! %i0 - start address
	! %i1 - length of region (multiple of 64)

	ENTRY(hwblkclr)
	save	%sp, -SA(MINFRAME), %sp

	! Simply call bzero and notify the caller that bzero was used
	mov	%i0, %o0
	call	bzero
	mov	%i1, %o1		! (delay slot) length arg for bzero
	ret
	restore	%g0, 1, %o0	! return (1) - did not use block operations

	SET_SIZE(hwblkclr)
#endif /* lint */

#ifdef lint
/* Copy 32 bytes of data from src to dst using physical addresses */
/*ARGSUSED*/
void
hw_pa_bcopy32(uint64_t src, uint64_t dst)
{}
#else /*!lint */

/*
 * Copy 32 bytes of data from src (%o0) to dst (%o1)
 * using physical addresses.
 * Interrupts are disabled (PSTATE_IE cleared) around the ASI_MEM
 * accesses and the entry %pstate is restored on return.
 */
	ENTRY_NP(hw_pa_bcopy32)
	rdpr	%pstate, %g1		! %g1 = saved pstate
	andn	%g1, PSTATE_IE, %g2	! clear interrupt-enable bit
	wrpr	%g0, %g2, %pstate

	! Load all four doublewords first, then issue all four stores.
	ldxa	[%o0]ASI_MEM, %o2
	add	%o0, 8, %o0
	ldxa	[%o0]ASI_MEM, %o3
	add	%o0, 8, %o0
	ldxa	[%o0]ASI_MEM, %o4
	add	%o0, 8, %o0
	ldxa	[%o0]ASI_MEM, %o5
	stxa	%o2, [%o1]ASI_MEM
	add	%o1, 8, %o1
	stxa	%o3, [%o1]ASI_MEM
	add	%o1, 8, %o1
	stxa	%o4, [%o1]ASI_MEM
	add	%o1, 8, %o1
	stxa	%o5, [%o1]ASI_MEM

	membar	#Sync
	retl
	wrpr	%g0, %g1, %pstate	! (delay slot) restore pstate/interrupts
	SET_SIZE(hw_pa_bcopy32)
#endif /* lint */

/*
 * Zero a block of storage.
 *
 * uzero is used by the kernel to zero a block in user address space.
 */


#if defined(lint)

/* ARGSUSED */
int
kzero(void *addr, size_t count)
{ return(0); }

/* ARGSUSED */
void
uzero(void *addr, size_t count)
{}

#else /* lint */

	! uzero(addr %o0, count %o1): zero user memory via ASI_USER.
	! Falls into the common .do_zero code (in bzero below).
	ENTRY(uzero)
	!
	! Set a new lo_fault handler only if we came in with one
	! already specified.
	!
	wr	%g0, ASI_USER, %asi
	ldn	[THREAD_REG + T_LOFAULT], %o5
	tst	%o5
	bz,pt	%ncc, .do_zero
	sethi	%hi(.zeroerr), %o2	! (delay slot)
	or	%o2, %lo(.zeroerr), %o2
	membar	#Sync
	ba,pt	%ncc, .do_zero
	stn	%o2, [THREAD_REG + T_LOFAULT]	! (delay slot) install handler

	! kzero(addr %o0, count %o1): zero kernel memory, returning an
	! errno on fault.  Also falls into the common .do_zero code.
	ENTRY(kzero)
	!
	! Always set a lo_fault handler
	!
	wr	%g0, ASI_P, %asi
	ldn	[THREAD_REG + T_LOFAULT], %o5
	sethi	%hi(.zeroerr), %o2
	or	%o5, LOFAULT_SET, %o5	! flag that kzero installed a handler
	or	%o2, %lo(.zeroerr), %o2
	membar	#Sync
	ba,pt	%ncc, .do_zero
	stn	%o2, [THREAD_REG + T_LOFAULT]	! (delay slot) install handler

/*
 * We got here because of a fault during kzero or if
 * uzero or bzero was called with t_lofault non-zero.
 * Otherwise we've already run screaming from the room.
 * Errno value is in %g1. Note that we're here iff
 * we did set t_lofault.
 */
.zeroerr:
	!
	! Undo asi register setting. Just set it to be the
	! kernel default without checking.
	!
	wr	%g0, ASI_P, %asi

	!
	! We did set t_lofault. It may well have been zero coming in.
	!
1:
	tst	%o5
	membar #Sync
	bne,pn	%ncc, 3f
	andncc	%o5, LOFAULT_SET, %o5	! (delay slot) strip our flag
2:
	!
	! Old handler was zero. Just return the error.
	!
	retl				! return
	mov	%g1, %o0		! error code from %g1
3:
	!
	! We're here because %o5 was non-zero. It was non-zero
	! because either LOFAULT_SET was present, a previous fault
	! handler was present or both. In all cases we need to reset
	! T_LOFAULT to the value of %o5 after clearing LOFAULT_SET
	! before we either simply return the error or we invoke the
	! previously specified handler.
	!
	be	%ncc, 2b		! Z from andncc: no previous handler
	stn	%o5, [THREAD_REG + T_LOFAULT]	! (delay slot) restore handler
	jmp	%o5			! goto real handler
	nop
	SET_SIZE(kzero)
	SET_SIZE(uzero)

#endif /* lint */

/*
 * Zero a block of storage.
 */

#if defined(lint)

/* ARGSUSED */
void
bzero(void *addr, size_t count)
{}

#else /* lint */

	! bzero(addr %o0, count %o1): zero kernel memory via ASI_P.
	! A lofault handler is installed only if the caller already
	! had one (so on_fault() regions keep their protection).
	! The .do_zero code below is shared with uzero/kzero; all
	! stores go through %asi, which those entry points preset.
	ENTRY(bzero)
	wr	%g0, ASI_P, %asi

	ldn	[THREAD_REG + T_LOFAULT], %o5	! save old vector
	tst	%o5
	bz,pt	%ncc, .do_zero
	sethi	%hi(.zeroerr), %o2	! (delay slot)
	or	%o2, %lo(.zeroerr), %o2
	membar	#Sync			! sync error barrier
	stn	%o2, [THREAD_REG + T_LOFAULT]	! install new vector

.do_zero:
	cmp	%o1, 7			! tiny counts: byte loop only
	blu,pn	%ncc, .byteclr
	nop

	cmp	%o1, 15			! small counts: word-align path
	blu,pn	%ncc, .wdalign
	nop

	andcc	%o0, 7, %o3		! is add aligned on a 8 byte bound
	bz,pt	%ncc, .blkalign		! already double aligned
	sub	%o3, 8, %o3		! -(bytes till double aligned)
	add	%o1, %o3, %o1		! update o1 with new count

	! Clear bytes one at a time until %o0 is 8-byte aligned.
1:
	stba	%g0, [%o0]%asi
	inccc	%o3
	bl,pt	%ncc, 1b
	inc	%o0			! (delay slot) next byte

	! Now address is double aligned
.blkalign:
	cmp	%o1, 0x80		! check if there are 128 bytes to set
	blu,pn	%ncc, .bzero_small
	mov	%o1, %o3		! (delay slot) %o3 = remaining count

	andcc	%o0, 0x3f, %o3		! is block aligned?
	bz,pt	%ncc, .bzero_blk
	sub	%o3, 0x40, %o3		! -(bytes till block aligned)
	add	%o1, %o3, %o1		! o1 is the remainder

	! Clear -(%o3) bytes till block aligned
1:
	stxa	%g0, [%o0]%asi
	addcc	%o3, 8, %o3
	bl,pt	%ncc, 1b
	add	%o0, 8, %o0		! (delay slot) next doubleword

.bzero_blk:
	and	%o1, 0x3f, %o3		! calc bytes left after blk clear
	andn	%o1, 0x3f, %o4		! calc size of blocks in bytes

	cmp	%o4, 0x100		! 256 bytes or more
	blu,pn	%ncc, 3f
	nop

	! 256-byte-per-iteration clearing loop.  The first doubleword
	! of each of the four 64-byte blocks is stored up front
	! (offsets 0x0/0x40/0x80/0xc0), presumably to touch each
	! cache line early; the remaining doublewords follow.
2:
	stxa	%g0, [%o0+0x0]%asi
	stxa	%g0, [%o0+0x40]%asi
	stxa	%g0, [%o0+0x80]%asi
	stxa	%g0, [%o0+0xc0]%asi

	stxa	%g0, [%o0+0x8]%asi
	stxa	%g0, [%o0+0x10]%asi
	stxa	%g0, [%o0+0x18]%asi
	stxa	%g0, [%o0+0x20]%asi
	stxa	%g0, [%o0+0x28]%asi
	stxa	%g0, [%o0+0x30]%asi
	stxa	%g0, [%o0+0x38]%asi

	stxa	%g0, [%o0+0x48]%asi
	stxa	%g0, [%o0+0x50]%asi
	stxa	%g0, [%o0+0x58]%asi
	stxa	%g0, [%o0+0x60]%asi
	stxa	%g0, [%o0+0x68]%asi
	stxa	%g0, [%o0+0x70]%asi
	stxa	%g0, [%o0+0x78]%asi

	stxa	%g0, [%o0+0x88]%asi
	stxa	%g0, [%o0+0x90]%asi
	stxa	%g0, [%o0+0x98]%asi
	stxa	%g0, [%o0+0xa0]%asi
	stxa	%g0, [%o0+0xa8]%asi
	stxa	%g0, [%o0+0xb0]%asi
	stxa	%g0, [%o0+0xb8]%asi

	stxa	%g0, [%o0+0xc8]%asi
	stxa	%g0, [%o0+0xd0]%asi
	stxa	%g0, [%o0+0xd8]%asi
	stxa	%g0, [%o0+0xe0]%asi
	stxa	%g0, [%o0+0xe8]%asi
	stxa	%g0, [%o0+0xf0]%asi
	stxa	%g0, [%o0+0xf8]%asi

	sub	%o4, 0x100, %o4
	cmp	%o4, 0x100
	bgu,pt	%ncc, 2b
	add	%o0, 0x100, %o0		! (delay slot) advance base

3:
	! ... check if 64 bytes to set
	cmp	%o4, 0x40
	blu	%ncc, .bzero_blk_done
	nop

	! 64-byte-per-iteration clearing loop.
4:
	stxa	%g0, [%o0+0x0]%asi
	stxa	%g0, [%o0+0x8]%asi
	stxa	%g0, [%o0+0x10]%asi
	stxa	%g0, [%o0+0x18]%asi
	stxa	%g0, [%o0+0x20]%asi
	stxa	%g0, [%o0+0x28]%asi
	stxa	%g0, [%o0+0x30]%asi
	stxa	%g0, [%o0+0x38]%asi

	subcc	%o4, 0x40, %o4
	bgu,pt	%ncc, 3b
	add	%o0, 0x40, %o0		! (delay slot) advance base

.bzero_blk_done:
	membar	#Sync

.bzero_small:
	! Set the remaining doubles
	subcc	%o3, 8, %o3		! Can we store any doubles?
	blu,pn	%ncc, .byteclr
	and	%o1, 7, %o1		! calc bytes left after doubles

.dbclr:
	stxa	%g0, [%o0]%asi		! Clear the doubles
	subcc	%o3, 8, %o3
	bgeu,pt	%ncc, .dbclr
	add	%o0, 8, %o0		! (delay slot) next doubleword

	ba	.byteclr
	nop

.wdalign:
	andcc	%o0, 3, %o3		! is add aligned on a word boundary
	bz,pn	%ncc, .wdclr
	andn	%o1, 3, %o3		! create word sized count in %o3

	dec	%o1			! decrement count
	stba	%g0, [%o0]%asi		! clear a byte
	ba	.wdalign
	inc	%o0			! next byte

.wdclr:
	sta	%g0, [%o0]%asi		! 4-byte clearing loop
	subcc	%o3, 4, %o3
	bnz,pt	%ncc, .wdclr
	inc	4, %o0			! (delay slot) next word

	and	%o1, 3, %o1		! leftover count, if any

.byteclr:
	! Set the leftover bytes
	brz	%o1, .bzero_exit
	nop

7:
	deccc	%o1			! byte clearing loop
	stba	%g0, [%o0]%asi
	bgu,pt	%ncc, 7b
	inc	%o0			! (delay slot) next byte

.bzero_exit:
	!
	! We're just concerned with whether t_lofault was set
	! when we came in. We end up here from either kzero()
	! or bzero(). kzero() *always* sets a lofault handler.
	! It ors LOFAULT_SET into %o5 to indicate it has done
	! this even if the value of %o5 is otherwise zero.
	! bzero() sets a lofault handler *only* if one was
	! previously set. Accordingly we need to examine
	! %o5 and if it is non-zero be sure to clear LOFAULT_SET
	! before resetting the error handler.
	!
	tst	%o5
	bz	%ncc, 1f
	andn	%o5, LOFAULT_SET, %o5	! (delay slot) strip our flag
	membar	#Sync			! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
1:
	retl
	clr	%o0			! return (0)

	SET_SIZE(bzero)
#endif /* lint */