1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 # ident "%Z%%M% %I%     %E% SMI"
  27 
  28 #include <sys/param.h>
  29 #include <sys/errno.h>
  30 #include <sys/asm_linkage.h>
  31 #include <sys/vtrace.h>
  32 #include <sys/machthread.h>
  33 #include <sys/clock.h>
  34 #include <sys/asi.h>
  35 #include <sys/fsr.h>
  36 #include <sys/privregs.h>
  37 
  38 #include "assym.h"
  39 
  40 
  41 /*
  42  * At or below this number of bytes we always copy byte-for-byte.
  43  */
  44 #define SMALL_LIMIT     7
  45 
  46 /*
  47  * LOFAULT_SET : Flag set by kzero and kcopy to indicate that t_lofault
  48  * handler was set
  49  */
  50 #define LOFAULT_SET 2
  51 
  52 
  53 /*
  54  * Copy a block of storage, returning an error code if `from' or
  55  * `to' takes a kernel pagefault which cannot be resolved.
  56  * Returns errno value on pagefault error, 0 if all ok
  57  */
  58 
  59 
  60 
  61         .seg    ".text"
  62         .align  4
  63 
  64         ENTRY(kcopy)
             !
             ! kcopy(from, to, count)
             !
             ! Fault-protected variant of bcopy.  Installs .copyerr as the
             ! thread's t_lofault handler and then joins bcopy's common code
             ! at .do_copy, which does the copy and restores t_lofault.
             !
             ! In:   %o0 = from, %o1 = to, %o2 = count (%i0-%i2 after the save)
             ! Out:  %o0 = 0 on success, or the value left in %g1 on a fault
             !
             ! %o5 carries the previous t_lofault value with LOFAULT_SET ORed
             ! in.  bcopy enters .do_copy with %o5 == 0, so a non-zero %o5
             ! tells the common exit path that a handler must be restored,
             ! even when the previous handler value was itself zero.
             !
  65 
  66         save    %sp, -SA(MINFRAME), %sp
  67         set     .copyerr, %l7                   ! copyerr is lofault value
  68         ldn     [THREAD_REG + T_LOFAULT], %o5   ! save existing handler
  69         or      %o5, LOFAULT_SET, %o5           ! mark "handler was installed"
  70         membar  #Sync                           ! sync error barrier
  71         b       .do_copy                        ! common code
  72         stn     %l7, [THREAD_REG + T_LOFAULT]   ! set t_lofault
  73 
  74 /*
  75  * We got here because of a fault during kcopy.
  76  * Errno value is in %g1.
  77  */
  78 .copyerr:
  79         ! The kcopy() *always* sets a t_lofault handler and it ORs LOFAULT_SET
  80         ! into %o5 to indicate it has set t_lofault handler. Need to clear
  81         ! LOFAULT_SET flag before restoring the error handler.
  82         andn    %o5, LOFAULT_SET, %o5
  83         membar  #Sync                   ! sync error barrier
  84         stn     %o5, [THREAD_REG + T_LOFAULT]   ! restore old t_lofault
  85         ret
  86         restore %g1, 0, %o0             ! delay slot, return value = %g1
  87 
  88         SET_SIZE(kcopy)
  89 
  90 
  91 /*
  92  * Copy a block of storage - must not overlap (from + len <= to).
  93  */
  94 
  95         ENTRY(bcopy)
             !
             ! bcopy(from, to, count)
             !
             ! In:   %o0 = from, %o1 = to, %o2 = count (%i0-%i2 after the save)
             ! Out:  %o0 = 0
             !
             ! Strategy, chosen at .do_copy (which is also where kcopy joins):
             !   count < 12              byte loop (.bytecp/.dbytecp)
             !   (from ^ to) & 7 == 0    doubleword loop (.aldoubcp), unless
             !                           count < 56, which uses the word loop
             !   (from ^ to) & 3 == 0    word loop (.alwordcp/.wordcp)
             !   otherwise               word loads and stores with shift/merge
             !
             ! Values kept for the whole routine:
             !   %o5  0 for bcopy; old t_lofault | LOFAULT_SET when from kcopy
             !   %g5  original destination address (passed to sync_icache)
             !   %l6  original byte count (passed to sync_icache)
             !
  96 
  97         save    %sp, -SA(MINFRAME), %sp
  98         clr     %o5                     ! flag LOFAULT_SET is not set for bcopy
  99 
 100 .do_copy:
 101         mov     %i1, %g5                ! save dest addr start
 102 
 103         mov     %i2, %l6                ! save size
 104 
 105         cmp     %i2, 12                 ! for small counts
 106         blu     %ncc, .bytecp           ! just copy bytes
 107           .empty
 108 
 109         !
 110         ! use aligned transfers where possible
 111         !
 112         xor     %i0, %i1, %o4           ! xor from and to address
 113         btst    7, %o4                  ! if lower three bits zero
 114         bz      .aldoubcp               ! can align on double boundary
 115         .empty  ! assembler complains about label
 116 
 117         xor     %i0, %i1, %o4           ! xor from and to address
 118         btst    3, %o4                  ! if lower two bits zero
 119         bz      .alwordcp               ! can align on word boundary
 120         btst    3, %i0                  ! delay slot, from address unaligned?
 121         !
 122         ! use aligned reads and writes where possible
 123         ! this differs from wordcp in that it copes
 124         ! with odd alignment between source and destination
 125         ! using word reads and writes with the proper shifts
 126         ! in between to align transfers to and from memory
 127         ! i0 - src address, i1 - dest address, i2 - count
 128         ! i3, i4 - tmps used for generating complete word
 129         ! i5 (word to write)
 130         ! l0 size in bits of upper part of source word (US)
 131         ! l1 size in bits of lower part of source word (LS = 32 - US)
 132         ! l2 size in bits of upper part of destination word (UD)
 133         ! l3 size in bits of lower part of destination word (LD = 32 - UD)
 134         ! l4 number of bytes leftover after aligned transfers complete
 135         ! l5 the number 32
 136         !
 137         mov     32, %l5                 ! load an oft-needed constant
 138         bz      .align_dst_only
 139         btst    3, %i1                  ! is destination address aligned?
 140         clr     %i4                     ! clear registers used in either case
 141         bz      .align_src_only
 142         clr     %l0
 143         !
 144         ! both source and destination addresses are unaligned
 145         !
 146 1:                                      ! align source
 147         ldub    [%i0], %i3              ! read a byte from source address
 148         add     %i0, 1, %i0             ! increment source address
 149         or      %i4, %i3, %i4           ! or in with previous bytes (if any)
 150         btst    3, %i0                  ! is source aligned?
 151         add     %l0, 8, %l0             ! increment size of upper source (US)
 152         bnz,a   1b
 153         sll     %i4, 8, %i4             ! make room for next byte
 154 
 155         sub     %l5, %l0, %l1           ! generate shift left count (LS)
 156         sll     %i4, %l1, %i4           ! prepare to get rest
 157         ld      [%i0], %i3              ! read a word
 158         add     %i0, 4, %i0             ! increment source address
 159         srl     %i3, %l0, %i5           ! upper src bits into lower dst bits
 160         or      %i4, %i5, %i5           ! merge
 161         mov     24, %l3                 ! align destination
 162 1:
 163         srl     %i5, %l3, %i4           ! prepare to write a single byte
 164         stb     %i4, [%i1]              ! write a byte
 165         add     %i1, 1, %i1             ! increment destination address
 166         sub     %i2, 1, %i2             ! decrement count
 167         btst    3, %i1                  ! is destination aligned?
 168         bnz,a   1b
 169         sub     %l3, 8, %l3             ! delay slot, decrement shift count (LD)
 170         sub     %l5, %l3, %l2           ! generate shift left count (UD)
 171         sll     %i5, %l2, %i5           ! move leftover into upper bytes
 172         cmp     %l2, %l0                ! cmp # reqd to fill dst w old src left
 173         bgu     %ncc, .more_needed      ! need more to fill than we have
 174         nop
 175 
 176         sll     %i3, %l1, %i3           ! clear upper used byte(s)
 177         srl     %i3, %l1, %i3
 178         ! get the odd bytes between alignments
 179         sub     %l0, %l2, %l0           ! regenerate shift count
 180         sub     %l5, %l0, %l1           ! generate new shift left count (LS)
 181         and     %i2, 3, %l4             ! must do remaining bytes if count%4 > 0
 182         andn    %i2, 3, %i2             ! # of aligned bytes that can be moved
 183         srl     %i3, %l0, %i4
 184         or      %i5, %i4, %i5
 185         st      %i5, [%i1]              ! write a word
 186         subcc   %i2, 4, %i2             ! decrement count
 187         bz      %ncc, .unalign_out
 188         add     %i1, 4, %i1             ! increment destination address
 189 
 190         b       2f
 191         sll     %i3, %l1, %i5           ! get leftover into upper bits
 192 .more_needed:
 193         sll     %i3, %l0, %i3           ! save remaining byte(s)
 194         srl     %i3, %l0, %i3
 195         sub     %l2, %l0, %l1           ! regenerate shift count
 196         sub     %l5, %l1, %l0           ! generate new shift left count
 197         sll     %i3, %l1, %i4           ! move to fill empty space
 198         b       3f
 199         or      %i5, %i4, %i5           ! merge to complete word
 200         !
 201         ! the source address is aligned and destination is not
 202         !
 203 .align_dst_only:
 204         ld      [%i0], %i4              ! read a word
 205         add     %i0, 4, %i0             ! increment source address
 206         mov     24, %l0                 ! initial shift alignment count
 207 1:
 208         srl     %i4, %l0, %i3           ! prepare to write a single byte
 209         stb     %i3, [%i1]              ! write a byte
 210         add     %i1, 1, %i1             ! increment destination address
 211         sub     %i2, 1, %i2             ! decrement count
 212         btst    3, %i1                  ! is destination aligned?
 213         bnz,a   1b
 214         sub     %l0, 8, %l0             ! delay slot, decrement shift count
 215 .xfer:
 216         sub     %l5, %l0, %l1           ! generate shift left count
 217         sll     %i4, %l1, %i5           ! get leftover
 218 3:
 219         and     %i2, 3, %l4             ! must do remaining bytes if count%4 > 0
 220         andn    %i2, 3, %i2             ! # of aligned bytes that can be moved
             !
             ! Main shift/merge loop: each pass reads one aligned source
             ! word, completes the pending destination word in %i5 with its
             ! upper bits, stores it, and keeps the rest as the next leftover.
             !
 221 2:
 222         ld      [%i0], %i3              ! read a source word
 223         add     %i0, 4, %i0             ! increment source address
 224         srl     %i3, %l0, %i4           ! upper src bits into lower dst bits
 225         or      %i5, %i4, %i5           ! merge with upper dest bits (leftover)
 226         st      %i5, [%i1]              ! write a destination word
 227         subcc   %i2, 4, %i2             ! decrement count
 228         bz      %ncc, .unalign_out      ! check if done
 229         add     %i1, 4, %i1             ! increment destination address
 230         b       2b                      ! loop
 231         sll     %i3, %l1, %i5           ! get leftover
 232 .unalign_out:
 233         tst     %l4                     ! any bytes leftover?
 234         bz      %ncc, .cpdone
 235         .empty                          ! allow next instruction in delay slot
 236 1:
 237         sub     %l0, 8, %l0             ! decrement shift
 238         srl     %i3, %l0, %i4           ! upper src byte into lower dst byte
 239         stb     %i4, [%i1]              ! write a byte
 240         subcc   %l4, 1, %l4             ! decrement count
 241         bz      %ncc, .cpdone           ! done?
 242         add     %i1, 1, %i1             ! increment destination
 243         tst     %l0                     ! any more previously read bytes
 244         bnz     %ncc, 1b                ! we have leftover bytes
 245         mov     %l4, %i2                ! delay slot, mv cnt where dbytecp wants
 246         b       .dbytecp                ! let dbytecp do the rest
 247         sub     %i0, %i1, %i0           ! i0 gets the difference of src and dst
 248         !
 249         ! the destination address is aligned and the source is not
 250         !
 251 .align_src_only:
 252         ldub    [%i0], %i3              ! read a byte from source address
 253         add     %i0, 1, %i0             ! increment source address
 254         or      %i4, %i3, %i4           ! or in with previous bytes (if any)
 255         btst    3, %i0                  ! is source aligned?
 256         add     %l0, 8, %l0             ! increment shift count (US)
 257         bnz,a   .align_src_only
 258         sll     %i4, 8, %i4             ! make room for next byte
 259         b,a     .xfer
 260         !
 261         ! if from address unaligned for double-word moves,
 262         ! move bytes till it is, if count is < 56 it could take
 263         ! longer to align the thing than to do the transfer
 264         ! in word size chunks right away
 265         !
 266 .aldoubcp:
 267         cmp     %i2, 56                 ! if count < 56, use wordcp, it takes
 268         blu,a   %ncc, .alwordcp         ! longer to align doubles than words
 269         mov     3, %o0                  ! mask for word alignment
 270         call    .alignit                ! copy bytes until aligned
 271         mov     7, %o0                  ! mask for double alignment
 272         !
 273         ! source and destination are now double-word aligned
 274         ! i3 has aligned count returned by alignit
 275         !
 276         and     %i2, 7, %i2             ! unaligned leftover count
 277         sub     %i0, %i1, %i0           ! i0 gets the difference of src and dst
 278 5:
 279         ldx     [%i0+%i1], %o4          ! read from address
 280         stx     %o4, [%i1]              ! write at destination address
 281         subcc   %i3, 8, %i3             ! dec count
 282         bgu     %ncc, 5b
 283         add     %i1, 8, %i1             ! delay slot, inc to address
 284         cmp     %i2, 4                  ! see if we can copy a word
 285         blu     %ncc, .dbytecp          ! if 3 or less bytes use bytecp
 286         .empty
 287         !
 288         ! for leftover bytes we fall into wordcp, if needed
 289         !
 290 .wordcp:
 291         and     %i2, 3, %i2             ! unaligned leftover count
 292 5:
 293         ld      [%i0+%i1], %o4          ! read from address
 294         st      %o4, [%i1]              ! write at destination address
 295         subcc   %i3, 4, %i3             ! dec count
 296         bgu     %ncc, 5b
 297         add     %i1, 4, %i1             ! delay slot, inc to address
 298         b,a     .dbytecp
 299 
 300         ! we come here to align copies on word boundaries
 301 .alwordcp:
 302         call    .alignit                ! go word-align it
 303         mov     3, %o0                  ! bits that must be zero to be aligned
 304         b       .wordcp
 305         sub     %i0, %i1, %i0           ! i0 gets the difference of src and dst
 306 
 307         !
 308         ! byte copy, works with any alignment
 309         !
 310 .bytecp:
 311         b       .dbytecp
 312         sub     %i0, %i1, %i0           ! i0 gets difference of src and dst
 313 
 314         !
 315         ! differenced byte copy, works with any alignment
 316         ! assumes dest in %i1 and (source - dest) in %i0
 317         !
 318 1:
 319         stb     %o4, [%i1]              ! write to address
 320         inc     %i1                     ! inc to address
 321 .dbytecp:
 322         deccc   %i2                     ! dec count
 323         bgeu,a  %ncc, 1b                ! loop till done
 324         ldub    [%i0+%i1], %o4          ! read from address
             !
             ! Common exit for bcopy and kcopy.  %o5 is zero when entered
             ! through bcopy, so nothing is restored.  The andn sits in the
             ! bz delay slot and therefore executes on both paths.
             !
 325 .cpdone:
 326         membar  #Sync                           ! sync error barrier
 327         ! Restore t_lofault handler, if came here from kcopy().
 328         tst     %o5
 329         bz      %ncc, 1f
 330         andn    %o5, LOFAULT_SET, %o5
 331         stn     %o5, [THREAD_REG + T_LOFAULT]   ! restore old t_lofault
 332 1:
 333         mov     %g5, %o0                ! copy dest address
 334         call    sync_icache
 335         mov     %l6, %o1                ! saved size
 336         ret
 337         restore %g0, 0, %o0             ! return (0)
 338 
 339 /*
 340  * Common code used to align transfers on word and doubleword
 341  * boundaries.  Aligns source and destination and returns a count
 342  * of aligned bytes to transfer in %i3
      *
      * Entered at .alignit by a call with the alignment mask in %o0 (set
      * in the call's delay slot).  Copies one byte at a time, advancing
      * %i0 and %i1 and decrementing %i2, until no mask bit is set in the
      * source address %i0, then returns with retl.
 343  */
 344 1:
 345         inc     %i0                     ! inc from
 346         stb     %o4, [%i1]              ! write a byte
 347         inc     %i1                     ! inc to
 348         dec     %i2                     ! dec count
 349 .alignit:
 350         btst    %o0, %i0                ! %o0 is bit mask to check for alignment
 351         bnz,a   1b
 352         ldub    [%i0], %o4              ! read next byte
 353 
 354         retl
 355         andn    %i2, %o0, %i3           ! return size of aligned bytes
 356         SET_SIZE(bcopy)
 357 
 358 /*
 359  * Block copy with possibly overlapped operands.
 360  */
 361 
 362         ENTRY(ovbcopy)
             !
             ! ovbcopy(from, to, count)
             !
             ! Leaf routine: no save is done and it returns with retl.
             ! In:   %o0 = from, %o1 = to, %o2 = count
             !
             !   count == 0               return immediately
             !   count <= abs(from - to)  no overlap; branch to bcopy with the
             !                            arguments still in %o0-%o2
             !   from < to                copy bytes last-to-first (.ov_bkwd)
             !   otherwise                copy bytes first-to-last (.ov_fwd)
             !
 363         tst     %o2                     ! check count
 364         bgu,a   %ncc, 1f                ! nothing to do or bad arguments
 365         subcc   %o0, %o1, %o3           ! difference of from and to address
 366 
 367         retl                            ! return
 368         nop
 369 1:
 370         bneg,a  %ncc, 2f
 371         neg     %o3                     ! if < 0, make it positive
 372 2:      cmp     %o2, %o3                ! cmp size and abs(from - to)
 373         bleu    %ncc, bcopy             ! if size <= abs(diff): use bcopy,
 374         .empty                          !   no overlap
             ! (the cmp below occupies the delay slot of the branch to bcopy)
 375         cmp     %o0, %o1                ! compare from and to addresses
 376         blu     %ncc, .ov_bkwd          ! if from < to, copy backwards
 377         nop
 378         !
 379         ! Copy forwards.
 380         !
 381 .ov_fwd:
 382         ldub    [%o0], %o3              ! read from address
 383         inc     %o0                     ! inc from address
 384         stb     %o3, [%o1]              ! write to address
 385         deccc   %o2                     ! dec count
 386         bgu     %ncc, .ov_fwd           ! loop till done
 387         inc     %o1                     ! inc to address
 388 
 389         retl                            ! return
 390         nop
 391         !
 392         ! Copy backwards.
 393         !
             ! %o2 is used directly as the byte index, counting down to 0.
             ! The store is in the branch delay slot, so index 0 is still
             ! copied on the final, untaken pass.
             !
 394 .ov_bkwd:
 395         deccc   %o2                     ! dec count
 396         ldub    [%o0 + %o2], %o3        ! get byte at end of src
 397         bgu     %ncc, .ov_bkwd          ! loop till done
 398         stb     %o3, [%o1 + %o2]        ! delay slot, store at end of dst
 399 
 400         retl                            ! return
 401         nop
 402         SET_SIZE(ovbcopy)
 403 
 404 /*
 405  * hwblkpagecopy()
 406  *
 407  * Copies exactly one page.  This routine assumes the caller (ppcopy)
 408  * has already disabled kernel preemption and has checked
 409  * use_hw_bcopy.
 410  */
 411         ENTRY(hwblkpagecopy)
             !
             ! In:   %o0 = source address, %o1 = destination address
             !       (%i0/%i1 after the save)
             ! Out:  %o0 = 0
             !
             ! Copies PAGESIZE bytes with 64-bit loads and stores, then calls
             ! sync_icache on the destination range.
             !
 412         save    %sp, -SA(MINFRAME), %sp
 413 
 414         ! %i0 - source address (arg)
 415         ! %i1 - destination address (arg)
 416         ! %i2 - length of region (not arg)
 417 
 418         set     PAGESIZE, %i2
 419         mov     %i1,    %o0     ! store destination address for flushing
 420 
 421         /*
 422          * Copying exactly one page and PAGESIZE is a multiple of 0x80.
              * Each pass moves 0x80 bytes as two groups of eight ldx/stx
              * pairs staged through %l0-%l7.
 423          */
 424 1:
 425         ldx     [%i0+0x0], %l0
 426         ldx     [%i0+0x8], %l1
 427         ldx     [%i0+0x10], %l2
 428         ldx     [%i0+0x18], %l3
 429         ldx     [%i0+0x20], %l4
 430         ldx     [%i0+0x28], %l5
 431         ldx     [%i0+0x30], %l6
 432         ldx     [%i0+0x38], %l7
 433         stx     %l0, [%i1+0x0]
 434         stx     %l1, [%i1+0x8]
 435         stx     %l2, [%i1+0x10]
 436         stx     %l3, [%i1+0x18]
 437         stx     %l4, [%i1+0x20]
 438         stx     %l5, [%i1+0x28]
 439         stx     %l6, [%i1+0x30]
 440         stx     %l7, [%i1+0x38]
 441 
 442         ldx     [%i0+0x40], %l0
 443         ldx     [%i0+0x48], %l1
 444         ldx     [%i0+0x50], %l2
 445         ldx     [%i0+0x58], %l3
 446         ldx     [%i0+0x60], %l4
 447         ldx     [%i0+0x68], %l5
 448         ldx     [%i0+0x70], %l6
 449         ldx     [%i0+0x78], %l7
 450         stx     %l0, [%i1+0x40]
 451         stx     %l1, [%i1+0x48]
 452         stx     %l2, [%i1+0x50]
 453         stx     %l3, [%i1+0x58]
 454         stx     %l4, [%i1+0x60]
 455         stx     %l5, [%i1+0x68]
 456         stx     %l6, [%i1+0x70]
 457         stx     %l7, [%i1+0x78]
 458 
 459         add     %i0, 0x80, %i0          ! advance source
 460         subcc   %i2, 0x80, %i2          ! bytes remaining
 461         bgu,pt  %xcc, 1b                ! loop while any remain
 462         add     %i1, 0x80, %i1          ! delay slot, advance destination
 463 
 464         ! %o0 contains the dest. address
 465         set     PAGESIZE, %o1
 466         call    sync_icache
 467         nop
 468 
 469         membar #Sync
 470         ret
 471         restore %g0, 0, %o0             ! delay slot, return (0)
 472         SET_SIZE(hwblkpagecopy)
 473 
 474 
 475 /*
 476  * Transfer data to and from user space -
 477  * Note that these routines can cause faults
 478  * It is assumed that the kernel has nothing at
 479  * less than KERNELBASE in the virtual address space.
 480  *
 481  * Note that copyin(9F) and copyout(9F) are part of the
 482  * DDI/DKI which specifies that they return '-1' on "errors."
 483  *
 484  * Sigh.
 485  *
 486  * So there's two extremely similar routines - xcopyin() and xcopyout()
 487  * which return the errno that we've faithfully computed.  This
 488  * allows other callers (e.g. uiomove(9F)) to work correctly.
 489  * Given that these are used pretty heavily, we expand the calling
 490  * sequences inline for all flavours (rather than making wrappers).
 491  *
 492  * There are also stub routines for xcopyout_little and xcopyin_little,
 493  * which currently are intended to handle requests of <= 16 bytes from
 494  * do_unaligned. Future enhancement to make them handle 8k pages efficiently
 495  * is left as an exercise...
 496  */
 497 
 498 /*
 499  * Copy user data to kernel space (copyOP/xcopyOP/copyOP_noerr)
 500  *
 501  * General theory of operation:
 502  *
 503  * None of the copyops routines grab a window.
 504  *
 505  * Flow:
 506  *
 507  * If count == zero return zero.
 508  *
 509  * Store the previous lo_fault handler into %g6.
 510  * Place our secondary lofault handler into %g5.
 511  * Place the address of our fault handler into %o3.
 512  *
 513  * If count is less than or equal to SMALL_LIMIT (7) we
 514  * always do a byte for byte copy.
 515  *
 516  * If count is > SMALL_LIMIT, we check the alignment of the input
 517  * and output pointers.  We store -count in %o3, we store the number
 518  * of chunks (8, 4, 2 or 1 byte) operated on in our basic copy loop
 519  * in %o2. Following this we branch to the appropriate copy loop and
 520  * copy that many chunks.  Since we've been adding the chunk size
 521  * to %o3 each time through as well as decrementing %o2, we can tell
 522  * if any data is left to be copied by examining %o3. If that is
 523  * zero, we're done and can go home. If not, we figure out what the
 524  * largest chunk size left to be copied is and branch to that copy
 525  * loop unless there's only one byte left. We load that as we're
 526  * branching to code that stores it just before we return.
 527  *
 528  * Fault handlers are invoked if we reference memory that has no
 529  * current mapping.  All forms share the same copyio_fault handler.
 530  * This routine handles fixing up the stack and general housecleaning.
 531  * Each copy operation has a simple fault handler that is then called
 532  * to do the work specific to the individual operation.  The handlers
 533  * for copyOP and xcopyOP are found at the end of each individual function.
 534  * The handlers for xcopyOP_little are found at the end of xcopyin_little.
 535  * The handlers for copyOP_noerr are found at the end of copyin_noerr.
 536  */
 537 
 538 /*
 539  * Copy kernel data to user space (copyout/xcopyout/xcopyout_little).
 540  */
 541 
 542 /*
 543  * We save the arguments in the following registers in case of a fault:
 544  *      kaddr - %g2
 545  *      uaddr - %g3
 546  *      count - %g4
 547  */
 548 #define SAVE_SRC        %g2
 549 #define SAVE_DST        %g3
 550 #define SAVE_COUNT      %g4
 551 
     /*
      * REAL_LOFAULT holds the address of the per-routine handler that
      * copyio_fault jumps to; SAVED_LOFAULT holds the t_lofault value that
      * was in place on entry and is written back on exit or on a fault.
      */
 552 #define REAL_LOFAULT            %g5
 553 #define SAVED_LOFAULT           %g6
 554 
 555 /*
 556  * Generic copyio fault handler.  This is the first line of defense when a 
 557  * fault occurs in (x)copyin/(x)copyout.  In order for this to function
 558  * properly, the value of the 'real' lofault handler should be in REAL_LOFAULT.
 559  * This allows us to share common code for all the flavors of the copy
 560  * operations, including the _noerr versions.
 561  *
 562  * Note that this function will restore the original input parameters before
 563  * calling REAL_LOFAULT.  So the real handler can vector to the appropriate
 564  * member of the t_copyop structure, if needed.
 565  */
 566         ENTRY(copyio_fault)
             ! Installed in t_lofault by .do_copyout/.do_copyin for the
             ! duration of a copy.
 567         membar  #Sync
 568         stn     SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
 569 
             ! Put the caller's original arguments back in %o0-%o2 before
             ! handing off; the copy loops have modified those registers.
 570         mov     SAVE_SRC, %o0
 571         mov     SAVE_DST, %o1
 572         jmp     REAL_LOFAULT            ! per-routine handler
 573           mov   SAVE_COUNT, %o2         ! delay slot
 574         SET_SIZE(copyio_fault)
 575 
 576         ENTRY(copyout)
             !
             ! copyout(kaddr, uaddr, count)
             !
             ! In:   %o0 = kernel source, %o1 = user destination, %o2 = count
             ! Out:  %o0 = 0 on success (and when count is 0).  On a fault
             !       .copyout_err either jumps to the thread's CP_COPYOUT
             !       copyops entry or returns -1.
             !
             ! Runs as a leaf routine; every store to the user buffer goes
             ! through ASI_USER.  xcopyout enters at .do_copyout with a
             ! different handler in REAL_LOFAULT.
             !
 577         sethi   %hi(.copyout_err), REAL_LOFAULT
 578         or      REAL_LOFAULT, %lo(.copyout_err), REAL_LOFAULT
 579 
 580 .do_copyout:
 581         !
 582         ! Check the length and bail if zero.
 583         !
 584         tst     %o2
 585         bnz,pt  %ncc, 1f
 586           nop
 587         retl
 588           clr   %o0
 589 1:
             ! Save the current t_lofault and install copyio_fault in its place.
 590         sethi   %hi(copyio_fault), %o3
 591         ldn     [THREAD_REG + T_LOFAULT], SAVED_LOFAULT
 592         or      %o3, %lo(copyio_fault), %o3
 593         membar  #Sync
 594         stn     %o3, [THREAD_REG + T_LOFAULT]
 595 
             ! Keep the original arguments for copyio_fault to restore.
 596         mov     %o0, SAVE_SRC
 597         mov     %o1, SAVE_DST
 598         mov     %o2, SAVE_COUNT
 599 
 600         !
 601         ! Check to see if we're more than SMALL_LIMIT (7 bytes).
 602         ! Run in leaf mode, using the %o regs as our input regs.
 603         !
 604         subcc   %o2, SMALL_LIMIT, %o3
 605         bgu,a,pt %ncc, .dco_ns
 606         or      %o0, %o1, %o3           ! annulled delay slot: alignment bits
 607 
 608 .dcobcp:
 609         sub     %g0, %o2, %o3           ! negate count
 610         add     %o0, %o2, %o0           ! make %o0 point at the end
 611         add     %o1, %o2, %o1           ! make %o1 point at the end
 612         ba,pt   %ncc, .dcocl
 613         ldub    [%o0 + %o3], %o4        ! load first byte
 614         !
 615         ! %o0 and %o1 point at the end and remain pointing at the end
 616         ! of their buffers. We pull things out by adding %o3 (which is
 617         ! the negation of the length) to the buffer end which gives us
 618         ! the current location in the buffers. By incrementing %o3 we walk
 619         ! through both buffers without having to bump each buffer's
 620         ! pointer. A very fast 4 instruction loop.
 621         !
 622         .align 16
 623 .dcocl:
 624         stba    %o4, [%o1 + %o3]ASI_USER
 625         inccc   %o3
 626         bl,a,pt %ncc, .dcocl            ! loop while the index is negative
 627         ldub    [%o0 + %o3], %o4
 628         !
 629         ! We're done. Go home.
 630         !
 631         membar  #Sync
 632         stn     SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
 633         retl
 634         clr     %o0
 635         !
 636         ! Try aligned copies from here.
 637         !
 638 .dco_ns:
 639         ! %o0 = kernel addr (to be copied from)
 640         ! %o1 = user addr (to be copied to)
 641         ! %o2 = length
 642         ! %o3 = %o0 | %o1 (used for alignment checking)
 643         ! REAL_LOFAULT (%g5) is the per-routine fault handler
 644         ! SAVED_LOFAULT (%g6) is the original t_lofault
 645         !
 646         ! If either address is odd, use the byte for byte copy loop.
 647         ! Otherwise test for 8, then 4 byte alignment of both addresses
 648         ! and pick the widest loop that fits; anything else that is
 649         ! even uses the 2 byte loop.
 650         !
 651         btst    1, %o3
 652         bz,pt   %icc, .dcoh8
 653         btst    7, %o3                  ! delay slot, sets codes for .dcoh8
 654 
 655         ba      .dcobcp
 656         nop
 657 .dcoh8:
 658         !
 659         ! 8 byte aligned?
 660         !
 661         bnz,a   %ncc, .dcoh4
 662         btst    3, %o3                  ! annulled delay slot, codes for .dcoh4
 663 .dcos8:
 664         !
 665         ! Housekeeping for copy loops. Uses same idea as in the byte for
 666         ! byte copy loop above.
 667         !
 668         add     %o0, %o2, %o0
 669         add     %o1, %o2, %o1
 670         sub     %g0, %o2, %o3
 671         ba,pt   %ncc, .dodebc
 672         srl     %o2, 3, %o2             ! Number of 8 byte chunks to copy
 673         !
 674         ! 4 byte aligned?
 675         !
 676 .dcoh4:
 677         bnz,pn  %ncc, .dcoh2
 678         nop
 679 .dcos4:
 680         add     %o0, %o2, %o0
 681         add     %o1, %o2, %o1
 682         sub     %g0, %o2, %o3
 683         ba,pt   %ncc, .dodfbc
 684         srl     %o2, 2, %o2             ! Number of 4 byte chunks to copy
 685         !
 686         ! We must be 2 byte aligned. Off we go.
 687         ! Odd addresses were already sent to .dcobcp at .dco_ns,
 688         ! so both addresses are even when we get here.
 689         !
 690 .dcoh2:
 691 .dcos2:
 692         add     %o0, %o2, %o0
 693         add     %o1, %o2, %o1
 694         sub     %g0, %o2, %o3
 695         ba,pt   %ncc, .dodtbc
 696         srl     %o2, 1, %o2             ! Number of 2 byte chunks to copy
 697 
             !
             ! Eight byte copy loop. %o2 is the number of 8 byte chunks to copy.
             ! The bg tests the deccc result; the addcc in its delay slot sets
             ! the condition codes that the bz after the loop tests.  The 4
             ! and 2 byte loops below are built the same way.
             !
 698 .dodebc:
 699         ldx     [%o0 + %o3], %o4
 700         deccc   %o2
 701         stxa    %o4, [%o1 + %o3]ASI_USER
 702         bg,pt   %ncc, .dodebc
 703         addcc   %o3, 8, %o3
 704         !
 705         ! End of copy loop. Check to see if we're done. Most
 706         ! eight byte aligned copies end here.
 707         !
 708         bz,pt   %ncc, .dcofh
 709         nop
 710         !
 711         ! Something is left - do it byte for byte.
 712         !
 713         ba,pt   %ncc, .dcocl
 714         ldub    [%o0 + %o3], %o4        ! load next byte
 715         !
 716         ! Four byte copy loop. %o2 is the number of 4 byte chunks to copy.
 717         !
 718         .align 32
 719 .dodfbc:
 720         lduw    [%o0 + %o3], %o4
 721         deccc   %o2
 722         sta     %o4, [%o1 + %o3]ASI_USER
 723         bg,pt   %ncc, .dodfbc
 724         addcc   %o3, 4, %o3
 725         !
 726         ! End of copy loop. Check to see if we're done. Most
 727         ! four byte aligned copies end here.
 728         !
 729         bz,pt   %ncc, .dcofh
 730         nop
 731         !
 732         ! Something is left. Do it byte for byte.
 733         !
 734         ba,pt   %ncc, .dcocl
 735         ldub    [%o0 + %o3], %o4        ! load next byte
 736         !
 737         ! two byte aligned copy loop. %o2 is the number of 2 byte chunks to
 738         ! copy.
 739         !
 740         .align 32
 741 .dodtbc:
 742         lduh    [%o0 + %o3], %o4
 743         deccc   %o2
 744         stha    %o4, [%o1 + %o3]ASI_USER
 745         bg,pt   %ncc, .dodtbc
 746         addcc   %o3, 2, %o3
 747         !
 748         ! End of copy loop. Anything left?
 749         !
 750         bz,pt   %ncc, .dcofh
 751         nop
 752         !
 753         ! Deal with the last byte
 754         !
 755         ldub    [%o0 + %o3], %o4
 756         stba    %o4, [%o1 + %o3]ASI_USER
 757 .dcofh:
 758         membar  #Sync
 759         stn     SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
 760         retl
 761         clr     %o0
 762 
             !
             ! Fault handler for copyout, reached from copyio_fault with the
             ! original arguments back in %o0-%o2.  If the thread has a
             ! copyops vector, jump to its CP_COPYOUT entry; else return -1.
             !
 763 .copyout_err:
 764         ldn     [THREAD_REG + T_COPYOPS], %o4
 765         brz     %o4, 2f
 766         nop
 767         ldn     [%o4 + CP_COPYOUT], %g2
 768         jmp     %g2
 769         nop
 770 2:
 771         retl
 772         mov     -1, %o0
 773         SET_SIZE(copyout)
 774 
 775 
 776         ENTRY(xcopyout)
             !
             ! Same copy as copyout (shares .do_copyout); only the fault
             ! handler differs.  .xcopyout_err jumps to the thread's
             ! CP_XCOPYOUT copyops entry when one exists, and otherwise
             ! returns the value in %g1 rather than -1.
             !
 777         sethi   %hi(.xcopyout_err), REAL_LOFAULT
 778         b       .do_copyout
 779           or    REAL_LOFAULT, %lo(.xcopyout_err), REAL_LOFAULT
 780 .xcopyout_err:
 781         ldn     [THREAD_REG + T_COPYOPS], %o4
 782         brz     %o4, 2f
 783         nop
 784         ldn     [%o4 + CP_XCOPYOUT], %g2
 785         jmp     %g2
 786         nop
 787 2:
 788         retl
 789         mov     %g1, %o0                ! delay slot, return value = %g1
 790         SET_SIZE(xcopyout)
 791 
 792         ENTRY(xcopyout_little)
             !
             ! Byte-reversing copy: the source is read with plain ldub and
             ! the destination is written through ASI_AIUSL.
             !   %o0 - source address
             !   %o1 - destination address
             !   %o2 - byte count
             ! Returns 0 in %o0. On a fault control goes to .little_err
             ! (defined under xcopyin_little), which restores t_lofault
             ! from %o5 and returns %g1.
             !
             ! Index scheme: %o3 runs from -count up to 0. %o1 is set to
             ! the end of the destination, so %o1+%o3 walks forward from
             ! its first byte. %o0 is set to src + 2*count - 1 and is
             ! decremented by 2 each pass, so %o0+%o3 walks backward from
             ! the last source byte.
             !
 793         sethi   %hi(.little_err), %o4
 794         ldn     [THREAD_REG + T_LOFAULT], %o5   ! %o5 = old t_lofault, kept for restore
 795         or      %o4, %lo(.little_err), %o4
 796         membar  #Sync                   ! sync error barrier
 797         stn     %o4, [THREAD_REG + T_LOFAULT]   ! install .little_err
 798 
 799         subcc   %g0, %o2, %o3           ! %o3 = -count, sets Z if count is 0
 800         add     %o0, %o2, %o0
 801         bz,pn   %ncc, 2f                ! check for zero bytes
 802         sub     %o2, 1, %o4             ! delay slot: %o4 = count - 1
 803         add     %o0, %o4, %o0           ! start w/last byte
 804         add     %o1, %o2, %o1           ! %o1 = end of destination
 805         ldub    [%o0+%o3], %o4          ! preload last source byte
 806 
 807 1:      stba    %o4, [%o1+%o3]ASI_AIUSL
 808         inccc   %o3                     ! carry is set when %o3 wraps from -1 to 0
 809         sub     %o0, 2, %o0             ! get next byte
 810         bcc,a,pt %ncc, 1b               ! loop while no carry (bytes remain)
 811           ldub  [%o0+%o3], %o4          ! annulled delay slot: only when looping
 812 
 813 2:      membar  #Sync                   ! sync error barrier
 814         stn     %o5, [THREAD_REG + T_LOFAULT]   ! restore old t_lofault
 815         retl
 816         mov     %g0, %o0                ! return (0)
 817         SET_SIZE(xcopyout_little)
 818 
 819 /*
 820  * Copy user data to kernel space (copyin/xcopyin/xcopyin_little)
 821  */
 822 
 823         ENTRY(copyin)
             !
             ! Copy %o2 bytes from the address in %o0, read through
             ! ASI_USER, to the address in %o1, written with ordinary
             ! stores.
             !   %o0 - source address
             !   %o1 - destination address
             !   %o2 - byte count
             ! Returns 0 in %o0 on success. The error exit .copyin_err
             ! returns -1 unless the thread has a copyops vector.
             !
             ! xcopyin and copyin_noerr enter at .do_copyin after loading
             ! their own error exit into REAL_LOFAULT.
             !
 824         sethi   %hi(.copyin_err), REAL_LOFAULT
 825         or      REAL_LOFAULT, %lo(.copyin_err), REAL_LOFAULT
 826 
 827 .do_copyin:
 828         !
 829         ! Check the length and bail if zero.
 830         !
 831         tst     %o2
 832         bnz,pt  %ncc, 1f
 833           nop
 834         retl
 835           clr   %o0                     ! zero length: return (0), t_lofault untouched
 836 1:
             !
             ! Save the current t_lofault in SAVED_LOFAULT and install
             ! copyio_fault (defined outside this routine) in its place.
             !
 837         sethi   %hi(copyio_fault), %o3
 838         ldn     [THREAD_REG + T_LOFAULT], SAVED_LOFAULT
 839         or      %o3, %lo(copyio_fault), %o3
 840         membar  #Sync
 841         stn     %o3, [THREAD_REG + T_LOFAULT]
 842 
             !
             ! Keep copies of the original arguments; %o0-%o2 are
             ! modified by the copy loops below.
             ! NOTE(review): presumably the fault path reloads them from
             ! SAVE_SRC/SAVE_DST/SAVE_COUNT before the error exit -- confirm.
             !
 843         mov     %o0, SAVE_SRC
 844         mov     %o1, SAVE_DST
 845         mov     %o2, SAVE_COUNT
 846 
 847         !
 848         ! Check to see if we're more than SMALL_LIMIT.
             ! If so, go to .dci_ns with %o3 = %o0 | %o1 so that one test
             ! covers the alignment of both addresses.
 849         !
 850         subcc   %o2, SMALL_LIMIT, %o3
 851         bgu,a,pt %ncc, .dci_ns
 852         or      %o0, %o1, %o3           ! annulled delay slot: only when branching
 853 
 854 .dcibcp:
 855         sub     %g0, %o2, %o3           ! setup for copy loop
 856         add     %o0, %o2, %o0
 857         add     %o1, %o2, %o1
 858         ba,pt   %ncc, .dcicl
 859         lduba   [%o0 + %o3]ASI_USER, %o4        ! delay slot: preload first byte
 860         !
 861         ! %o0 and %o1 point at the end and remain pointing at the end
 862         ! of their buffers. We pull things out by adding %o3 (which is
 863         ! the negation of the length) to the buffer end which gives us
 864         ! the current location in the buffers. By incrementing %o3 we walk
 865         ! through both buffers without having to bump each buffer's
 866         ! pointer. A very fast 4 instruction loop.
 867         !
 868         .align 16
 869 .dcicl:
 870         stb     %o4, [%o1 + %o3]
 871         inccc   %o3
 872         bl,a,pt %ncc, .dcicl            ! loop while %o3 is still negative
 873         lduba   [%o0 + %o3]ASI_USER, %o4        ! annulled delay slot: only when looping
 874         !
 875         ! We're done. Go home.
 876         !
 877         membar  #Sync
 878         stn     SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
 879         retl
 880         clr     %o0
 881         !
 882         ! Try aligned copies from here.
 883         !
 884 .dci_ns:
 885         !
 886         ! If either the source or the destination address is odd
 887         ! (bit 0 of %o3, the OR of both addresses, is set), fall
 888         ! back to the byte for byte copy loop. Otherwise go on to
 889         ! probe for 8, 4 or 2 byte alignment.
 890         !
 891         btst    1, %o3
 892         bz,a,pt %icc, .dcih8
 893         btst    7, %o3                  ! annulled delay slot: sets cc for .dcih8
 894         ba      .dcibcp
 895         nop
 896 
 897 .dcih8:
 898         !
 899         ! 8 byte aligned?
 900         !
 901         bnz,a   %ncc, .dcih4
 902         btst    3, %o3                  ! annulled delay slot: sets cc for .dcih4
 903 .dcis8:
 904         !
 905         ! Housekeeping for copy loops. Uses same idea as in the byte for
 906         ! byte copy loop above.
 907         !
 908         add     %o0, %o2, %o0
 909         add     %o1, %o2, %o1
 910         sub     %g0, %o2, %o3
 911         ba,pt   %ncc, .didebc
 912         srl     %o2, 3, %o2             ! Number of 8 byte chunks to copy
 913         !
 914         ! 4 byte aligned?
 915         !
 916 .dcih4:
 917         bnz     %ncc, .dcih2
 918         nop
 919 .dcis4:
 920         !
 921         ! Housekeeping for copy loops. Uses same idea as in the byte
 922         ! for byte copy loop above.
 923         !
 924         add     %o0, %o2, %o0
 925         add     %o1, %o2, %o1
 926         sub     %g0, %o2, %o3
 927         ba,pt   %ncc, .didfbc
 928         srl     %o2, 2, %o2             ! Number of 4 byte chunks to copy
             !
             ! Neither 8 nor 4 byte aligned, but both addresses are even:
             ! copy two bytes at a time. Same housekeeping as above.
             !
 929 .dcih2:
 930 .dcis2:
 931         add     %o0, %o2, %o0
 932         add     %o1, %o2, %o1
 933         sub     %g0, %o2, %o3
 934         ba,pt   %ncc, .didtbc
 935         srl     %o2, 1, %o2             ! Number of 2 byte chunks to copy
 936 
             !
             ! 8 byte copy loop. %o2 is number of 8 byte chunks to copy.
             ! The loop branch tests the deccc result; the addcc in its
             ! delay slot then leaves the condition codes describing %o3
             ! for the end-of-loop test that follows.
             !
 937 .didebc:
 938         ldxa    [%o0 + %o3]ASI_USER, %o4
 939         deccc   %o2
 940         stx     %o4, [%o1 + %o3]
 941         bg,pt   %ncc, .didebc
 942         addcc   %o3, 8, %o3
 943         !
 944         ! End of copy loop. Most 8 byte aligned copies end here.
 945         !
 946         bz,pt   %ncc, .dcifh            ! %o3 reached zero: nothing left
 947         nop
 948         !
 949         ! Something is left. Do it byte for byte.
 950         !
 951         ba,pt   %ncc, .dcicl
 952         lduba   [%o0 + %o3]ASI_USER, %o4        ! delay slot: preload next byte
 953         !
 954         ! 4 byte copy loop. %o2 is number of 4 byte chunks to copy.
 955         !
 956         .align 32
 957 .didfbc:
 958         lduwa   [%o0 + %o3]ASI_USER, %o4
 959         deccc   %o2
 960         st      %o4, [%o1 + %o3]
 961         bg,pt   %ncc, .didfbc
 962         addcc   %o3, 4, %o3
 963         !
 964         ! End of copy loop. Most 4 byte aligned copies end here.
 965         !
 966         bz,pt   %ncc, .dcifh            ! %o3 reached zero: nothing left
 967         nop
 968         !
 969         ! Something is left. Do it byte for byte.
 970         !
 971         ba,pt   %ncc, .dcicl
 972         lduba   [%o0 + %o3]ASI_USER, %o4        ! delay slot: preload next byte
 973         !
 974         ! 2 byte aligned copy loop. %o2 is number of 2 byte chunks to
 975         ! copy.
 976         !
 977         .align 32
 978 .didtbc:
 979         lduha   [%o0 + %o3]ASI_USER, %o4
 980         deccc   %o2
 981         sth     %o4, [%o1 + %o3]
 982         bg,pt   %ncc, .didtbc
 983         addcc   %o3, 2, %o3
 984         !
 985         ! End of copy loop. Most 2 byte aligned copies end here.
 986         !
 987         bz,pt   %ncc, .dcifh            ! %o3 reached zero: nothing left
 988         nop
 989         !
 990         ! Deal with the last byte
 991         !
 992         lduba   [%o0 + %o3]ASI_USER, %o4
 993         stb     %o4, [%o1 + %o3]
             !
             ! Common successful exit for the aligned copy loops.
             !
 994 .dcifh:
 995         membar  #Sync
 996         stn     SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]   ! restore old t_lofault
 997         retl
 998         clr     %o0
 999 
             !
             ! Error exit for copyin. If the thread has a copyops vector
             ! (T_COPYOPS is non-NULL), jump to its CP_COPYIN entry;
             ! otherwise return -1.
             ! NOTE(review): presumably reached through REAL_LOFAULT from
             ! copyio_fault, which is not shown here -- confirm.
             !
1000 .copyin_err:
1001         ldn     [THREAD_REG + T_COPYOPS], %o4
1002         brz     %o4, 2f                 ! no copyops vector: plain error return
1003         nop
1004         ldn     [%o4 + CP_COPYIN], %g2
1005         jmp     %g2                     ! jump (not call) to the copyops routine
1006         nop
1007 2:
1008         retl
1009         mov     -1, %o0                 ! delay slot: return (-1)
1010         SET_SIZE(copyin)
1011 
1012         ENTRY(xcopyin)
             !
             ! Same copy as copyin (the shared .do_copyin path), but with
             ! .xcopyin_err loaded into REAL_LOFAULT as the error exit.
             ! The argument registers are passed through untouched.
             !
1013         sethi   %hi(.xcopyin_err), REAL_LOFAULT
1014         b       .do_copyin
1015           or    REAL_LOFAULT, %lo(.xcopyin_err), REAL_LOFAULT      ! delay slot: low bits of address
1016 .xcopyin_err:
             !
             ! Error exit. If the thread has a copyops vector (T_COPYOPS
             ! is non-NULL), jump to its CP_XCOPYIN entry. Otherwise
             ! return whatever is in %g1.
             ! NOTE(review): %g1 is presumably the errno left by the fault
             ! path, which is not visible in this routine -- confirm.
             !
1017         ldn     [THREAD_REG + T_COPYOPS], %o4
1018         brz     %o4, 2f                 ! no copyops vector: plain error return
1019         nop
1020         ldn     [%o4 + CP_XCOPYIN], %g2
1021         jmp     %g2                     ! jump (not call) to the copyops routine
1022         nop
1023 2:
1024         retl
1025         mov     %g1, %o0                ! delay slot: return value = %g1
1026         SET_SIZE(xcopyin)
1027 
1028         ENTRY(xcopyin_little)
             !
             ! Byte-reversing copy: the source is read through ASI_AIUSL
             ! and the destination is written with plain stb.
             !   %o0 - source address
             !   %o1 - destination address
             !   %o2 - byte count
             ! Returns 0 in %o0, or the value of %g1 via .little_err if a
             ! fault is taken.
             !
             ! Index scheme: %o3 runs from -count up to 0. %o1 is set to
             ! the end of the destination, so %o1+%o3 walks forward from
             ! its first byte. %o0 is set to src + 2*count - 1 and is
             ! decremented by 2 each pass, so %o0+%o3 walks backward from
             ! the last source byte.
             !
1029         sethi   %hi(.little_err), %o4
1030         ldn     [THREAD_REG + T_LOFAULT], %o5   ! %o5 = old t_lofault, kept for restore
1031         or      %o4, %lo(.little_err), %o4
1032         membar  #Sync                           ! sync error barrier
1033         stn     %o4, [THREAD_REG + T_LOFAULT]   ! install .little_err
1034 
1035         subcc   %g0, %o2, %o3           ! %o3 = -count, sets Z if count is 0
1036         add     %o0, %o2, %o0
1037         bz,pn   %ncc, 2f                ! check for zero bytes
1038         sub     %o2, 1, %o4             ! delay slot: %o4 = count - 1
1039         add     %o0, %o4, %o0           ! start w/last byte
1040         add     %o1, %o2, %o1           ! %o1 = end of destination
1041         lduba   [%o0+%o3]ASI_AIUSL, %o4 ! preload last source byte
1042 
1043 1:      stb     %o4, [%o1+%o3]
1044         inccc   %o3                     ! carry is set when %o3 wraps from -1 to 0
1045         sub     %o0, 2, %o0             ! get next byte
1046         bcc,a,pt %ncc, 1b               ! loop while no carry (bytes remain)
1047           lduba [%o0+%o3]ASI_AIUSL, %o4 ! annulled delay slot: only when looping
1048 
1049 2:      membar  #Sync                           ! sync error barrier
1050         stn     %o5, [THREAD_REG + T_LOFAULT]   ! restore old t_lofault
1051         retl
1052         mov     %g0, %o0                ! return (0)
1053 
             !
             ! Fault exit shared by xcopyin_little and xcopyout_little,
             ! both of which install this label in t_lofault and keep the
             ! previous handler in %o5. Restores that handler and returns
             ! the value in %g1.
             ! NOTE(review): %g1 is presumably the errno supplied by the
             ! trap code -- confirm.
             !
1054 .little_err:
1055         membar  #Sync                           ! sync error barrier
1056         stn     %o5, [THREAD_REG + T_LOFAULT]   ! restore old t_lofault
1057         retl
1058         mov     %g1, %o0                ! delay slot: return value = %g1
1059         SET_SIZE(xcopyin_little)
1060 
1061 
1062 /*
1063  * Copy a block of storage - must not overlap (from + len <= to).
1064  * No fault handler installed (to be called under on_fault())
1065  */
1066 
1067         ENTRY(copyin_noerr)
             !
             ! Same copy as copyin (the shared .do_copyin path), but with
             ! .copyio_noerr loaded into REAL_LOFAULT as the error exit.
             !
1068         sethi   %hi(.copyio_noerr), REAL_LOFAULT
1069         b       .do_copyin
1070           or    REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT     ! delay slot: low bits of address
             !
             ! Error exit shared with copyout_noerr: jump to the handler
             ! held in SAVED_LOFAULT, which .do_copyin loaded from
             ! t_lofault before installing its own handler. No error
             ! value is returned from here.
             !
1071 .copyio_noerr:
1072         jmp     SAVED_LOFAULT
1073           nop
1074         SET_SIZE(copyin_noerr)
1075 
1076 /*
1077  * Copy a block of storage - must not overlap (from + len <= to).
1078  * No fault handler installed (to be called under on_fault())
1079  */
1080 
1081         ENTRY(copyout_noerr)
             !
             ! Joins the shared .do_copyout path with .copyio_noerr (the
             ! label defined under copyin_noerr, which jumps to
             ! SAVED_LOFAULT) loaded into REAL_LOFAULT as the error exit.
             !
1082         sethi   %hi(.copyio_noerr), REAL_LOFAULT
1083         b       .do_copyout
1084           or    REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT     ! delay slot: low bits of address
1085         SET_SIZE(copyout_noerr)
1086 
             !
             ! Two word-sized global flags, use_hw_bcopy and use_hw_bzero,
             ! each initialized to 1. Nothing in this part of the file
             ! reads them; the hwblkclr header comment says its caller
             ! checks use_hw_bzero.
             !
1087         .align  4
1088         DGDEF(use_hw_bcopy)
1089         .word   1
1090         DGDEF(use_hw_bzero)
1091         .word   1
1092 
             ! Back to the text section for the code that follows.
1093         .align  64
1094         .section ".text"
1095 
1096 
1097 /*
1098  * hwblkclr - clears block-aligned, block-multiple-sized regions that are
1099  * longer than 256 bytes in length. For the generic module we simply
1100  * call bzero and return 1, which tells the caller that the pages in cache
1101  * should be flushed to ensure integrity.
1102  * Caller is responsible for ensuring use_hw_bzero is true and that
1103  * kpreempt_disable() has been called.
1104  */
1105         ! %i0 - start address
1106         ! %i1 - length of region (multiple of 64)
1107 
1108         ENTRY(hwblkclr)
             !
             ! Non-leaf routine: a new register window is opened so the
             ! incoming arguments are visible as %i0/%i1 and can be
             ! passed to bzero in %o0/%o1. Always returns 1.
             !
1109         save    %sp, -SA(MINFRAME), %sp
1110 
1111         ! Simply call bzero and notify the caller that bzero was used
1112         mov     %i0, %o0                ! bzero arg 0: start address
1113         call    bzero
1114           mov   %i1, %o1                ! delay slot, bzero arg 1: length
1115         ret
1116         restore %g0, 1, %o0     ! return (1) - did not use block operations
1117 
1118         SET_SIZE(hwblkclr)
1119 
1120         /*
1121          * Copy 32 bytes of data from src (%o0) to dst (%o1)
1122          * using physical addresses.
1123          */
1124         ENTRY_NP(hw_pa_bcopy32)
             !
             !   %o0 - source address, accessed through ASI_MEM
             !   %o1 - destination address, accessed through ASI_MEM
             !
             ! Runs with PSTATE_IE cleared: the original %pstate is kept
             ! in %g1 and written back in the delay slot of the return.
             ! All four doublewords are loaded before any is stored.
             ! Uses %g1, %g2 and %o2-%o5 as scratch; %o0 and %o1 are each
             ! advanced by 24. No return value is set.
             !
1125         rdpr    %pstate, %g1            ! %g1 = original %pstate
1126         andn    %g1, PSTATE_IE, %g2     ! %g2 = %pstate with PSTATE_IE cleared
1127         wrpr    %g0, %g2, %pstate
1128 
1129         ldxa    [%o0]ASI_MEM, %o2       ! bytes 0-7
1130         add     %o0, 8, %o0
1131         ldxa    [%o0]ASI_MEM, %o3       ! bytes 8-15
1132         add     %o0, 8, %o0
1133         ldxa    [%o0]ASI_MEM, %o4       ! bytes 16-23
1134         add     %o0, 8, %o0
1135         ldxa    [%o0]ASI_MEM, %o5       ! bytes 24-31
1136         stxa    %o2, [%o1]ASI_MEM
1137         add     %o1, 8, %o1
1138         stxa    %o3, [%o1]ASI_MEM
1139         add     %o1, 8, %o1
1140         stxa    %o4, [%o1]ASI_MEM
1141         add     %o1, 8, %o1
1142         stxa    %o5, [%o1]ASI_MEM
1143 
1144         membar  #Sync
1145         retl
1146           wrpr    %g0, %g1, %pstate     ! delay slot: restore original %pstate
1147         SET_SIZE(hw_pa_bcopy32)
1148 
1149 /*
1150  * Zero a block of storage.
1151  *
1152  * uzero is used by the kernel to zero a block in user address space.
1153  */
1154 
1155 
1156         ENTRY(uzero)
             !
             ! uzero and kzero are two entry points that select the
             ! address space via %asi, set up t_lofault handling, and
             ! then branch to the common .do_zero clearing code under
             ! bzero. From the way .do_zero uses them:
             !   %o0 - address to clear
             !   %o1 - byte count
             ! %o5 holds the t_lofault value found on entry (for kzero,
             ! with LOFAULT_SET or'ed in) and %o2 is used as scratch.
             !
1157         !
1158         ! Set a new lo_fault handler only if we came in with one
1159         ! already specified.
1160         !
1161         wr      %g0, ASI_USER, %asi     ! stores in .do_zero go through ASI_USER
1162         ldn     [THREAD_REG + T_LOFAULT], %o5
1163         tst     %o5
1164         bz,pt   %ncc, .do_zero          ! no handler on entry: install nothing
1165         sethi   %hi(.zeroerr), %o2      ! delay slot (executes either way)
1166         or      %o2, %lo(.zeroerr), %o2
1167         membar  #Sync
1168         ba,pt   %ncc, .do_zero
1169         stn     %o2, [THREAD_REG + T_LOFAULT]   ! delay slot: install .zeroerr
1170 
1171         ENTRY(kzero)
1172         !
1173         ! Always set a lo_fault handler
1174         !
1175         wr      %g0, ASI_P, %asi        ! stores in .do_zero go through ASI_P
1176         ldn     [THREAD_REG + T_LOFAULT], %o5
1177         sethi   %hi(.zeroerr), %o2
1178         or      %o5, LOFAULT_SET, %o5   ! mark "handler installed" even if old value was 0
1179         or      %o2, %lo(.zeroerr), %o2
1180         membar  #Sync
1181         ba,pt   %ncc, .do_zero
1182         stn     %o2, [THREAD_REG + T_LOFAULT]   ! delay slot: install .zeroerr
1183 
1184 /*
1185  * We got here because of a fault during kzero or if
1186  * uzero or bzero was called with t_lofault non-zero.
1187  * Otherwise we've already run screaming from the room.
1188  * Errno value is in %g1. Note that we're here iff
1189  * we did set t_lofault.
1190  */
1191 .zeroerr:
1192         !
1193         ! Undo asi register setting. Just set it to be the
1194         ! kernel default without checking.
1195         !
1196         wr      %g0, ASI_P, %asi
1197 
1198         !
1199         ! We did set t_lofault. It may well have been zero coming in.
1200         !
1201 1:
1202         tst     %o5
1203         membar #Sync
1204         bne,pn  %ncc, 3f                ! %o5 non-zero: t_lofault must be put back
1205         andncc  %o5, LOFAULT_SET, %o5   ! delay slot (always runs): strip flag, set cc for 3:
1206 2:
1207         !
1208         ! Old handler was zero. Just return the error.
1209         !
1210         retl                            ! return
1211         mov     %g1, %o0                ! error code from %g1
1212 3:
1213         !
1214         ! We're here because %o5 was non-zero. It was non-zero
1215         ! because either LOFAULT_SET was present, a previous fault
1216         ! handler was present or both. In all cases we need to reset
1217         ! T_LOFAULT to the value of %o5 after clearing LOFAULT_SET
1218         ! before we either simply return the error or we invoke the
1219         ! previously specified handler.
1220         !
1221         be      %ncc, 2b                ! stripped value is zero: no old handler
1222         stn     %o5, [THREAD_REG + T_LOFAULT]   ! delay slot (always runs): restore t_lofault
1223         jmp     %o5                     ! goto real handler
1224           nop
1225         SET_SIZE(kzero)
1226         SET_SIZE(uzero)
1227 
1228 /*
1229  * Zero a block of storage.
1230  */
1231 
1232         ENTRY(bzero)
             !
             !   %o0 - address to clear
             !   %o1 - byte count
             ! Returns 0 in %o0. Uses %o2-%o5 as scratch; %o5 holds the
             ! t_lofault value found on entry.
             !
             ! A fault handler (.zeroerr) is installed only if one was
             ! already set on entry. uzero and kzero join at .do_zero
             ! with %asi and %o5 already set up, so every store below
             ! goes through %asi.
             !
             ! Strategy by size:
             !   fewer than 7 bytes - byte loop
             !   7 to 14 bytes      - align to 4, word loop, byte loop
             !   15 bytes or more   - align to 8; if at least 128 bytes
             !                        remain, align to 64 and clear in
             !                        256 and 64 byte groups; then
             !                        doublewords; then bytes
             !
1233         wr      %g0, ASI_P, %asi
1234 
1235         ldn     [THREAD_REG + T_LOFAULT], %o5   ! save old vector
1236         tst     %o5
1237         bz,pt   %ncc, .do_zero          ! no handler on entry: install nothing
1238         sethi   %hi(.zeroerr), %o2      ! delay slot (executes either way)
1239         or      %o2, %lo(.zeroerr), %o2
1240         membar  #Sync                           ! sync error barrier
1241         stn     %o2, [THREAD_REG + T_LOFAULT]   ! install new vector
1242 
1243 .do_zero:
1244         cmp     %o1, 7
1245         blu,pn  %ncc, .byteclr          ! fewer than 7 bytes
1246         nop
1247 
1248         cmp     %o1, 15
1249         blu,pn  %ncc, .wdalign          ! 7 to 14 bytes
1250         nop
1251 
1252         andcc   %o0, 7, %o3             ! is add aligned on a 8 byte bound
1253         bz,pt   %ncc, .blkalign         ! already double aligned
1254         sub     %o3, 8, %o3             ! -(bytes till double aligned)
1255         add     %o1, %o3, %o1           ! update o1 with new count
1256 
             ! Clear single bytes until %o3 counts up to zero
1257 1:
1258         stba    %g0, [%o0]%asi
1259         inccc   %o3
1260         bl,pt   %ncc, 1b
1261         inc     %o0                     ! delay slot: advance address
1262 
1263         ! Now address is double aligned
1264 .blkalign:
1265         cmp     %o1, 0x80               ! check if there are 128 bytes to set
1266         blu,pn  %ncc, .bzero_small
1267         mov     %o1, %o3                ! delay slot: %o3 = bytes left, used by .bzero_small
1268 
1269         andcc   %o0, 0x3f, %o3          ! is block aligned?
1270         bz,pt   %ncc, .bzero_blk
1271         sub     %o3, 0x40, %o3          ! -(bytes till block aligned)
1272         add     %o1, %o3, %o1           ! o1 is the remainder
1273
1274         ! Clear -(%o3) bytes till block aligned
1275 1:
1276         stxa    %g0, [%o0]%asi
1277         addcc   %o3, 8, %o3
1278         bl,pt   %ncc, 1b
1279         add     %o0, 8, %o0             ! delay slot: advance address
1280 
1281 .bzero_blk:
1282         and     %o1, 0x3f, %o3          ! calc bytes left after blk clear
1283         andn    %o1, 0x3f, %o4          ! calc size of blocks in bytes
1284 
1285         cmp     %o4, 0x100              ! 256 bytes or more
1286         blu,pn  %ncc, 3f
1287         nop
1288 
             !
             ! Clear 256 bytes per pass. The first doubleword of each of
             ! the four 64 byte blocks is stored first, then the rest of
             ! each block is filled in.
             ! NOTE(review): presumably a cache or store-pipeline
             ! optimization -- confirm.
             !
1289 2:
1290         stxa    %g0, [%o0+0x0]%asi
1291         stxa    %g0, [%o0+0x40]%asi
1292         stxa    %g0, [%o0+0x80]%asi
1293         stxa    %g0, [%o0+0xc0]%asi
1294 
1295         stxa    %g0, [%o0+0x8]%asi
1296         stxa    %g0, [%o0+0x10]%asi
1297         stxa    %g0, [%o0+0x18]%asi
1298         stxa    %g0, [%o0+0x20]%asi
1299         stxa    %g0, [%o0+0x28]%asi
1300         stxa    %g0, [%o0+0x30]%asi
1301         stxa    %g0, [%o0+0x38]%asi
1302 
1303         stxa    %g0, [%o0+0x48]%asi
1304         stxa    %g0, [%o0+0x50]%asi
1305         stxa    %g0, [%o0+0x58]%asi
1306         stxa    %g0, [%o0+0x60]%asi
1307         stxa    %g0, [%o0+0x68]%asi
1308         stxa    %g0, [%o0+0x70]%asi
1309         stxa    %g0, [%o0+0x78]%asi
1310 
1311         stxa    %g0, [%o0+0x88]%asi
1312         stxa    %g0, [%o0+0x90]%asi
1313         stxa    %g0, [%o0+0x98]%asi
1314         stxa    %g0, [%o0+0xa0]%asi
1315         stxa    %g0, [%o0+0xa8]%asi
1316         stxa    %g0, [%o0+0xb0]%asi
1317         stxa    %g0, [%o0+0xb8]%asi
1318 
1319         stxa    %g0, [%o0+0xc8]%asi
1320         stxa    %g0, [%o0+0xd0]%asi
1321         stxa    %g0, [%o0+0xd8]%asi
1322         stxa    %g0, [%o0+0xe0]%asi
1323         stxa    %g0, [%o0+0xe8]%asi
1324         stxa    %g0, [%o0+0xf0]%asi
1325         stxa    %g0, [%o0+0xf8]%asi
1326 
1327         sub     %o4, 0x100, %o4
1328         cmp     %o4, 0x100
1329         bgu,pt  %ncc, 2b                ! loop only while more than 256 remain;
                                             ! exactly 256 falls to the 64 byte loop
1330         add     %o0, 0x100, %o0         ! delay slot: advance address
1331 
1332 3:
1333         ! ... check if 64 bytes to set
1334         cmp     %o4, 0x40
1335         blu     %ncc, .bzero_blk_done
1336         nop
1337 
             ! Clear one 64 byte block per pass
1338 4:
1339         stxa    %g0, [%o0+0x0]%asi
1340         stxa    %g0, [%o0+0x8]%asi
1341         stxa    %g0, [%o0+0x10]%asi
1342         stxa    %g0, [%o0+0x18]%asi
1343         stxa    %g0, [%o0+0x20]%asi
1344         stxa    %g0, [%o0+0x28]%asi
1345         stxa    %g0, [%o0+0x30]%asi
1346         stxa    %g0, [%o0+0x38]%asi
1347 
1348         subcc   %o4, 0x40, %o4
1349         bgu,pt  %ncc, 3b                ! more block bytes remain
1350         add     %o0, 0x40, %o0          ! delay slot: advance address
1351 
1352 .bzero_blk_done:
1353         membar  #Sync
1354 
             !
             ! On entry %o3 is the byte count still to clear with
             ! doublewords plus any tail; %o1 has the same low 3 bits.
             !
1355 .bzero_small:
1356         ! Set the remaining doubles
1357         subcc   %o3, 8, %o3             ! Can we store any doubles?
1358         blu,pn  %ncc, .byteclr
1359         and     %o1, 7, %o1             ! calc bytes left after doubles
1360 
1361 .dbclr:
1362         stxa    %g0, [%o0]%asi          ! Clear the doubles
1363         subcc   %o3, 8, %o3
1364         bgeu,pt %ncc, .dbclr            ! loop while no borrow (8 or more remained)
1365         add     %o0, 8, %o0             ! delay slot: advance address
1366 
1367         ba      .byteclr
1368         nop
1369 
1370 .wdalign:                               ! re-entered after each alignment byte
1371         andcc   %o0, 3, %o3             ! is add aligned on a word boundary
1372         bz,pn   %ncc, .wdclr
1373         andn    %o1, 3, %o3             ! create word sized count in %o3
1374 
1375         dec     %o1                     ! decrement count
1376         stba    %g0, [%o0]%asi          ! clear a byte
1377         ba      .wdalign
1378         inc     %o0                     ! next byte
1379 
1380 .wdclr:
1381         sta     %g0, [%o0]%asi          ! 4-byte clearing loop
1382         subcc   %o3, 4, %o3
1383         bnz,pt  %ncc, .wdclr
1384         inc     4, %o0                  ! delay slot: advance address
1385 
1386         and     %o1, 3, %o1             ! leftover count, if any
1387 
1388 .byteclr:
1389         ! Set the leftover bytes
1390         brz     %o1, .bzero_exit
1391         nop
1392 
1393 7:
1394         deccc   %o1                     ! byte clearing loop
1395         stba    %g0, [%o0]%asi
1396         bgu,pt  %ncc, 7b                ! loop while count was above 1 before deccc
1397         inc     %o0                     ! delay slot: advance address
1398 
1399 .bzero_exit:
1400         !
1401         ! We're just concerned with whether t_lofault was set
1402         ! when we came in. We end up here from either kzero()
1403         ! or bzero(). kzero() *always* sets a lofault handler.
1404         ! It ors LOFAULT_SET into %o5 to indicate it has done
1405         ! this even if the value of %o5 is otherwise zero.
1406         ! bzero() sets a lofault handler *only* if one was
1407         ! previously set. Accordingly we need to examine
1408         ! %o5 and if it is non-zero be sure to clear LOFAULT_SET
1409         ! before resetting the error handler.
1410         !
1411         tst     %o5
1412         bz      %ncc, 1f                ! nothing was installed: nothing to restore
1413         andn    %o5, LOFAULT_SET, %o5   ! delay slot (always runs): strip flag
1414         membar  #Sync                           ! sync error barrier
1415         stn     %o5, [THREAD_REG + T_LOFAULT]   ! restore old t_lofault
1416 1:
1417         retl
1418         clr     %o0                     ! return (0)
1419 
1420         SET_SIZE(bzero)