/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

# ident "%Z%%M% %I%     %E% SMI"

#include <sys/param.h>
#include <sys/errno.h>
#include <sys/asm_linkage.h>
#include <sys/vtrace.h>
#include <sys/machthread.h>
#include <sys/clock.h>
#include <sys/asi.h>
#include <sys/fsr.h>
#include <sys/privregs.h>

#if !defined(lint)
#include "assym.h"
#endif  /* lint */


/*
 * For counts less than or equal to this number of bytes we always
 * copy byte-for-byte.
 */
#define SMALL_LIMIT     7

/*
 * LOFAULT_SET : Flag set by kzero and kcopy to indicate that a
 * t_lofault handler was set
 */
#define LOFAULT_SET 2

/*
 * Copy a block of storage, returning an error code if `from' or
 * `to' takes a kernel pagefault which cannot be resolved.
 * Returns errno value on pagefault error, 0 if all ok
 */
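
/*
 * Illustrative C sketch (not part of this file): how a caller consumes
 * the kcopy() contract above.  The wrapper routine and its name are
 * hypothetical; only the kcopy() semantics are taken from this file.
 *
 *      int
 *      copy_checked(const void *from, void *to, size_t count)
 *      {
 *              int err;
 *
 *              err = kcopy(from, to, count);
 *              if (err != 0)
 *                      return (err);   errno from unresolved pagefault
 *              return (0);
 *      }
 */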



#if defined(lint)

/* ARGSUSED */
int
kcopy(const void *from, void *to, size_t count)
{ return (0); }

#else   /* lint */

        .seg    ".text"
        .align  4

        ENTRY(kcopy)

        save    %sp, -SA(MINFRAME), %sp
        set     .copyerr, %l7                   ! copyerr is lofault value
        ldn     [THREAD_REG + T_LOFAULT], %o5   ! save existing handler
        or      %o5, LOFAULT_SET, %o5
        membar  #Sync                           ! sync error barrier
        b       .do_copy                        ! common code
        stn     %l7, [THREAD_REG + T_LOFAULT]   ! set t_lofault

/*
 * We got here because of a fault during kcopy.
 * Errno value is in %g1.
 */
.copyerr:
        ! kcopy() *always* sets a t_lofault handler and ORs LOFAULT_SET
        ! into %o5 to indicate it has done so.  We need to clear the
        ! LOFAULT_SET flag before restoring the error handler.
        andn    %o5, LOFAULT_SET, %o5
        membar  #Sync                   ! sync error barrier
        stn     %o5, [THREAD_REG + T_LOFAULT]   ! restore old t_lofault
        ret
        restore %g1, 0, %o0

        SET_SIZE(kcopy)
#endif  /* lint */


/*
 * Copy a block of storage - must not overlap (from + len <= to).
 */
#if defined(lint)

/* ARGSUSED */
void
bcopy(const void *from, void *to, size_t count)
{}

#else   /* lint */

        ENTRY(bcopy)

        save    %sp, -SA(MINFRAME), %sp
        clr     %o5                     ! flag LOFAULT_SET is not set for bcopy

.do_copy:
        mov     %i1, %g5                ! save dest addr start

        mov     %i2, %l6                ! save size

        cmp     %i2, 12                 ! for small counts
        blu     %ncc, .bytecp           ! just copy bytes
          .empty

        !
        ! use aligned transfers where possible
        !
        xor     %i0, %i1, %o4           ! xor from and to address
        btst    7, %o4                  ! if lower three bits zero
        bz      .aldoubcp               ! can align on double boundary
        .empty  ! assembler complains about label

        xor     %i0, %i1, %o4           ! xor from and to address
        btst    3, %o4                  ! if lower two bits zero
        bz      .alwordcp               ! can align on word boundary
        btst    3, %i0                  ! delay slot, from address unaligned?
        !
        ! use aligned reads and writes where possible
        ! this differs from wordcp in that it copes
        ! with odd alignment between source and destination
        ! using word reads and writes with the proper shifts
        ! in between to align transfers to and from memory
        ! i0 - src address, i1 - dest address, i2 - count
        ! i3, i4 - tmps used for generating complete word
        ! i5 (word to write)
        ! l0 size in bits of upper part of source word (US)
        ! l1 size in bits of lower part of source word (LS = 32 - US)
        ! l2 size in bits of upper part of destination word (UD)
        ! l3 size in bits of lower part of destination word (LD = 32 - UD)
        ! l4 number of bytes leftover after aligned transfers complete
        ! l5 the number 32
        !
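        !
        ! Illustrative C sketch (not compiled) of the shift-and-merge
        ! technique below, using US and LS from the table above
        ! (US + LS == 32):
        !
        !       uint32_t hold = 0;
        !       while (count >= 4) {
        !               uint32_t w = *src++;
        !               *dst++ = hold | (w >> US);
        !               hold = w << LS;
        !               count -= 4;
        !       }
        !
        ! The upper part of each newly read source word completes the
        ! current destination word; the lower part is shifted up and
        ! held over to start the next one.
        !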
        mov     32, %l5                 ! load an oft-needed constant
        bz      .align_dst_only
        btst    3, %i1                  ! is destination address aligned?
        clr     %i4                     ! clear registers used in either case
        bz      .align_src_only
        clr     %l0
        !
        ! both source and destination addresses are unaligned
        !
1:                                      ! align source
        ldub    [%i0], %i3              ! read a byte from source address
        add     %i0, 1, %i0             ! increment source address
        or      %i4, %i3, %i4           ! or in with previous bytes (if any)
        btst    3, %i0                  ! is source aligned?
        add     %l0, 8, %l0             ! increment size of upper source (US)
        bnz,a   1b
        sll     %i4, 8, %i4             ! make room for next byte

        sub     %l5, %l0, %l1           ! generate shift left count (LS)
        sll     %i4, %l1, %i4           ! prepare to get rest
        ld      [%i0], %i3              ! read a word
        add     %i0, 4, %i0             ! increment source address
        srl     %i3, %l0, %i5           ! upper src bits into lower dst bits
        or      %i4, %i5, %i5           ! merge
        mov     24, %l3                 ! align destination
1:
        srl     %i5, %l3, %i4           ! prepare to write a single byte
        stb     %i4, [%i1]              ! write a byte
        add     %i1, 1, %i1             ! increment destination address
        sub     %i2, 1, %i2             ! decrement count
        btst    3, %i1                  ! is destination aligned?
        bnz,a   1b
        sub     %l3, 8, %l3             ! delay slot, decrement shift count (LD)
        sub     %l5, %l3, %l2           ! generate shift left count (UD)
        sll     %i5, %l2, %i5           ! move leftover into upper bytes
        cmp     %l2, %l0                ! cmp # reqd to fill dst w old src left
        bgu     %ncc, .more_needed      ! need more to fill than we have
        nop

        sll     %i3, %l1, %i3           ! clear upper used byte(s)
        srl     %i3, %l1, %i3
        ! get the odd bytes between alignments
        sub     %l0, %l2, %l0           ! regenerate shift count
        sub     %l5, %l0, %l1           ! generate new shift left count (LS)
        and     %i2, 3, %l4             ! must do remaining bytes if count%4 > 0
        andn    %i2, 3, %i2             ! # of aligned bytes that can be moved
        srl     %i3, %l0, %i4
        or      %i5, %i4, %i5
        st      %i5, [%i1]              ! write a word
        subcc   %i2, 4, %i2             ! decrement count
        bz      %ncc, .unalign_out
        add     %i1, 4, %i1             ! increment destination address

        b       2f
        sll     %i3, %l1, %i5           ! get leftover into upper bits
.more_needed:
        sll     %i3, %l0, %i3           ! save remaining byte(s)
        srl     %i3, %l0, %i3
        sub     %l2, %l0, %l1           ! regenerate shift count
        sub     %l5, %l1, %l0           ! generate new shift left count
        sll     %i3, %l1, %i4           ! move to fill empty space
        b       3f
        or      %i5, %i4, %i5           ! merge to complete word
        !
        ! the source address is aligned and destination is not
        !
.align_dst_only:
        ld      [%i0], %i4              ! read a word
        add     %i0, 4, %i0             ! increment source address
        mov     24, %l0                 ! initial shift alignment count
1:
        srl     %i4, %l0, %i3           ! prepare to write a single byte
        stb     %i3, [%i1]              ! write a byte
        add     %i1, 1, %i1             ! increment destination address
        sub     %i2, 1, %i2             ! decrement count
        btst    3, %i1                  ! is destination aligned?
        bnz,a   1b
        sub     %l0, 8, %l0             ! delay slot, decrement shift count
.xfer:
        sub     %l5, %l0, %l1           ! generate shift left count
        sll     %i4, %l1, %i5           ! get leftover
3:
        and     %i2, 3, %l4             ! must do remaining bytes if count%4 > 0
        andn    %i2, 3, %i2             ! # of aligned bytes that can be moved
2:
        ld      [%i0], %i3              ! read a source word
        add     %i0, 4, %i0             ! increment source address
        srl     %i3, %l0, %i4           ! upper src bits into lower dst bits
        or      %i5, %i4, %i5           ! merge with upper dest bits (leftover)
        st      %i5, [%i1]              ! write a destination word
        subcc   %i2, 4, %i2             ! decrement count
        bz      %ncc, .unalign_out      ! check if done
        add     %i1, 4, %i1             ! increment destination address
        b       2b                      ! loop
        sll     %i3, %l1, %i5           ! get leftover
.unalign_out:
        tst     %l4                     ! any bytes leftover?
        bz      %ncc, .cpdone
        .empty                          ! allow next instruction in delay slot
1:
        sub     %l0, 8, %l0             ! decrement shift
        srl     %i3, %l0, %i4           ! upper src byte into lower dst byte
        stb     %i4, [%i1]              ! write a byte
        subcc   %l4, 1, %l4             ! decrement count
        bz      %ncc, .cpdone           ! done?
        add     %i1, 1, %i1             ! increment destination
        tst     %l0                     ! any more previously read bytes
        bnz     %ncc, 1b                ! we have leftover bytes
        mov     %l4, %i2                ! delay slot, mv cnt where dbytecp wants
        b       .dbytecp                ! let dbytecp do the rest
        sub     %i0, %i1, %i0           ! i0 gets the difference of src and dst
        !
        ! the destination address is aligned and the source is not
        !
.align_src_only:
        ldub    [%i0], %i3              ! read a byte from source address
        add     %i0, 1, %i0             ! increment source address
        or      %i4, %i3, %i4           ! or in with previous bytes (if any)
        btst    3, %i0                  ! is source aligned?
        add     %l0, 8, %l0             ! increment shift count (US)
        bnz,a   .align_src_only
        sll     %i4, 8, %i4             ! make room for next byte
        b,a     .xfer
        !
        ! if from address unaligned for double-word moves,
        ! move bytes till it is, if count is < 56 it could take
        ! longer to align the thing than to do the transfer
        ! in word size chunks right away
        !
.aldoubcp:
        cmp     %i2, 56                 ! if count < 56, use wordcp, it takes
        blu,a   %ncc, .alwordcp         ! longer to align doubles than words
        mov     3, %o0                  ! mask for word alignment
        call    .alignit                ! copy bytes until aligned
        mov     7, %o0                  ! mask for double alignment
        !
        ! source and destination are now double-word aligned
        ! i3 has aligned count returned by alignit
        !
        and     %i2, 7, %i2             ! unaligned leftover count
        sub     %i0, %i1, %i0           ! i0 gets the difference of src and dst
5:
        ldx     [%i0+%i1], %o4          ! read from address
        stx     %o4, [%i1]              ! write at destination address
        subcc   %i3, 8, %i3             ! dec count
        bgu     %ncc, 5b
        add     %i1, 8, %i1             ! delay slot, inc to address
        cmp     %i2, 4                  ! see if we can copy a word
        blu     %ncc, .dbytecp          ! if 3 or less bytes use bytecp
        .empty
        !
        ! for leftover bytes we fall into wordcp, if needed
        !
.wordcp:
        and     %i2, 3, %i2             ! unaligned leftover count
5:
        ld      [%i0+%i1], %o4          ! read from address
        st      %o4, [%i1]              ! write at destination address
        subcc   %i3, 4, %i3             ! dec count
        bgu     %ncc, 5b
        add     %i1, 4, %i1             ! delay slot, inc to address
        b,a     .dbytecp

        ! we come here to align copies on word boundaries
.alwordcp:
        call    .alignit                ! go word-align it
        mov     3, %o0                  ! bits that must be zero to be aligned
        b       .wordcp
        sub     %i0, %i1, %i0           ! i0 gets the difference of src and dst

        !
        ! byte copy, works with any alignment
        !
.bytecp:
        b       .dbytecp
        sub     %i0, %i1, %i0           ! i0 gets difference of src and dst

        !
        ! differenced byte copy, works with any alignment
        ! assumes dest in %i1 and (source - dest) in %i0
        !
1:
        stb     %o4, [%i1]              ! write to address
        inc     %i1                     ! inc to address
.dbytecp:
        deccc   %i2                     ! dec count
        bgeu,a  %ncc, 1b                ! loop till done
        ldub    [%i0+%i1], %o4          ! read from address
.cpdone:
        membar  #Sync                           ! sync error barrier
        ! Restore t_lofault handler, if we came here from kcopy().
        tst     %o5
        bz      %ncc, 1f
        andn    %o5, LOFAULT_SET, %o5
        stn     %o5, [THREAD_REG + T_LOFAULT]   ! restore old t_lofault
1:
        mov     %g5, %o0                ! copy dest address
        call    sync_icache
        mov     %l6, %o1                ! saved size
        ret
        restore %g0, 0, %o0             ! return (0)

/*
 * Common code used to align transfers on word and doubleword
 * boundaries.  Aligns source and destination and returns a count
 * of aligned bytes to transfer in %i3
 */
1:
        inc     %i0                     ! inc from
        stb     %o4, [%i1]              ! write a byte
        inc     %i1                     ! inc to
        dec     %i2                     ! dec count
.alignit:
        btst    %o0, %i0                ! %o0 is bit mask to check for alignment
        bnz,a   1b
        ldub    [%i0], %o4              ! read next byte

        retl
        andn    %i2, %o0, %i3           ! return size of aligned bytes
        SET_SIZE(bcopy)

#endif  /* lint */

/*
 * Block copy with possibly overlapped operands.
 */
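
/*
 * Illustrative C sketch (not compiled) of the decision ovbcopy() makes
 * below; f and t are byte pointers over the from and to arguments:
 *
 *      if (count <= (size_t)(f > t ? f - t : t - f)) {
 *              bcopy(from, to, count);         no overlap possible
 *      } else if (f > t) {
 *              while (count-- > 0)             copy forwards
 *                      *t++ = *f++;
 *      } else {
 *              while (count-- > 0)             copy backwards
 *                      t[count] = f[count];
 *      }
 */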

#if defined(lint)

/*ARGSUSED*/
void
ovbcopy(const void *from, void *to, size_t count)
{}

#else   /* lint */

        ENTRY(ovbcopy)
        tst     %o2                     ! check count
        bgu,a   %ncc, 1f                ! nothing to do or bad arguments
        subcc   %o0, %o1, %o3           ! difference of from and to address

        retl                            ! return
        nop
1:
        bneg,a  %ncc, 2f
        neg     %o3                     ! if < 0, make it positive
2:      cmp     %o2, %o3                ! cmp size and abs(from - to)
        bleu    %ncc, bcopy             ! if size <= abs(diff): use bcopy,
        .empty                          !   no overlap
        cmp     %o0, %o1                ! compare from and to addresses
        blu     %ncc, .ov_bkwd          ! if from < to, copy backwards
        nop
        !
        ! Copy forwards.
        !
.ov_fwd:
        ldub    [%o0], %o3              ! read from address
        inc     %o0                     ! inc from address
        stb     %o3, [%o1]              ! write to address
        deccc   %o2                     ! dec count
        bgu     %ncc, .ov_fwd           ! loop till done
        inc     %o1                     ! inc to address

        retl                            ! return
        nop
        !
        ! Copy backwards.
        !
.ov_bkwd:
        deccc   %o2                     ! dec count
        ldub    [%o0 + %o2], %o3        ! get byte at end of src
        bgu     %ncc, .ov_bkwd          ! loop till done
        stb     %o3, [%o1 + %o2]        ! delay slot, store at end of dst

        retl                            ! return
        nop
        SET_SIZE(ovbcopy)

#endif  /* lint */

/*
 * hwblkpagecopy()
 *
 * Copies exactly one page.  This routine assumes the caller (ppcopy)
 * has already disabled kernel preemption and has checked
 * use_hw_bcopy.
 */
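
/*
 * Illustrative C sketch (not compiled) of the loop below: the page is
 * moved in 0x80-byte chunks, eight 8-byte registers at a time, and the
 * I-cache is synced for the destination page afterwards.
 *
 *      uint64_t *s = (uint64_t *)src, *d = (uint64_t *)dst;
 *      size_t n;
 *
 *      for (n = PAGESIZE; n != 0; n -= 0x80, s += 16, d += 16) {
 *              int i;
 *              for (i = 0; i < 16; i++)
 *                      d[i] = s[i];
 *      }
 *      sync_icache(dst, PAGESIZE);
 */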
#ifdef lint
/*ARGSUSED*/
void
hwblkpagecopy(const void *src, void *dst)
{ }
#else /* lint */
        ENTRY(hwblkpagecopy)
        save    %sp, -SA(MINFRAME), %sp

        ! %i0 - source address (arg)
        ! %i1 - destination address (arg)
        ! %i2 - length of region (not arg)

        set     PAGESIZE, %i2
        mov     %i1,    %o0     ! store destination address for flushing

        /*
         * We copy exactly one page; PAGESIZE is a multiple of 0x80.
         */
1:
        ldx     [%i0+0x0], %l0
        ldx     [%i0+0x8], %l1
        ldx     [%i0+0x10], %l2
        ldx     [%i0+0x18], %l3
        ldx     [%i0+0x20], %l4
        ldx     [%i0+0x28], %l5
        ldx     [%i0+0x30], %l6
        ldx     [%i0+0x38], %l7
        stx     %l0, [%i1+0x0]
        stx     %l1, [%i1+0x8]
        stx     %l2, [%i1+0x10]
        stx     %l3, [%i1+0x18]
        stx     %l4, [%i1+0x20]
        stx     %l5, [%i1+0x28]
        stx     %l6, [%i1+0x30]
        stx     %l7, [%i1+0x38]

        ldx     [%i0+0x40], %l0
        ldx     [%i0+0x48], %l1
        ldx     [%i0+0x50], %l2
        ldx     [%i0+0x58], %l3
        ldx     [%i0+0x60], %l4
        ldx     [%i0+0x68], %l5
        ldx     [%i0+0x70], %l6
        ldx     [%i0+0x78], %l7
        stx     %l0, [%i1+0x40]
        stx     %l1, [%i1+0x48]
        stx     %l2, [%i1+0x50]
        stx     %l3, [%i1+0x58]
        stx     %l4, [%i1+0x60]
        stx     %l5, [%i1+0x68]
        stx     %l6, [%i1+0x70]
        stx     %l7, [%i1+0x78]

        add     %i0, 0x80, %i0
        subcc   %i2, 0x80, %i2
        bgu,pt  %xcc, 1b
        add     %i1, 0x80, %i1

        ! %o0 contains the dest. address
        set     PAGESIZE, %o1
        call    sync_icache
        nop

        membar #Sync
        ret
        restore %g0, 0, %o0
        SET_SIZE(hwblkpagecopy)
#endif  /* lint */


/*
 * Transfer data to and from user space.
 * Note that these routines can cause faults.
 * It is assumed that the kernel has nothing at
 * less than KERNELBASE in the virtual address space.
 *
 * Note that copyin(9F) and copyout(9F) are part of the
 * DDI/DKI which specifies that they return '-1' on "errors."
 *
 * Sigh.
 *
 * So there are two extremely similar routines - xcopyin() and xcopyout()
 * which return the errno that we've faithfully computed.  This
 * allows other callers (e.g. uiomove(9F)) to work correctly.
 * Given that these are used pretty heavily, we expand the calling
 * sequences inline for all flavours (rather than making wrappers).
 *
 * There are also stub routines for xcopyout_little and xcopyin_little,
 * which currently are intended to handle requests of <= 16 bytes from
 * do_unaligned.  Future enhancement to make them handle 8k pages
 * efficiently is left as an exercise...
 */
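
/*
 * Illustrative C sketch (not compiled) of the two error conventions
 * described above; the surrounding driver code is hypothetical:
 *
 *      if (copyout(kaddr, uaddr, len) != 0)
 *              return (EFAULT);        DDI/DKI promises only -1
 *
 *      error = xcopyout(kaddr, uaddr, len);
 *      if (error != 0)
 *              return (error);         a real errno, as uiomove(9F) needs
 */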

/*
 * Copy data to and from user space (copyOP/xcopyOP/copyOP_noerr)
 *
 * General theory of operation:
 *
 * None of the copyops routines grab a window.
 *
 * Flow:
 *
 * If count == zero return zero.
 *
 * Store the previous lofault handler into %g6.
 * Place our secondary lofault handler into %g5.
 * Place the address of our fault handler into %o3.
 *
 * If count is less than or equal to SMALL_LIMIT (7) we
 * always do a byte for byte copy.
 *
 * If count is > SMALL_LIMIT, we check the alignment of the input
 * and output pointers.  We store -count in %o3 and the number
 * of chunks (8, 4, 2 or 1 byte) operated on in our basic copy loop
 * in %o2.  Following this we branch to the appropriate copy loop and
 * copy that many chunks.  Since we've been adding the chunk size
 * to %o3 each time through as well as decrementing %o2, we can tell
 * if any data is left to be copied by examining %o3.  If that is
 * zero, we're done and can go home.  If not, we figure out what the
 * largest chunk size left to be copied is and branch to that copy
 * loop unless there's only one byte left.  We load that as we're
 * branching to code that stores it just before we return.
 *
 * Fault handlers are invoked if we reference memory that has no
 * current mapping.  All forms share the same copyio_fault handler.
 * This routine handles fixing up the stack and general housecleaning.
 * Each copy operation has a simple fault handler that is then called
 * to do the work specific to the individual operation.  The handlers
 * for copyOP and xcopyOP are found at the end of the individual
 * functions.  The handlers for xcopyOP_little are found at the end of
 * xcopyin_little.  The handlers for copyOP_noerr are found at the end
 * of copyin_noerr.
 */
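
/*
 * Illustrative C sketch (not compiled) of the loop shape described
 * above: both pointers are advanced to the ends of their buffers and a
 * single negative offset (%o3) walks forward through both at once, so
 * the loop body only has to bump one register.
 *
 *      ssize_t off = -(ssize_t)count;
 *
 *      src += count;
 *      dst += count;
 *      while (off < 0) {
 *              dst[off] = src[off];
 *              off++;
 *      }
 */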

/*
 * Copy kernel data to user space (copyout/xcopyout/xcopyout_little).
 */

#if defined(lint)

/*ARGSUSED*/
int
copyout(const void *kaddr, void *uaddr, size_t count)
{ return (0); }

#else   /* lint */

/*
 * We save the arguments in the following registers in case of a fault:
 *      kaddr - %g2
 *      uaddr - %g3
 *      count - %g4
 */
#define SAVE_SRC        %g2
#define SAVE_DST        %g3
#define SAVE_COUNT      %g4

#define REAL_LOFAULT            %g5
#define SAVED_LOFAULT           %g6

/*
 * Generic copyio fault handler.  This is the first line of defense when a
 * fault occurs in (x)copyin/(x)copyout.  In order for this to function
 * properly, the value of the 'real' lofault handler should be in REAL_LOFAULT.
 * This allows us to share common code for all the flavors of the copy
 * operations, including the _noerr versions.
 *
 * Note that this function will restore the original input parameters before
 * calling REAL_LOFAULT.  So the real handler can vector to the appropriate
 * member of the t_copyop structure, if needed.
 */
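
/*
 * Illustrative sketch (not compiled) of the t_lofault protocol shared
 * by these routines, in C-like pseudocode:
 *
 *      saved_lofault = curthread->t_lofault;
 *      curthread->t_lofault = copyio_fault;    arm the handler
 *      ... do the copy; a fault vectors to copyio_fault, which
 *          restores t_lofault and jumps to REAL_LOFAULT ...
 *      curthread->t_lofault = saved_lofault;   disarm on success
 *      return (0);
 */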
        ENTRY(copyio_fault)
        membar  #Sync
        stn     SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault

        mov     SAVE_SRC, %o0
        mov     SAVE_DST, %o1
        jmp     REAL_LOFAULT
          mov   SAVE_COUNT, %o2
        SET_SIZE(copyio_fault)

        ENTRY(copyout)
        sethi   %hi(.copyout_err), REAL_LOFAULT
        or      REAL_LOFAULT, %lo(.copyout_err), REAL_LOFAULT

.do_copyout:
        !
        ! Check the length and bail if zero.
        !
        tst     %o2
        bnz,pt  %ncc, 1f
          nop
        retl
          clr   %o0
1:
        sethi   %hi(copyio_fault), %o3
        ldn     [THREAD_REG + T_LOFAULT], SAVED_LOFAULT
        or      %o3, %lo(copyio_fault), %o3
        membar  #Sync
        stn     %o3, [THREAD_REG + T_LOFAULT]

        mov     %o0, SAVE_SRC
        mov     %o1, SAVE_DST
        mov     %o2, SAVE_COUNT

        !
        ! Check to see if we're more than SMALL_LIMIT (7 bytes).
        ! Run in leaf mode, using the %o regs as our input regs.
        !
        subcc   %o2, SMALL_LIMIT, %o3
        bgu,a,pt %ncc, .dco_ns
        or      %o0, %o1, %o3

.dcobcp:
        sub     %g0, %o2, %o3           ! negate count
        add     %o0, %o2, %o0           ! make %o0 point at the end
        add     %o1, %o2, %o1           ! make %o1 point at the end
        ba,pt   %ncc, .dcocl
        ldub    [%o0 + %o3], %o4        ! load first byte
        !
        ! %o0 and %o1 point at the end and remain pointing at the end
        ! of their buffers. We pull things out by adding %o3 (which is
        ! the negation of the length) to the buffer end which gives us
        ! the current location in the buffers. By incrementing %o3 we walk
        ! through both buffers without having to bump each buffer's
        ! pointer. A very fast 4 instruction loop.
        !
        .align 16
.dcocl:
        stba    %o4, [%o1 + %o3]ASI_USER
        inccc   %o3
        bl,a,pt %ncc, .dcocl
        ldub    [%o0 + %o3], %o4
        !
        ! We're done. Go home.
        !
        membar  #Sync
        stn     SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
        retl
        clr     %o0
        !
        ! Try aligned copies from here.
        !
.dco_ns:
        ! %o0 = kernel addr (to be copied from)
        ! %o1 = user addr (to be copied to)
        ! %o2 = length
        ! %o3 = %o0 | %o1 (used for alignment checking)
        ! %o4 is alternate lo_fault
        ! %o5 is original lo_fault
        !
        ! See if we're single byte aligned. If we are, check the
        ! limit for single byte copies. If we're smaller or equal,
        ! bounce to the byte for byte copy loop. Otherwise do it in
        ! HW (if enabled).
        !
        btst    1, %o3
        bz,pt   %icc, .dcoh8
        btst    7, %o3

        ba      .dcobcp
        nop
.dcoh8:
        !
        ! 8 byte aligned?
        !
        bnz,a   %ncc, .dcoh4
        btst    3, %o3
.dcos8:
        !
        ! Housekeeping for copy loops. Uses same idea as in the byte for
        ! byte copy loop above.
        !
        add     %o0, %o2, %o0
        add     %o1, %o2, %o1
        sub     %g0, %o2, %o3
        ba,pt   %ncc, .dodebc
        srl     %o2, 3, %o2             ! Number of 8 byte chunks to copy
        !
        ! 4 byte aligned?
        !
.dcoh4:
        bnz,pn  %ncc, .dcoh2
        nop
.dcos4:
        add     %o0, %o2, %o0
        add     %o1, %o2, %o1
        sub     %g0, %o2, %o3
        ba,pt   %ncc, .dodfbc
        srl     %o2, 2, %o2             ! Number of 4 byte chunks to copy
        !
        ! We must be 2 byte aligned. Off we go.
        ! The check for small copies was done in the
        ! delay at .dcoh4
        !
.dcoh2:
.dcos2:
        add     %o0, %o2, %o0
        add     %o1, %o2, %o1
        sub     %g0, %o2, %o3
        ba,pt   %ncc, .dodtbc
        srl     %o2, 1, %o2             ! Number of 2 byte chunks to copy

.dodebc:
        ldx     [%o0 + %o3], %o4
        deccc   %o2
        stxa    %o4, [%o1 + %o3]ASI_USER
        bg,pt   %ncc, .dodebc
        addcc   %o3, 8, %o3
        !
        ! End of copy loop. Check to see if we're done. Most
        ! eight byte aligned copies end here.
        !
        bz,pt   %ncc, .dcofh
        nop
        !
        ! Something is left - do it byte for byte.
        !
        ba,pt   %ncc, .dcocl
        ldub    [%o0 + %o3], %o4        ! load next byte
        !
        ! Four byte copy loop. %o2 is the number of 4 byte chunks to copy.
        !
        .align 32
.dodfbc:
        lduw    [%o0 + %o3], %o4
        deccc   %o2
        sta     %o4, [%o1 + %o3]ASI_USER
        bg,pt   %ncc, .dodfbc
        addcc   %o3, 4, %o3
        !
        ! End of copy loop. Check to see if we're done. Most
        ! four byte aligned copies end here.
        !
        bz,pt   %ncc, .dcofh
        nop
        !
        ! Something is left. Do it byte for byte.
        !
        ba,pt   %ncc, .dcocl
        ldub    [%o0 + %o3], %o4        ! load next byte
        !
        ! two byte aligned copy loop. %o2 is the number of 2 byte chunks to
        ! copy.
        !
        .align 32
.dodtbc:
        lduh    [%o0 + %o3], %o4
        deccc   %o2
        stha    %o4, [%o1 + %o3]ASI_USER
        bg,pt   %ncc, .dodtbc
        addcc   %o3, 2, %o3
        !
        ! End of copy loop. Anything left?
        !
        bz,pt   %ncc, .dcofh
        nop
        !
        ! Deal with the last byte
        !
        ldub    [%o0 + %o3], %o4
        stba    %o4, [%o1 + %o3]ASI_USER
.dcofh:
        membar  #Sync
        stn     SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
        retl
        clr     %o0

.copyout_err:
        ldn     [THREAD_REG + T_COPYOPS], %o4
        brz     %o4, 2f
        nop
        ldn     [%o4 + CP_COPYOUT], %g2
        jmp     %g2
        nop
2:
        retl
        mov     -1, %o0
        SET_SIZE(copyout)

#endif  /* lint */


#ifdef  lint

/*ARGSUSED*/
int
xcopyout(const void *kaddr, void *uaddr, size_t count)
{ return (0); }

#else   /* lint */

        ENTRY(xcopyout)
        sethi   %hi(.xcopyout_err), REAL_LOFAULT
        b       .do_copyout
          or    REAL_LOFAULT, %lo(.xcopyout_err), REAL_LOFAULT
.xcopyout_err:
        ldn     [THREAD_REG + T_COPYOPS], %o4
        brz     %o4, 2f
        nop
        ldn     [%o4 + CP_XCOPYOUT], %g2
        jmp     %g2
        nop
2:
        retl
        mov     %g1, %o0
        SET_SIZE(xcopyout)

#endif  /* lint */

#ifdef  lint

/*ARGSUSED*/
int
xcopyout_little(const void *kaddr, void *uaddr, size_t count)
{ return (0); }

#else   /* lint */

        ENTRY(xcopyout_little)
        sethi   %hi(.little_err), %o4
        ldn     [THREAD_REG + T_LOFAULT], %o5
        or      %o4, %lo(.little_err), %o4
        membar  #Sync                   ! sync error barrier
        stn     %o4, [THREAD_REG + T_LOFAULT]

        subcc   %g0, %o2, %o3
        add     %o0, %o2, %o0
        bz,pn   %ncc, 2f                ! check for zero bytes
        sub     %o2, 1, %o4
        add     %o0, %o4, %o0           ! start w/last byte
        add     %o1, %o2, %o1
        ldub    [%o0+%o3], %o4

1:      stba    %o4, [%o1+%o3]ASI_AIUSL
        inccc   %o3
        sub     %o0, 2, %o0             ! get next byte
        bcc,a,pt %ncc, 1b
          ldub  [%o0+%o3], %o4

2:      membar  #Sync                   ! sync error barrier
        stn     %o5, [THREAD_REG + T_LOFAULT]   ! restore old t_lofault
        retl
        mov     %g0, %o0                ! return (0)
        SET_SIZE(xcopyout_little)

#endif  /* lint */

/*
 * Copy user data to kernel space (copyin/xcopyin/xcopyin_little)
 */

#if defined(lint)

/*ARGSUSED*/
int
copyin(const void *uaddr, void *kaddr, size_t count)
{ return (0); }

#else   /* lint */

        ENTRY(copyin)
        sethi   %hi(.copyin_err), REAL_LOFAULT
        or      REAL_LOFAULT, %lo(.copyin_err), REAL_LOFAULT

.do_copyin:
        !
        ! Check the length and bail if zero.
        !
        tst     %o2
        bnz,pt  %ncc, 1f
          nop
        retl
          clr   %o0
1:
        sethi   %hi(copyio_fault), %o3
        ldn     [THREAD_REG + T_LOFAULT], SAVED_LOFAULT
        or      %o3, %lo(copyio_fault), %o3
        membar  #Sync
        stn     %o3, [THREAD_REG + T_LOFAULT]

        mov     %o0, SAVE_SRC
        mov     %o1, SAVE_DST
        mov     %o2, SAVE_COUNT

        !
        ! Check to see if we're more than SMALL_LIMIT.
        !
        subcc   %o2, SMALL_LIMIT, %o3
        bgu,a,pt %ncc, .dci_ns
        or      %o0, %o1, %o3

.dcibcp:
        sub     %g0, %o2, %o3           ! setup for copy loop
        add     %o0, %o2, %o0
        add     %o1, %o2, %o1
        ba,pt   %ncc, .dcicl
        lduba   [%o0 + %o3]ASI_USER, %o4
        !
        ! %o0 and %o1 point at the end and remain pointing at the end
        ! of their buffers. We pull things out by adding %o3 (which is
        ! the negation of the length) to the buffer end which gives us
        ! the current location in the buffers. By incrementing %o3 we walk
        ! through both buffers without having to bump each buffer's
        ! pointer. A very fast 4 instruction loop.
        !
        .align 16
.dcicl:
        stb     %o4, [%o1 + %o3]
        inccc   %o3
        bl,a,pt %ncc, .dcicl
        lduba   [%o0 + %o3]ASI_USER, %o4
        !
        ! We're done. Go home.
        !
        membar  #Sync
        stn     SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
        retl
        clr     %o0
        !
        ! Try aligned copies from here.
        !
.dci_ns:
        !
        ! See if we're single byte aligned. If we are, check the
        ! limit for single byte copies. If we're smaller, or equal,
        ! bounce to the byte for byte copy loop. Otherwise do it in
        ! HW (if enabled).
        !
        btst    1, %o3
        bz,a,pt %icc, .dcih8
        btst    7, %o3
        ba      .dcibcp
        nop

.dcih8:
        !
        ! 8 byte aligned?
        !
        bnz,a   %ncc, .dcih4
        btst    3, %o3
.dcis8:
        !
        ! Housekeeping for copy loops. Uses same idea as in the byte for
        ! byte copy loop above.
        !
        add     %o0, %o2, %o0
        add     %o1, %o2, %o1
        sub     %g0, %o2, %o3
        ba,pt   %ncc, .didebc
        srl     %o2, 3, %o2             ! Number of 8 byte chunks to copy
        !
        ! 4 byte aligned?
        !
.dcih4:
        bnz     %ncc, .dcih2
        nop
.dcis4:
        !
        ! Housekeeping for copy loops. Uses same idea as in the byte
        ! for byte copy loop above.
        !
        add     %o0, %o2, %o0
        add     %o1, %o2, %o1
        sub     %g0, %o2, %o3
        ba,pt   %ncc, .didfbc
        srl     %o2, 2, %o2             ! Number of 4 byte chunks to copy
.dcih2:
.dcis2:
        add     %o0, %o2, %o0
        add     %o1, %o2, %o1
        sub     %g0, %o2, %o3
        ba,pt   %ncc, .didtbc
        srl     %o2, 1, %o2             ! Number of 2 byte chunks to copy

.didebc:
        ldxa    [%o0 + %o3]ASI_USER, %o4
        deccc   %o2
        stx     %o4, [%o1 + %o3]
        bg,pt   %ncc, .didebc
        addcc   %o3, 8, %o3
        !
        ! End of copy loop. Most 8 byte aligned copies end here.
        !
        bz,pt   %ncc, .dcifh
        nop
        !
        ! Something is left. Do it byte for byte.
        !
        ba,pt   %ncc, .dcicl
        lduba   [%o0 + %o3]ASI_USER, %o4
        !
        ! 4 byte copy loop. %o2 is number of 4 byte chunks to copy.
        !
        .align 32
.didfbc:
        lduwa   [%o0 + %o3]ASI_USER, %o4
        deccc   %o2
        st      %o4, [%o1 + %o3]
        bg,pt   %ncc, .didfbc
        addcc   %o3, 4, %o3
        !
        ! End of copy loop. Most 4 byte aligned copies end here.
        !
        bz,pt   %ncc, .dcifh
        nop
        !
        ! Something is left. Do it byte for byte.
        !
        ba,pt   %ncc, .dcicl
        lduba   [%o0 + %o3]ASI_USER, %o4
        !
        ! 2 byte aligned copy loop. %o2 is number of 2 byte chunks to
        ! copy.
        !
        .align 32
.didtbc:
        lduha   [%o0 + %o3]ASI_USER, %o4
        deccc   %o2
        sth     %o4, [%o1 + %o3]
        bg,pt   %ncc, .didtbc
        addcc   %o3, 2, %o3
        !
        ! End of copy loop. Most 2 byte aligned copies end here.
        !
        bz,pt   %ncc, .dcifh
        nop
        !
        ! Deal with the last byte
        !
        lduba   [%o0 + %o3]ASI_USER, %o4
        stb     %o4, [%o1 + %o3]
.dcifh:
        membar  #Sync
        stn     SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]   ! restore old t_lofault
        retl
        clr     %o0

.copyin_err:
        ldn     [THREAD_REG + T_COPYOPS], %o4
        brz     %o4, 2f
        nop
        ldn     [%o4 + CP_COPYIN], %g2
        jmp     %g2
        nop
2:
        retl
        mov     -1, %o0
        SET_SIZE(copyin)

#endif  /* lint */

#ifdef  lint

/*ARGSUSED*/
int
xcopyin(const void *uaddr, void *kaddr, size_t count)
{ return (0); }

#else   /* lint */

        ENTRY(xcopyin)
        sethi   %hi(.xcopyin_err), REAL_LOFAULT
        b       .do_copyin
          or    REAL_LOFAULT, %lo(.xcopyin_err), REAL_LOFAULT
.xcopyin_err:
        ldn     [THREAD_REG + T_COPYOPS], %o4
        brz     %o4, 2f
        nop
        ldn     [%o4 + CP_XCOPYIN], %g2
        jmp     %g2
        nop
2:
        retl
        mov     %g1, %o0
        SET_SIZE(xcopyin)

#endif  /* lint */

#ifdef  lint

/*ARGSUSED*/
int
xcopyin_little(const void *uaddr, void *kaddr, size_t count)
{ return (0); }

#else   /* lint */

        ENTRY(xcopyin_little)
        sethi   %hi(.little_err), %o4
        ldn     [THREAD_REG + T_LOFAULT], %o5
        or      %o4, %lo(.little_err), %o4
        membar  #Sync                           ! sync error barrier
        stn     %o4, [THREAD_REG + T_LOFAULT]

        subcc   %g0, %o2, %o3
        add     %o0, %o2, %o0
        bz,pn   %ncc, 2f                ! check for zero bytes
        sub     %o2, 1, %o4
        add     %o0, %o4, %o0           ! start w/last byte
        add     %o1, %o2, %o1
        lduba   [%o0+%o3]ASI_AIUSL, %o4

1:      stb     %o4, [%o1+%o3]
        inccc   %o3
        sub     %o0, 2, %o0             ! get next byte
        bcc,a,pt %ncc, 1b
          lduba [%o0+%o3]ASI_AIUSL, %o4

2:      membar  #Sync                           ! sync error barrier
        stn     %o5, [THREAD_REG + T_LOFAULT]   ! restore old t_lofault
        retl
        mov     %g0, %o0                ! return (0)

.little_err:
        membar  #Sync                           ! sync error barrier
        stn     %o5, [THREAD_REG + T_LOFAULT]   ! restore old t_lofault
        retl
        mov     %g1, %o0
        SET_SIZE(xcopyin_little)

#endif  /* lint */


/*
 * Copy a block of storage - must not overlap (from + len <= to).
 * No fault handler installed (to be called under on_fault())
 */
#if defined(lint)

/* ARGSUSED */
void
copyin_noerr(const void *ufrom, void *kto, size_t count)
{}

#else   /* lint */

        ENTRY(copyin_noerr)
        sethi   %hi(.copyio_noerr), REAL_LOFAULT
        b       .do_copyin
          or    REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
.copyio_noerr:
        jmp     SAVED_LOFAULT
          nop
        SET_SIZE(copyin_noerr)

#endif /* lint */

/*
 * Copy a block of storage - must not overlap (from + len <= to).
 * No fault handler installed (to be called under on_fault())
 */

#if defined(lint)

/* ARGSUSED */
void
copyout_noerr(const void *kfrom, void *uto, size_t count)
{}

#else   /* lint */

        ENTRY(copyout_noerr)
        sethi   %hi(.copyio_noerr), REAL_LOFAULT
        b       .do_copyout
          or    REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
        SET_SIZE(copyout_noerr)

#endif /* lint */

#if defined(lint)

int use_hw_bcopy = 1;
int use_hw_bzero = 1;

#else /* !lint */

        .align  4
        DGDEF(use_hw_bcopy)
        .word   1
        DGDEF(use_hw_bzero)
        .word   1

        .align  64
        .section ".text"
#endif /* !lint */


/*
 * hwblkclr - clears block-aligned, block-multiple-sized regions that are
 * longer than 256 bytes in length.  For the generic module we simply
 * call bzero and return 1 to indicate that the pages in cache should be
 * flushed to ensure integrity.
 * Caller is responsible for ensuring use_hw_bzero is true and that
 * kpreempt_disable() has been called.
 */
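/*
 * Illustrative C equivalent (not compiled) of the generic version
 * below:
 *
 *      int
 *      hwblkclr(void *addr, size_t len)
 *      {
 *              bzero(addr, len);
 *              return (1);     did not use block operations
 *      }
 */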
#ifdef lint
/*ARGSUSED*/
int
hwblkclr(void *addr, size_t len)
{
        return (0);
}
#else /* lint */
        ! %i0 - start address
        ! %i1 - length of region (multiple of 64)

        ENTRY(hwblkclr)
        save    %sp, -SA(MINFRAME), %sp

        ! Simply call bzero and notify the caller that bzero was used
        mov     %i0, %o0
        call    bzero
          mov   %i1, %o1
        ret
        restore %g0, 1, %o0     ! return (1) - did not use block operations

        SET_SIZE(hwblkclr)
#endif  /* lint */

#ifdef  lint
/* Copy 32 bytes of data from src to dst using physical addresses */
/*ARGSUSED*/
void
hw_pa_bcopy32(uint64_t src, uint64_t dst)
{}
#else   /*!lint */

        /*
         * Copy 32 bytes of data from src (%o0) to dst (%o1)
         * using physical addresses.
         */
        ENTRY_NP(hw_pa_bcopy32)
        rdpr    %pstate, %g1
        andn    %g1, PSTATE_IE, %g2
        wrpr    %g0, %g2, %pstate

        ldxa    [%o0]ASI_MEM, %o2
        add     %o0, 8, %o0
        ldxa    [%o0]ASI_MEM, %o3
        add     %o0, 8, %o0
        ldxa    [%o0]ASI_MEM, %o4
        add     %o0, 8, %o0
        ldxa    [%o0]ASI_MEM, %o5
        stxa    %o2, [%o1]ASI_MEM
        add     %o1, 8, %o1
        stxa    %o3, [%o1]ASI_MEM
        add     %o1, 8, %o1
        stxa    %o4, [%o1]ASI_MEM
        add     %o1, 8, %o1
        stxa    %o5, [%o1]ASI_MEM

        membar  #Sync
        retl
          wrpr    %g0, %g1, %pstate
        SET_SIZE(hw_pa_bcopy32)
#endif /* lint */

/*
 * Zero a block of storage.
 *
 * uzero is used by the kernel to zero a block in user address space.
 */
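
/*
 * Illustrative sketch (not compiled): uzero(), kzero() and bzero()
 * share the .do_zero code below; the %asi register steers every
 * stba/stxa at either user or kernel address space.  In C-like
 * pseudocode:
 *
 *      uzero:  asi = ASI_USER; set handler only if one was armed
 *      kzero:  asi = ASI_P;    always set handler, flag via LOFAULT_SET
 *      bzero:  asi = ASI_P;    set handler only if one was armed
 */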


#if defined(lint)

/* ARGSUSED */
int
kzero(void *addr, size_t count)
{ return (0); }

/* ARGSUSED */
void
uzero(void *addr, size_t count)
{}

#else   /* lint */

        ENTRY(uzero)
        !
        ! Set a new lo_fault handler only if we came in with one
        ! already specified.
        !
        wr      %g0, ASI_USER, %asi
        ldn     [THREAD_REG + T_LOFAULT], %o5
        tst     %o5
        bz,pt   %ncc, .do_zero
        sethi   %hi(.zeroerr), %o2
        or      %o2, %lo(.zeroerr), %o2
        membar  #Sync
        ba,pt   %ncc, .do_zero
        stn     %o2, [THREAD_REG + T_LOFAULT]

        ENTRY(kzero)
        !
        ! Always set a lo_fault handler
        !
        wr      %g0, ASI_P, %asi
        ldn     [THREAD_REG + T_LOFAULT], %o5
        sethi   %hi(.zeroerr), %o2
        or      %o5, LOFAULT_SET, %o5
        or      %o2, %lo(.zeroerr), %o2
        membar  #Sync
        ba,pt   %ncc, .do_zero
        stn     %o2, [THREAD_REG + T_LOFAULT]

/*
 * We got here because of a fault during kzero or if
 * uzero or bzero was called with t_lofault non-zero.
 * Otherwise we've already run screaming from the room.
 * Errno value is in %g1.  Note that we're here iff
 * we did set t_lofault.
 */
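/*
 * Illustrative sketch (not compiled) of the cases sorted out below,
 * where o5 is the saved t_lofault value (possibly with LOFAULT_SET
 * or'ed in) and g1 is the errno:
 *
 *      if (o5 == 0)
 *              return (g1);            no handler was ever set
 *      o5 &= ~LOFAULT_SET;
 *      curthread->t_lofault = o5;      restore the previous value
 *      if (o5 == 0)
 *              return (g1);            kzero() with no prior handler
 *      goto *o5;                       invoke the prior handler
 */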
.zeroerr:
        !
        ! Undo asi register setting. Just set it to be the
        ! kernel default without checking.
        !
        wr      %g0, ASI_P, %asi

        !
        ! We did set t_lofault. It may well have been zero coming in.
        !
1:
        tst     %o5
        membar #Sync
        bne,pn  %ncc, 3f
        andncc  %o5, LOFAULT_SET, %o5
2:
        !
        ! Old handler was zero. Just return the error.
        !
        retl                            ! return
        mov     %g1, %o0                ! error code from %g1
3:
        !
        ! We're here because %o5 was non-zero. It was non-zero
        ! because either LOFAULT_SET was present, a previous fault
        ! handler was present or both. In all cases we need to reset
        ! T_LOFAULT to the value of %o5 after clearing LOFAULT_SET
        ! before we either simply return the error or we invoke the
        ! previously specified handler.
        !
        be      %ncc, 2b
        stn     %o5, [THREAD_REG + T_LOFAULT]
        jmp     %o5                     ! goto real handler
          nop
        SET_SIZE(kzero)
        SET_SIZE(uzero)

#endif  /* lint */

/*
 * Zero a block of storage.
 */

#if defined(lint)

/* ARGSUSED */
void
bzero(void *addr, size_t count)
{}

#else   /* lint */

        ENTRY(bzero)
        wr      %g0, ASI_P, %asi

        ldn     [THREAD_REG + T_LOFAULT], %o5   ! save old vector
        tst     %o5
        bz,pt   %ncc, .do_zero
        sethi   %hi(.zeroerr), %o2
        or      %o2, %lo(.zeroerr), %o2
        membar  #Sync                           ! sync error barrier
        stn     %o2, [THREAD_REG + T_LOFAULT]   ! install new vector

.do_zero:
        cmp     %o1, 7
        blu,pn  %ncc, .byteclr
        nop

        cmp     %o1, 15
        blu,pn  %ncc, .wdalign
        nop

        andcc   %o0, 7, %o3             ! is addr aligned on an 8 byte boundary
        bz,pt   %ncc, .blkalign         ! already double aligned
        sub     %o3, 8, %o3             ! -(bytes till double aligned)
        add     %o1, %o3, %o1           ! update o1 with new count

1:
        stba    %g0, [%o0]%asi
        inccc   %o3
        bl,pt   %ncc, 1b
        inc     %o0

        ! Now address is double aligned
.blkalign:
        cmp     %o1, 0x80               ! check if there are 128 bytes to set
        blu,pn  %ncc, .bzero_small
        mov     %o1, %o3

        andcc   %o0, 0x3f, %o3          ! is block aligned?
        bz,pt   %ncc, .bzero_blk
        sub     %o3, 0x40, %o3          ! -(bytes till block aligned)
        add     %o1, %o3, %o1           ! o1 is the remainder

        ! Clear -(%o3) bytes till block aligned
1:
        stxa    %g0, [%o0]%asi
        addcc   %o3, 8, %o3
        bl,pt   %ncc, 1b
        add     %o0, 8, %o0

.bzero_blk:
        and     %o1, 0x3f, %o3          ! calc bytes left after blk clear
        andn    %o1, 0x3f, %o4          ! calc size of blocks in bytes

        cmp     %o4, 0x100              ! 256 bytes or more
        blu,pn  %ncc, 3f
        nop

2:
        stxa    %g0, [%o0+0x0]%asi
        stxa    %g0, [%o0+0x40]%asi
        stxa    %g0, [%o0+0x80]%asi
        stxa    %g0, [%o0+0xc0]%asi

        stxa    %g0, [%o0+0x8]%asi
        stxa    %g0, [%o0+0x10]%asi
        stxa    %g0, [%o0+0x18]%asi
        stxa    %g0, [%o0+0x20]%asi
        stxa    %g0, [%o0+0x28]%asi
        stxa    %g0, [%o0+0x30]%asi
        stxa    %g0, [%o0+0x38]%asi

        stxa    %g0, [%o0+0x48]%asi
        stxa    %g0, [%o0+0x50]%asi
        stxa    %g0, [%o0+0x58]%asi
        stxa    %g0, [%o0+0x60]%asi
        stxa    %g0, [%o0+0x68]%asi
        stxa    %g0, [%o0+0x70]%asi
        stxa    %g0, [%o0+0x78]%asi

        stxa    %g0, [%o0+0x88]%asi
        stxa    %g0, [%o0+0x90]%asi
        stxa    %g0, [%o0+0x98]%asi
        stxa    %g0, [%o0+0xa0]%asi
        stxa    %g0, [%o0+0xa8]%asi
        stxa    %g0, [%o0+0xb0]%asi
        stxa    %g0, [%o0+0xb8]%asi

        stxa    %g0, [%o0+0xc8]%asi
        stxa    %g0, [%o0+0xd0]%asi
        stxa    %g0, [%o0+0xd8]%asi
        stxa    %g0, [%o0+0xe0]%asi
        stxa    %g0, [%o0+0xe8]%asi
        stxa    %g0, [%o0+0xf0]%asi
        stxa    %g0, [%o0+0xf8]%asi

        sub     %o4, 0x100, %o4
        cmp     %o4, 0x100
        bgu,pt  %ncc, 2b
        add     %o0, 0x100, %o0

3:
        ! ... check if 64 bytes to set
        cmp     %o4, 0x40
        blu     %ncc, .bzero_blk_done
        nop

4:
        stxa    %g0, [%o0+0x0]%asi
        stxa    %g0, [%o0+0x8]%asi
        stxa    %g0, [%o0+0x10]%asi
        stxa    %g0, [%o0+0x18]%asi
        stxa    %g0, [%o0+0x20]%asi
        stxa    %g0, [%o0+0x28]%asi
        stxa    %g0, [%o0+0x30]%asi
        stxa    %g0, [%o0+0x38]%asi

        subcc   %o4, 0x40, %o4
        bgu,pt  %ncc, 3b
        add     %o0, 0x40, %o0

.bzero_blk_done:
        membar  #Sync

.bzero_small:
        ! Set the remaining doubles
        subcc   %o3, 8, %o3             ! Can we store any doubles?
        blu,pn  %ncc, .byteclr
        and     %o1, 7, %o1             ! calc bytes left after doubles

.dbclr:
        stxa    %g0, [%o0]%asi          ! Clear the doubles
        subcc   %o3, 8, %o3
        bgeu,pt %ncc, .dbclr
        add     %o0, 8, %o0

        ba      .byteclr
        nop

.wdalign:
        andcc   %o0, 3, %o3             ! is addr aligned on a word boundary
        bz,pn   %ncc, .wdclr
        andn    %o1, 3, %o3             ! create word sized count in %o3

        dec     %o1                     ! decrement count
        stba    %g0, [%o0]%asi          ! clear a byte
        ba      .wdalign
        inc     %o0                     ! next byte

.wdclr:
        sta     %g0, [%o0]%asi          ! 4-byte clearing loop
        subcc   %o3, 4, %o3
        bnz,pt  %ncc, .wdclr
        inc     4, %o0

        and     %o1, 3, %o1             ! leftover count, if any

.byteclr:
        ! Set the leftover bytes
        brz     %o1, .bzero_exit
        nop

7:
        deccc   %o1                     ! byte clearing loop
        stba    %g0, [%o0]%asi
        bgu,pt  %ncc, 7b
        inc     %o0

.bzero_exit:
        !
        ! We're just concerned with whether t_lofault was set
        ! when we came in. We end up here from either kzero()
        ! or bzero(). kzero() *always* sets a lofault handler.
        ! It ors LOFAULT_SET into %o5 to indicate it has done
        ! this even if the value of %o5 is otherwise zero.
        ! bzero() sets a lofault handler *only* if one was
        ! previously set. Accordingly we need to examine
        ! %o5 and if it is non-zero be sure to clear LOFAULT_SET
        ! before resetting the error handler.
        !
        tst     %o5
        bz      %ncc, 1f
        andn    %o5, LOFAULT_SET, %o5
        membar  #Sync                           ! sync error barrier
        stn     %o5, [THREAD_REG + T_LOFAULT]   ! restore old t_lofault
1:
        retl
        clr     %o0                     ! return (0)

        SET_SIZE(bzero)
#endif  /* lint */