de-linting of .s files
    
      
    
    
          --- old/usr/src/uts/sun4v/cpu/generic_copy.s
          +++ new/usr/src/uts/sun4v/cpu/generic_copy.s
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
  24   24   */
  25   25  
  26   26  # ident "%Z%%M% %I%     %E% SMI"
  27   27  
  
  28   28  #include <sys/param.h>
  29   29  #include <sys/errno.h>
  30   30  #include <sys/asm_linkage.h>
  31   31  #include <sys/vtrace.h>
  32   32  #include <sys/machthread.h>
  33   33  #include <sys/clock.h>
  34   34  #include <sys/asi.h>
  35   35  #include <sys/fsr.h>
  36   36  #include <sys/privregs.h>
  37   37  
  38      -#if !defined(lint)
  39   38  #include "assym.h"
  40      -#endif  /* lint */
  41   39  
  42   40  
  43   41  /*
   44   42   * Less than or equal to this number of bytes we will always copy byte-for-byte
  45   43   */
  46   44  #define SMALL_LIMIT     7
  47   45  
  48   46  /*
  49   47   * LOFAULT_SET : Flag set by kzero and kcopy to indicate that t_lofault
  50   48   * handler was set
  51   49   */
  52   50  #define LOFAULT_SET 2
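
Because saved t_lofault values are word-aligned addresses, bit 1 is free to
carry this flag. A minimal user-space sketch of the tag arithmetic (all names
other than LOFAULT_SET are hypothetical; the or/andn pairs in the assembly
below do the same thing):

    #include <assert.h>
    #include <stdint.h>

    #define LOFAULT_SET 2

    int
    main(void)
    {
            uintptr_t saved = 0x10007ef0;           /* pretend old t_lofault */
            uintptr_t tagged = saved | LOFAULT_SET; /* or %o5, LOFAULT_SET, %o5 */

            assert(tagged & LOFAULT_SET);           /* flag is visible */
            assert((tagged & ~(uintptr_t)LOFAULT_SET) == saved); /* andn undoes it */
            return (0);
    }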
  
  53   51  
  54   52  
  55   53  /*
  56   54   * Copy a block of storage, returning an error code if `from' or
  57   55   * `to' takes a kernel pagefault which cannot be resolved.
  58   56   * Returns errno value on pagefault error, 0 if all ok
  59   57   */
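
A minimal sketch of that calling contract, with kcopy() stubbed as a
hypothetical stand-in so it compiles outside the kernel (the real routine is
the assembly below):

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>

    static int
    kcopy(const void *from, void *to, size_t count)
    {
            memcpy(to, from, count);        /* stand-in: never faults here */
            return (0);                     /* 0 ok; errno on a pagefault */
    }

    int
    main(void)
    {
            char src[8] = "payload", dst[8];
            int err = kcopy(src, dst, sizeof (src));

            if (err != 0)
                    fprintf(stderr, "kcopy failed: errno %d\n", err);
            return (err);
    }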
  60   58  
  61   59  
  62   60  
  63      -#if defined(lint)
  64      -
  65      -/* ARGSUSED */
  66      -int
  67      -kcopy(const void *from, void *to, size_t count)
  68      -{ return(0); }
  69      -
  70      -#else   /* lint */
  71      -
  72   61          .seg    ".text"
  73   62          .align  4
  74   63  
  75   64          ENTRY(kcopy)
  76   65  
  77   66          save    %sp, -SA(MINFRAME), %sp
  78   67          set     .copyerr, %l7                   ! copyerr is lofault value
  79   68          ldn     [THREAD_REG + T_LOFAULT], %o5   ! save existing handler
  80   69          or      %o5, LOFAULT_SET, %o5
  81   70          membar  #Sync                           ! sync error barrier
  82   71          b       .do_copy                        ! common code
  83   72          stn     %l7, [THREAD_REG + T_LOFAULT]   ! set t_lofault
  84   73  
  85   74  /*
  86   75   * We got here because of a fault during kcopy.
  87   76   * Errno value is in %g1.
  88   77   */
  89   78  .copyerr:
  
  90   79          ! The kcopy() *always* sets a t_lofault handler and it ORs LOFAULT_SET
   91   80          ! into %o5 to indicate it has set the t_lofault handler. Need to clear
  92   81          ! LOFAULT_SET flag before restoring the error handler.
  93   82          andn    %o5, LOFAULT_SET, %o5
  94   83          membar  #Sync                   ! sync error barrier
  95   84          stn     %o5, [THREAD_REG + T_LOFAULT]   ! restore old t_lofault
  96   85          ret
  97   86          restore %g1, 0, %o0
  98   87  
  99   88          SET_SIZE(kcopy)
 100      -#endif  /* lint */
 101   89  
 102   90  
 103   91  /*
 104   92   * Copy a block of storage - must not overlap (from + len <= to).
 105   93   */
 106      -#if defined(lint)
 107   94  
 108      -/* ARGSUSED */
 109      -void
 110      -bcopy(const void *from, void *to, size_t count)
 111      -{}
 112      -
 113      -#else   /* lint */
 114      -
 115   95          ENTRY(bcopy)
 116   96  
 117   97          save    %sp, -SA(MINFRAME), %sp
 118   98          clr     %o5                     ! flag LOFAULT_SET is not set for bcopy
 119   99  
 120  100  .do_copy:
 121  101          mov     %i1, %g5                ! save dest addr start
 122  102  
 123  103          mov     %i2, %l6                ! save size
 124  104  
 125  105          cmp     %i2, 12                 ! for small counts
 126  106          blu     %ncc, .bytecp           ! just copy bytes
 127  107            .empty
 128  108  
 129  109          !
 130  110          ! use aligned transfers where possible
 131  111          !
 132  112          xor     %i0, %i1, %o4           ! xor from and to address
 133  113          btst    7, %o4                  ! if lower three bits zero
 134  114          bz      .aldoubcp               ! can align on double boundary
  135  115          .empty  ! assembler complains about label
 136  116  
 137  117          xor     %i0, %i1, %o4           ! xor from and to address
 138  118          btst    3, %o4                  ! if lower two bits zero
 139  119          bz      .alwordcp               ! can align on word boundary
 140  120          btst    3, %i0                  ! delay slot, from address unaligned?
 141  121          !
 142  122          ! use aligned reads and writes where possible
 143  123          ! this differs from wordcp in that it copes
  144  124          ! with odd alignment between source and destination
 145  125          ! using word reads and writes with the proper shifts
 146  126          ! in between to align transfers to and from memory
 147  127          ! i0 - src address, i1 - dest address, i2 - count
  148  128          ! i3, i4 - tmps used for generating complete word
 149  129          ! i5 (word to write)
 150  130          ! l0 size in bits of upper part of source word (US)
 151  131          ! l1 size in bits of lower part of source word (LS = 32 - US)
 152  132          ! l2 size in bits of upper part of destination word (UD)
 153  133          ! l3 size in bits of lower part of destination word (LD = 32 - UD)
 154  134          ! l4 number of bytes leftover after aligned transfers complete
 155  135          ! l5 the number 32
 156  136          !
 157  137          mov     32, %l5                 ! load an oft-needed constant
 158  138          bz      .align_dst_only
  159  139          btst    3, %i1                  ! is destination address aligned?
 160  140          clr     %i4                     ! clear registers used in either case
 161  141          bz      .align_src_only
 162  142          clr     %l0
 163  143          !
 164  144          ! both source and destination addresses are unaligned
 165  145          !
 166  146  1:                                      ! align source
 167  147          ldub    [%i0], %i3              ! read a byte from source address
 168  148          add     %i0, 1, %i0             ! increment source address
 169  149          or      %i4, %i3, %i4           ! or in with previous bytes (if any)
 170  150          btst    3, %i0                  ! is source aligned?
 171  151          add     %l0, 8, %l0             ! increment size of upper source (US)
 172  152          bnz,a   1b
 173  153          sll     %i4, 8, %i4             ! make room for next byte
 174  154  
 175  155          sub     %l5, %l0, %l1           ! generate shift left count (LS)
 176  156          sll     %i4, %l1, %i4           ! prepare to get rest
 177  157          ld      [%i0], %i3              ! read a word
 178  158          add     %i0, 4, %i0             ! increment source address
 179  159          srl     %i3, %l0, %i5           ! upper src bits into lower dst bits
 180  160          or      %i4, %i5, %i5           ! merge
 181  161          mov     24, %l3                 ! align destination
 182  162  1:
 183  163          srl     %i5, %l3, %i4           ! prepare to write a single byte
 184  164          stb     %i4, [%i1]              ! write a byte
 185  165          add     %i1, 1, %i1             ! increment destination address
 186  166          sub     %i2, 1, %i2             ! decrement count
 187  167          btst    3, %i1                  ! is destination aligned?
 188  168          bnz,a   1b
 189  169          sub     %l3, 8, %l3             ! delay slot, decrement shift count (LD)
 190  170          sub     %l5, %l3, %l2           ! generate shift left count (UD)
 191  171          sll     %i5, %l2, %i5           ! move leftover into upper bytes
 192  172          cmp     %l2, %l0                ! cmp # reqd to fill dst w old src left
 193  173          bgu     %ncc, .more_needed      ! need more to fill than we have
 194  174          nop
 195  175  
 196  176          sll     %i3, %l1, %i3           ! clear upper used byte(s)
 197  177          srl     %i3, %l1, %i3
 198  178          ! get the odd bytes between alignments
 199  179          sub     %l0, %l2, %l0           ! regenerate shift count
 200  180          sub     %l5, %l0, %l1           ! generate new shift left count (LS)
 201  181          and     %i2, 3, %l4             ! must do remaining bytes if count%4 > 0
 202  182          andn    %i2, 3, %i2             ! # of aligned bytes that can be moved
 203  183          srl     %i3, %l0, %i4
 204  184          or      %i5, %i4, %i5
 205  185          st      %i5, [%i1]              ! write a word
 206  186          subcc   %i2, 4, %i2             ! decrement count
 207  187          bz      %ncc, .unalign_out
 208  188          add     %i1, 4, %i1             ! increment destination address
 209  189  
 210  190          b       2f
 211  191          sll     %i3, %l1, %i5           ! get leftover into upper bits
 212  192  .more_needed:
 213  193          sll     %i3, %l0, %i3           ! save remaining byte(s)
 214  194          srl     %i3, %l0, %i3
 215  195          sub     %l2, %l0, %l1           ! regenerate shift count
 216  196          sub     %l5, %l1, %l0           ! generate new shift left count
 217  197          sll     %i3, %l1, %i4           ! move to fill empty space
 218  198          b       3f
 219  199          or      %i5, %i4, %i5           ! merge to complete word
 220  200          !
 221  201          ! the source address is aligned and destination is not
 222  202          !
 223  203  .align_dst_only:
 224  204          ld      [%i0], %i4              ! read a word
 225  205          add     %i0, 4, %i0             ! increment source address
 226  206          mov     24, %l0                 ! initial shift alignment count
 227  207  1:
 228  208          srl     %i4, %l0, %i3           ! prepare to write a single byte
 229  209          stb     %i3, [%i1]              ! write a byte
 230  210          add     %i1, 1, %i1             ! increment destination address
 231  211          sub     %i2, 1, %i2             ! decrement count
 232  212          btst    3, %i1                  ! is destination aligned?
 233  213          bnz,a   1b
 234  214          sub     %l0, 8, %l0             ! delay slot, decrement shift count
 235  215  .xfer:
 236  216          sub     %l5, %l0, %l1           ! generate shift left count
 237  217          sll     %i4, %l1, %i5           ! get leftover
 238  218  3:
 239  219          and     %i2, 3, %l4             ! must do remaining bytes if count%4 > 0
 240  220          andn    %i2, 3, %i2             ! # of aligned bytes that can be moved
 241  221  2:
 242  222          ld      [%i0], %i3              ! read a source word
 243  223          add     %i0, 4, %i0             ! increment source address
 244  224          srl     %i3, %l0, %i4           ! upper src bits into lower dst bits
 245  225          or      %i5, %i4, %i5           ! merge with upper dest bits (leftover)
 246  226          st      %i5, [%i1]              ! write a destination word
 247  227          subcc   %i2, 4, %i2             ! decrement count
 248  228          bz      %ncc, .unalign_out      ! check if done
 249  229          add     %i1, 4, %i1             ! increment destination address
 250  230          b       2b                      ! loop
 251  231          sll     %i3, %l1, %i5           ! get leftover
 252  232  .unalign_out:
 253  233          tst     %l4                     ! any bytes leftover?
 254  234          bz      %ncc, .cpdone
 255  235          .empty                          ! allow next instruction in delay slot
 256  236  1:
 257  237          sub     %l0, 8, %l0             ! decrement shift
 258  238          srl     %i3, %l0, %i4           ! upper src byte into lower dst byte
 259  239          stb     %i4, [%i1]              ! write a byte
 260  240          subcc   %l4, 1, %l4             ! decrement count
 261  241          bz      %ncc, .cpdone           ! done?
 262  242          add     %i1, 1, %i1             ! increment destination
 263  243          tst     %l0                     ! any more previously read bytes
 264  244          bnz     %ncc, 1b                ! we have leftover bytes
 265  245          mov     %l4, %i2                ! delay slot, mv cnt where dbytecp wants
 266  246          b       .dbytecp                ! let dbytecp do the rest
 267  247          sub     %i0, %i1, %i0           ! i0 gets the difference of src and dst
 268  248          !
 269  249          ! the destination address is aligned and the source is not
 270  250          !
 271  251  .align_src_only:
 272  252          ldub    [%i0], %i3              ! read a byte from source address
 273  253          add     %i0, 1, %i0             ! increment source address
 274  254          or      %i4, %i3, %i4           ! or in with previous bytes (if any)
 275  255          btst    3, %i0                  ! is source aligned?
 276  256          add     %l0, 8, %l0             ! increment shift count (US)
 277  257          bnz,a   .align_src_only
 278  258          sll     %i4, 8, %i4             ! make room for next byte
 279  259          b,a     .xfer
 280  260          !
 281  261          ! if from address unaligned for double-word moves,
  282  262          ! move bytes till it is; if count is < 56 it could take
 283  263          ! longer to align the thing than to do the transfer
 284  264          ! in word size chunks right away
 285  265          !
 286  266  .aldoubcp:
 287  267          cmp     %i2, 56                 ! if count < 56, use wordcp, it takes
 288  268          blu,a   %ncc, .alwordcp         ! longer to align doubles than words
 289  269          mov     3, %o0                  ! mask for word alignment
 290  270          call    .alignit                ! copy bytes until aligned
 291  271          mov     7, %o0                  ! mask for double alignment
 292  272          !
 293  273          ! source and destination are now double-word aligned
 294  274          ! i3 has aligned count returned by alignit
 295  275          !
 296  276          and     %i2, 7, %i2             ! unaligned leftover count
 297  277          sub     %i0, %i1, %i0           ! i0 gets the difference of src and dst
 298  278  5:
 299  279          ldx     [%i0+%i1], %o4          ! read from address
 300  280          stx     %o4, [%i1]              ! write at destination address
 301  281          subcc   %i3, 8, %i3             ! dec count
 302  282          bgu     %ncc, 5b
 303  283          add     %i1, 8, %i1             ! delay slot, inc to address
 304  284          cmp     %i2, 4                  ! see if we can copy a word
 305  285          blu     %ncc, .dbytecp          ! if 3 or less bytes use bytecp
 306  286          .empty
 307  287          !
 308  288          ! for leftover bytes we fall into wordcp, if needed
 309  289          !
 310  290  .wordcp:
 311  291          and     %i2, 3, %i2             ! unaligned leftover count
 312  292  5:
 313  293          ld      [%i0+%i1], %o4          ! read from address
 314  294          st      %o4, [%i1]              ! write at destination address
 315  295          subcc   %i3, 4, %i3             ! dec count
 316  296          bgu     %ncc, 5b
 317  297          add     %i1, 4, %i1             ! delay slot, inc to address
 318  298          b,a     .dbytecp
 319  299  
 320  300          ! we come here to align copies on word boundaries
 321  301  .alwordcp:
 322  302          call    .alignit                ! go word-align it
 323  303          mov     3, %o0                  ! bits that must be zero to be aligned
 324  304          b       .wordcp
 325  305          sub     %i0, %i1, %i0           ! i0 gets the difference of src and dst
 326  306  
 327  307          !
 328  308          ! byte copy, works with any alignment
 329  309          !
 330  310  .bytecp:
 331  311          b       .dbytecp
 332  312          sub     %i0, %i1, %i0           ! i0 gets difference of src and dst
 333  313  
 334  314          !
 335  315          ! differenced byte copy, works with any alignment
 336  316          ! assumes dest in %i1 and (source - dest) in %i0
 337  317          !
 338  318  1:
 339  319          stb     %o4, [%i1]              ! write to address
 340  320          inc     %i1                     ! inc to address
 341  321  .dbytecp:
 342  322          deccc   %i2                     ! dec count
 343  323          bgeu,a  %ncc, 1b                ! loop till done
 344  324          ldub    [%i0+%i1], %o4          ! read from address
 345  325  .cpdone:
 346  326          membar  #Sync                           ! sync error barrier
  347  327          ! Restore t_lofault handler, if we came here from kcopy().
 348  328          tst     %o5
 349  329          bz      %ncc, 1f
 350  330          andn    %o5, LOFAULT_SET, %o5
 351  331          stn     %o5, [THREAD_REG + T_LOFAULT]   ! restore old t_lofault
 352  332  1:
 353  333          mov     %g5, %o0                ! copy dest address
 354  334          call    sync_icache
 355  335          mov     %l6, %o1                ! saved size
 356  336          ret
 357  337          restore %g0, 0, %o0             ! return (0)
 358  338  
 359  339  /*
 360  340   * Common code used to align transfers on word and doubleword
  361  341   * boundaries.  Aligns source and destination and returns a count
 362  342   * of aligned bytes to transfer in %i3
 363  343   */
 364  344  1:
 365  345          inc     %i0                     ! inc from
 366  346          stb     %o4, [%i1]              ! write a byte
 367  347          inc     %i1                     ! inc to
  
 368  348          dec     %i2                     ! dec count
 369  349  .alignit:
 370  350          btst    %o0, %i0                ! %o0 is bit mask to check for alignment
 371  351          bnz,a   1b
 372  352          ldub    [%i0], %o4              ! read next byte
 373  353  
 374  354          retl
 375  355          andn    %i2, %o0, %i3           ! return size of aligned bytes
 376  356          SET_SIZE(bcopy)
 377  357  
 378      -#endif  /* lint */
 379      -
 380  358  /*
 381  359   * Block copy with possibly overlapped operands.
 382  360   */
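
The direction-selection logic of ovbcopy, sketched in user-space C
(essentially memmove's reasoning; the function name and the test in main()
are illustrative):

    #include <stddef.h>

    static void
    ovbcopy_sketch(const char *from, char *to, size_t count)
    {
            size_t dist = (from > to) ? (size_t)(from - to) : (size_t)(to - from);

            if (count <= dist || from > to) {
                    /* no overlap, or dst below src: forward copy is safe */
                    for (size_t i = 0; i < count; i++)
                            to[i] = from[i];
            } else {
                    /* overlapping with from < to: copy backwards */
                    while (count-- > 0)
                            to[count] = from[count];
            }
    }

    int
    main(void)
    {
            char buf[9] = "abcdefgh";

            ovbcopy_sketch(buf, buf + 2, 6);  /* overlapping shift right */
            return (buf[2] != 'a');           /* expect "ababcdef" */
    }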
 383  361  
 384      -#if defined(lint)
 385      -
 386      -/*ARGSUSED*/
 387      -void
 388      -ovbcopy(const void *from, void *to, size_t count)
 389      -{}
 390      -
 391      -#else   /* lint */
 392      -
 393  362          ENTRY(ovbcopy)
 394  363          tst     %o2                     ! check count
 395  364          bgu,a   %ncc, 1f                ! nothing to do or bad arguments
 396  365          subcc   %o0, %o1, %o3           ! difference of from and to address
 397  366  
 398  367          retl                            ! return
 399  368          nop
 400  369  1:
 401  370          bneg,a  %ncc, 2f
 402  371          neg     %o3                     ! if < 0, make it positive
 403  372  2:      cmp     %o2, %o3                ! cmp size and abs(from - to)
 404  373          bleu    %ncc, bcopy             ! if size <= abs(diff): use bcopy,
 405  374          .empty                          !   no overlap
 406  375          cmp     %o0, %o1                ! compare from and to addresses
 407  376          blu     %ncc, .ov_bkwd          ! if from < to, copy backwards
 408  377          nop
 409  378          !
 410  379          ! Copy forwards.
 411  380          !
 412  381  .ov_fwd:
 413  382          ldub    [%o0], %o3              ! read from address
 414  383          inc     %o0                     ! inc from address
 415  384          stb     %o3, [%o1]              ! write to address
 416  385          deccc   %o2                     ! dec count
 417  386          bgu     %ncc, .ov_fwd           ! loop till done
 418  387          inc     %o1                     ! inc to address
 419  388  
 420  389          retl                            ! return
 421  390          nop
 422  391          !
 423  392          ! Copy backwards.
 424  393          !
  
 425  394  .ov_bkwd:
 426  395          deccc   %o2                     ! dec count
 427  396          ldub    [%o0 + %o2], %o3        ! get byte at end of src
 428  397          bgu     %ncc, .ov_bkwd          ! loop till done
 429  398          stb     %o3, [%o1 + %o2]        ! delay slot, store at end of dst
 430  399  
 431  400          retl                            ! return
 432  401          nop
 433  402          SET_SIZE(ovbcopy)
 434  403  
 435      -#endif  /* lint */
 436      -
 437  404  /*
 438  405   * hwblkpagecopy()
 439  406   *
 440  407   * Copies exactly one page.  This routine assumes the caller (ppcopy)
 441  408   * has already disabled kernel preemption and has checked
 442  409   * use_hw_bcopy.
 443  410   */
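
What the unrolled loop below does, sketched in C: sixteen 8-byte loads and
stores per iteration move 0x80 bytes, which is why PAGESIZE must be a
multiple of 0x80. The 8192 mirrors sun4v's 8K page size; all names here are
illustrative:

    #include <stdint.h>
    #include <stddef.h>

    #define PAGESIZE_SKETCH 8192            /* assumption: 8K page */

    static void
    hwblkpagecopy_sketch(const uint64_t *src, uint64_t *dst)
    {
            /* 0x80 bytes = 16 doublewords per outer iteration */
            for (size_t i = 0; i < PAGESIZE_SKETCH / sizeof (uint64_t); i += 16)
                    for (size_t j = 0; j < 16; j++)
                            dst[i + j] = src[i + j];
    }

    int
    main(void)
    {
            static uint64_t src[PAGESIZE_SKETCH / 8] = { 42 };
            static uint64_t dst[PAGESIZE_SKETCH / 8];

            hwblkpagecopy_sketch(src, dst);
            return (dst[0] != 42);
    }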
 444      -#ifdef lint
 445      -/*ARGSUSED*/
 446      -void
 447      -hwblkpagecopy(const void *src, void *dst)
 448      -{ }
 449      -#else /* lint */
 450  411          ENTRY(hwblkpagecopy)
 451  412          save    %sp, -SA(MINFRAME), %sp
 452  413  
 453  414          ! %i0 - source address (arg)
 454  415          ! %i1 - destination address (arg)
 455  416          ! %i2 - length of region (not arg)
 456  417  
 457  418          set     PAGESIZE, %i2
 458  419          mov     %i1,    %o0     ! store destination address for flushing
 459  420  
 460  421          /*
  461  422           * Copying exactly one page; PAGESIZE is a multiple of 0x80.
 462  423           */
 463  424  1:
 464  425          ldx     [%i0+0x0], %l0
 465  426          ldx     [%i0+0x8], %l1
 466  427          ldx     [%i0+0x10], %l2
 467  428          ldx     [%i0+0x18], %l3
 468  429          ldx     [%i0+0x20], %l4
 469  430          ldx     [%i0+0x28], %l5
 470  431          ldx     [%i0+0x30], %l6
 471  432          ldx     [%i0+0x38], %l7
 472  433          stx     %l0, [%i1+0x0]
 473  434          stx     %l1, [%i1+0x8]
 474  435          stx     %l2, [%i1+0x10]
 475  436          stx     %l3, [%i1+0x18]
 476  437          stx     %l4, [%i1+0x20]
 477  438          stx     %l5, [%i1+0x28]
 478  439          stx     %l6, [%i1+0x30]
 479  440          stx     %l7, [%i1+0x38]
 480  441  
 481  442          ldx     [%i0+0x40], %l0
 482  443          ldx     [%i0+0x48], %l1
 483  444          ldx     [%i0+0x50], %l2
 484  445          ldx     [%i0+0x58], %l3
 485  446          ldx     [%i0+0x60], %l4
 486  447          ldx     [%i0+0x68], %l5
 487  448          ldx     [%i0+0x70], %l6
 488  449          ldx     [%i0+0x78], %l7
 489  450          stx     %l0, [%i1+0x40]
 490  451          stx     %l1, [%i1+0x48]
 491  452          stx     %l2, [%i1+0x50]
 492  453          stx     %l3, [%i1+0x58]
 493  454          stx     %l4, [%i1+0x60]
 494  455          stx     %l5, [%i1+0x68]
 495  456          stx     %l6, [%i1+0x70]
 496  457          stx     %l7, [%i1+0x78]
 497  458  
 498  459          add     %i0, 0x80, %i0
 499  460          subcc   %i2, 0x80, %i2
 500  461          bgu,pt  %xcc, 1b
 501  462          add     %i1, 0x80, %i1
  
 502  463  
 503  464          ! %o0 contains the dest. address
 504  465          set     PAGESIZE, %o1
 505  466          call    sync_icache
 506  467          nop
 507  468  
 508  469          membar #Sync
 509  470          ret
 510  471          restore %g0, 0, %o0
 511  472          SET_SIZE(hwblkpagecopy)
 512      -#endif  /* lint */
 513  473  
 514  474  
 515  475  /*
 516  476   * Transfer data to and from user space -
 517  477   * Note that these routines can cause faults
 518  478   * It is assumed that the kernel has nothing at
 519  479   * less than KERNELBASE in the virtual address space.
 520  480   *
 521  481   * Note that copyin(9F) and copyout(9F) are part of the
 522  482   * DDI/DKI which specifies that they return '-1' on "errors."
 523  483   *
 524  484   * Sigh.
 525  485   *
  526  486   * So there are two extremely similar routines - xcopyin() and xcopyout()
 527  487   * which return the errno that we've faithfully computed.  This
 528  488   * allows other callers (e.g. uiomove(9F)) to work correctly.
 529  489   * Given that these are used pretty heavily, we expand the calling
 530  490   * sequences inline for all flavours (rather than making wrappers).
 531  491   *
 532  492   * There are also stub routines for xcopyout_little and xcopyin_little,
 533  493   * which currently are intended to handle requests of <= 16 bytes from
 534  494   * do_unaligned. Future enhancement to make them handle 8k pages efficiently
 535  495   * is left as an exercise...
 536  496   */
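
The two return conventions, as a compilable sketch; the memcpy stand-ins are
hypothetical so this builds outside the kernel, and only the return contracts
are the point:

    #include <errno.h>
    #include <string.h>

    /* stand-ins: the real copyout/xcopyout are the assembly below */
    static int
    copyout_sketch(const void *kaddr, void *uaddr, size_t count)
    {
            memcpy(uaddr, kaddr, count);
            return (0);             /* DDI contract: 0, or -1 on error */
    }

    static int
    xcopyout_sketch(const void *kaddr, void *uaddr, size_t count)
    {
            memcpy(uaddr, kaddr, count);
            return (0);             /* 0, or the faithfully computed errno */
    }

    int
    main(void)
    {
            char k[4] = "abc", u[4];

            if (copyout_sketch(k, u, sizeof (k)) == -1)
                    return (EFAULT);        /* DDI callers only learn "-1" */
            return (xcopyout_sketch(k, u, sizeof (k))); /* uiomove-style */
    }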
 537  497  
 538  498  /*
 539  499   * Copy user data to kernel space (copyOP/xcopyOP/copyOP_noerr)
 540  500   *
 541  501   * General theory of operation:
 542  502   *
 543  503   * None of the copyops routines grab a window.
 544  504   *
 545  505   * Flow:
 546  506   *
 547  507   * If count == zero return zero.
 548  508   *
 549  509   * Store the previous lo_fault handler into %g6.
 550  510   * Place our secondary lofault handler into %g5.
 551  511   * Place the address of our fault handler into %o3.
 552  512   *
 553  513   * If count is less than or equal to SMALL_LIMIT (7) we
 554  514   * always do a byte for byte copy.
 555  515   *
 556  516   * If count is > SMALL_LIMIT, we check the alignment of the input
 557  517   * and output pointers.  We store -count in %o3, we store the number
 558  518   * of chunks (8, 4, 2 or 1 byte) operated on in our basic copy loop
 559  519   * in %o2. Following this we branch to the appropriate copy loop and
 560  520   * copy that many chunks.  Since we've been adding the chunk size
 561  521   * to %o3 each time through as well as decrementing %o2, we can tell
  562  522   * if any data is left to be copied by examining %o3. If that is
 563  523   * zero, we're done and can go home. If not, we figure out what the
 564  524   * largest chunk size left to be copied is and branch to that copy
 565  525   * loop unless there's only one byte left. We load that as we're
 566  526   * branching to code that stores it just before we return.
 567  527   *
 568  528   * Fault handlers are invoked if we reference memory that has no
 569  529   * current mapping.  All forms share the same copyio_fault handler.
 570  530   * This routine handles fixing up the stack and general housecleaning.
 571  531   * Each copy operation has a simple fault handler that is then called
  
  572  532   * to do the work specific to the individual operation.  The handlers
  573  533   * for copyOP and xcopyOP are found at the end of each individual function.
 574  534   * The handlers for xcopyOP_little are found at the end of xcopyin_little.
 575  535   * The handlers for copyOP_noerr are found at the end of copyin_noerr.
 576  536   */
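
The negative-count walk described above ("store -count in %o3 ... examine
%o3"), sketched in C; names are illustrative and the assembly below is
authoritative:

    #include <stddef.h>
    #include <sys/types.h>

    static void
    neg_offset_copy(const char *src, char *dst, size_t count)
    {
            ssize_t off = -(ssize_t)count;  /* the -count kept in %o3 */

            src += count;                   /* both pointers park at the end */
            dst += count;
            while (off != 0) {
                    dst[off] = src[off];    /* end + negative offset = current */
                    off++;                  /* one increment walks both buffers */
            }
    }

    int
    main(void)
    {
            char a[4] = "hey", b[4];

            neg_offset_copy(a, b, sizeof (a));
            return (b[0] != 'h');
    }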
 577  537  
 578  538  /*
 579  539   * Copy kernel data to user space (copyout/xcopyout/xcopyout_little).
 580  540   */
 581  541  
 582      -#if defined(lint)
 583      -
 584      -/*ARGSUSED*/
 585      -int
 586      -copyout(const void *kaddr, void *uaddr, size_t count)
 587      -{ return (0); }
 588      -
 589      -#else   /* lint */
 590      -
 591  542  /*
 592  543   * We save the arguments in the following registers in case of a fault:
 593  544   *      kaddr - %g2
 594  545   *      uaddr - %g3
 595  546   *      count - %g4
 596  547   */
 597  548  #define SAVE_SRC        %g2
 598  549  #define SAVE_DST        %g3
 599  550  #define SAVE_COUNT      %g4
 600  551  
 601  552  #define REAL_LOFAULT            %g5
 602  553  #define SAVED_LOFAULT           %g6
 603  554  
 604  555  /*
 605  556   * Generic copyio fault handler.  This is the first line of defense when a 
 606  557   * fault occurs in (x)copyin/(x)copyout.  In order for this to function
 607  558   * properly, the value of the 'real' lofault handler should be in REAL_LOFAULT.
 608  559   * This allows us to share common code for all the flavors of the copy
 609  560   * operations, including the _noerr versions.
 610  561   *
 611  562   * Note that this function will restore the original input parameters before
 612  563   * calling REAL_LOFAULT.  So the real handler can vector to the appropriate
 613  564   * member of the t_copyop structure, if needed.
 614  565   */
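
A schematic of that two-level dispatch, with function pointers standing in
for the register plumbing (SAVE_SRC/SAVE_DST/SAVE_COUNT are %g2/%g3/%g4,
REAL_LOFAULT is %g5); everything here is illustrative:

    #include <stddef.h>

    typedef int (*lofault_t)(const void *, void *, size_t);

    struct copyio_ctx {
            const void      *save_src;      /* SAVE_SRC, %g2 */
            void            *save_dst;      /* SAVE_DST, %g3 */
            size_t          save_count;     /* SAVE_COUNT, %g4 */
            lofault_t       real_lofault;   /* REAL_LOFAULT, %g5 */
    };

    /* restore the original arguments, then defer to the per-op handler */
    static int
    copyio_fault_sketch(const struct copyio_ctx *ctx)
    {
            return (ctx->real_lofault(ctx->save_src, ctx->save_dst,
                ctx->save_count));
    }

    /* a per-op handler in the spirit of .copyout_err returning -1 */
    static int
    copyout_err_sketch(const void *src, void *dst, size_t count)
    {
            (void) src; (void) dst; (void) count;
            return (-1);
    }

    int
    main(void)
    {
            char k[1] = { 0 }, u[1];
            struct copyio_ctx ctx = { k, u, sizeof (k), copyout_err_sketch };

            return (copyio_fault_sketch(&ctx) == -1 ? 0 : 1);
    }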
 615  566          ENTRY(copyio_fault)
 616  567          membar  #Sync
 617  568          stn     SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
 618  569  
 619  570          mov     SAVE_SRC, %o0
 620  571          mov     SAVE_DST, %o1
 621  572          jmp     REAL_LOFAULT
 622  573            mov   SAVE_COUNT, %o2
 623  574          SET_SIZE(copyio_fault)
 624  575  
 625  576          ENTRY(copyout)
 626  577          sethi   %hi(.copyout_err), REAL_LOFAULT
 627  578          or      REAL_LOFAULT, %lo(.copyout_err), REAL_LOFAULT
 628  579  
 629  580  .do_copyout:
 630  581          !
 631  582          ! Check the length and bail if zero.
 632  583          !
 633  584          tst     %o2
 634  585          bnz,pt  %ncc, 1f
 635  586            nop
 636  587          retl
 637  588            clr   %o0
 638  589  1:
 639  590          sethi   %hi(copyio_fault), %o3
 640  591          ldn     [THREAD_REG + T_LOFAULT], SAVED_LOFAULT
 641  592          or      %o3, %lo(copyio_fault), %o3
 642  593          membar  #Sync
 643  594          stn     %o3, [THREAD_REG + T_LOFAULT]
 644  595  
 645  596          mov     %o0, SAVE_SRC
 646  597          mov     %o1, SAVE_DST
 647  598          mov     %o2, SAVE_COUNT
 648  599  
 649  600          !
 650  601          ! Check to see if we're more than SMALL_LIMIT (7 bytes).
 651  602          ! Run in leaf mode, using the %o regs as our input regs.
 652  603          !
 653  604          subcc   %o2, SMALL_LIMIT, %o3
 654  605          bgu,a,pt %ncc, .dco_ns
 655  606          or      %o0, %o1, %o3
 656  607  
 657  608  .dcobcp:
 658  609          sub     %g0, %o2, %o3           ! negate count
 659  610          add     %o0, %o2, %o0           ! make %o0 point at the end
 660  611          add     %o1, %o2, %o1           ! make %o1 point at the end
 661  612          ba,pt   %ncc, .dcocl
 662  613          ldub    [%o0 + %o3], %o4        ! load first byte
 663  614          !
  664  615          ! %o0 and %o1 point at the end and remain pointing at the end
 665  616          ! of their buffers. We pull things out by adding %o3 (which is
 666  617          ! the negation of the length) to the buffer end which gives us
  667  618          ! the current location in the buffers. By incrementing %o3 we walk
 668  619          ! through both buffers without having to bump each buffer's
 669  620          ! pointer. A very fast 4 instruction loop.
 670  621          !
 671  622          .align 16
 672  623  .dcocl:
 673  624          stba    %o4, [%o1 + %o3]ASI_USER
 674  625          inccc   %o3
 675  626          bl,a,pt %ncc, .dcocl
 676  627          ldub    [%o0 + %o3], %o4
 677  628          !
 678  629          ! We're done. Go home.
 679  630          !
 680  631          membar  #Sync
 681  632          stn     SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
 682  633          retl
 683  634          clr     %o0
 684  635          !
 685  636          ! Try aligned copies from here.
 686  637          !
 687  638  .dco_ns:
 688  639          ! %o0 = kernel addr (to be copied from)
 689  640          ! %o1 = user addr (to be copied to)
 690  641          ! %o2 = length
 691  642          ! %o3 = %o1 | %o2 (used for alignment checking)
 692  643          ! %o4 is alternate lo_fault
 693  644          ! %o5 is original lo_fault
 694  645          !
 695  646          ! See if we're single byte aligned. If we are, check the
 696  647          ! limit for single byte copies. If we're smaller or equal,
 697  648          ! bounce to the byte for byte copy loop. Otherwise do it in
 698  649          ! HW (if enabled).
 699  650          !
 700  651          btst    1, %o3
 701  652          bz,pt   %icc, .dcoh8
 702  653          btst    7, %o3
 703  654  
 704  655          ba      .dcobcp
 705  656          nop
 706  657  .dcoh8:
 707  658          !
 708  659          ! 8 byte aligned?
 709  660          !
 710  661          bnz,a   %ncc, .dcoh4
 711  662          btst    3, %o3
 712  663  .dcos8:
 713  664          !
 714  665          ! Housekeeping for copy loops. Uses same idea as in the byte for
 715  666          ! byte copy loop above.
 716  667          !
 717  668          add     %o0, %o2, %o0
 718  669          add     %o1, %o2, %o1
 719  670          sub     %g0, %o2, %o3
 720  671          ba,pt   %ncc, .dodebc
 721  672          srl     %o2, 3, %o2             ! Number of 8 byte chunks to copy
 722  673          !
 723  674          ! 4 byte aligned?
 724  675          !
 725  676  .dcoh4:
 726  677          bnz,pn  %ncc, .dcoh2
 727  678          nop
 728  679  .dcos4:
 729  680          add     %o0, %o2, %o0
 730  681          add     %o1, %o2, %o1
 731  682          sub     %g0, %o2, %o3
 732  683          ba,pt   %ncc, .dodfbc
 733  684          srl     %o2, 2, %o2             ! Number of 4 byte chunks to copy
 734  685          !
 735  686          ! We must be 2 byte aligned. Off we go.
 736  687          ! The check for small copies was done in the
 737  688          ! delay at .dcoh4
 738  689          !
 739  690  .dcoh2:
 740  691  .dcos2:
 741  692          add     %o0, %o2, %o0
 742  693          add     %o1, %o2, %o1
 743  694          sub     %g0, %o2, %o3
 744  695          ba,pt   %ncc, .dodtbc
 745  696          srl     %o2, 1, %o2             ! Number of 2 byte chunks to copy
 746  697  
 747  698  .dodebc:
 748  699          ldx     [%o0 + %o3], %o4
 749  700          deccc   %o2
 750  701          stxa    %o4, [%o1 + %o3]ASI_USER
 751  702          bg,pt   %ncc, .dodebc
 752  703          addcc   %o3, 8, %o3
 753  704          !
 754  705          ! End of copy loop. Check to see if we're done. Most
 755  706          ! eight byte aligned copies end here.
 756  707          !
 757  708          bz,pt   %ncc, .dcofh
 758  709          nop
 759  710          !
 760  711          ! Something is left - do it byte for byte.
 761  712          ! 
 762  713          ba,pt   %ncc, .dcocl
 763  714          ldub    [%o0 + %o3], %o4        ! load next byte
 764  715          !
 765  716          ! Four byte copy loop. %o2 is the number of 4 byte chunks to copy.
 766  717          !
 767  718          .align 32
 768  719  .dodfbc:
 769  720          lduw    [%o0 + %o3], %o4
 770  721          deccc   %o2
 771  722          sta     %o4, [%o1 + %o3]ASI_USER
 772  723          bg,pt   %ncc, .dodfbc
 773  724          addcc   %o3, 4, %o3
 774  725          !
 775  726          ! End of copy loop. Check to see if we're done. Most
 776  727          ! four byte aligned copies end here.
 777  728          !
 778  729          bz,pt   %ncc, .dcofh
 779  730          nop
 780  731          !
 781  732          ! Something is left. Do it byte for byte.
 782  733          !
 783  734          ba,pt   %ncc, .dcocl
 784  735          ldub    [%o0 + %o3], %o4        ! load next byte
 785  736          !
 786  737          ! two byte aligned copy loop. %o2 is the number of 2 byte chunks to
 787  738          ! copy.
 788  739          !
 789  740          .align 32
 790  741  .dodtbc:
 791  742          lduh    [%o0 + %o3], %o4
 792  743          deccc   %o2
 793  744          stha    %o4, [%o1 + %o3]ASI_USER
 794  745          bg,pt   %ncc, .dodtbc
 795  746          addcc   %o3, 2, %o3
 796  747          !
 797  748          ! End of copy loop. Anything left?
 798  749          !
 799  750          bz,pt   %ncc, .dcofh
 800  751          nop
 801  752          !
 802  753          ! Deal with the last byte
 803  754          !
 804  755          ldub    [%o0 + %o3], %o4
 805  756          stba    %o4, [%o1 + %o3]ASI_USER
 806  757  .dcofh:
 807  758          membar  #Sync
 808  759          stn     SAVED_LOFAULT, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
 809  760          retl
 810  761          clr     %o0
 811  762  
 812  763  .copyout_err:
 813  764          ldn     [THREAD_REG + T_COPYOPS], %o4
  
 814  765          brz     %o4, 2f
 815  766          nop
 816  767          ldn     [%o4 + CP_COPYOUT], %g2
 817  768          jmp     %g2
 818  769          nop
 819  770  2:
 820  771          retl
 821  772          mov     -1, %o0
 822  773          SET_SIZE(copyout)
 823  774  
 824      -#endif  /* lint */
 825  775  
 826      -
 827      -#ifdef  lint
 828      -
 829      -/*ARGSUSED*/
 830      -int
 831      -xcopyout(const void *kaddr, void *uaddr, size_t count)
 832      -{ return (0); }
 833      -
 834      -#else   /* lint */
 835      -
 836  776          ENTRY(xcopyout)
 837  777          sethi   %hi(.xcopyout_err), REAL_LOFAULT
 838  778          b       .do_copyout
 839  779            or    REAL_LOFAULT, %lo(.xcopyout_err), REAL_LOFAULT
 840  780  .xcopyout_err:
 841  781          ldn     [THREAD_REG + T_COPYOPS], %o4
 842  782          brz     %o4, 2f
 843  783          nop
 844  784          ldn     [%o4 + CP_XCOPYOUT], %g2
 845  785          jmp     %g2
 846  786          nop
 847  787  2:
 848  788          retl
 849  789          mov     %g1, %o0
 850  790          SET_SIZE(xcopyout)
 851  791  
 852      -#endif  /* lint */
 853      -        
 854      -#ifdef  lint
 855      -
 856      -/*ARGSUSED*/
 857      -int
 858      -xcopyout_little(const void *kaddr, void *uaddr, size_t count)
 859      -{ return (0); }
 860      -
 861      -#else   /* lint */
 862      -
 863  792          ENTRY(xcopyout_little)
 864  793          sethi   %hi(.little_err), %o4
 865  794          ldn     [THREAD_REG + T_LOFAULT], %o5
 866  795          or      %o4, %lo(.little_err), %o4
 867  796          membar  #Sync                   ! sync error barrier
 868  797          stn     %o4, [THREAD_REG + T_LOFAULT]
 869  798  
 870  799          subcc   %g0, %o2, %o3
 871  800          add     %o0, %o2, %o0
 872  801          bz,pn   %ncc, 2f                ! check for zero bytes
 873  802          sub     %o2, 1, %o4
 874  803          add     %o0, %o4, %o0           ! start w/last byte
 875  804          add     %o1, %o2, %o1
 876  805          ldub    [%o0+%o3], %o4
 877  806  
 878  807  1:      stba    %o4, [%o1+%o3]ASI_AIUSL
 879  808          inccc   %o3
  
 880  809          sub     %o0, 2, %o0             ! get next byte
 881  810          bcc,a,pt %ncc, 1b
 882  811            ldub  [%o0+%o3], %o4
 883  812  
 884  813  2:      membar  #Sync                   ! sync error barrier
 885  814          stn     %o5, [THREAD_REG + T_LOFAULT]   ! restore old t_lofault
 886  815          retl
 887  816          mov     %g0, %o0                ! return (0)
 888  817          SET_SIZE(xcopyout_little)
 889  818  
 890      -#endif  /* lint */
 891      -
 892  819  /*
 893  820   * Copy user data to kernel space (copyin/xcopyin/xcopyin_little)
 894  821   */
 895  822  
 896      -#if defined(lint)
 897      -
 898      -/*ARGSUSED*/
 899      -int
 900      -copyin(const void *uaddr, void *kaddr, size_t count)
 901      -{ return (0); }
 902      -
 903      -#else   /* lint */
 904      -
 905  823          ENTRY(copyin)
 906  824          sethi   %hi(.copyin_err), REAL_LOFAULT
 907  825          or      REAL_LOFAULT, %lo(.copyin_err), REAL_LOFAULT
 908  826  
 909  827  .do_copyin:
 910  828          !
 911  829          ! Check the length and bail if zero.
 912  830          !
 913  831          tst     %o2
 914  832          bnz,pt  %ncc, 1f
 915  833            nop
 916  834          retl
 917  835            clr   %o0
 918  836  1:
 919  837          sethi   %hi(copyio_fault), %o3
 920  838          ldn     [THREAD_REG + T_LOFAULT], SAVED_LOFAULT
 921  839          or      %o3, %lo(copyio_fault), %o3
 922  840          membar  #Sync
 923  841          stn     %o3, [THREAD_REG + T_LOFAULT]
 924  842  
 925  843          mov     %o0, SAVE_SRC
 926  844          mov     %o1, SAVE_DST
 927  845          mov     %o2, SAVE_COUNT
 928  846  
 929  847          !
 930  848          ! Check to see if we're more than SMALL_LIMIT.
 931  849          !
 932  850          subcc   %o2, SMALL_LIMIT, %o3
 933  851          bgu,a,pt %ncc, .dci_ns
 934  852          or      %o0, %o1, %o3
 935  853  
 936  854  .dcibcp:
 937  855          sub     %g0, %o2, %o3           ! setup for copy loop
 938  856          add     %o0, %o2, %o0
 939  857          add     %o1, %o2, %o1
 940  858          ba,pt   %ncc, .dcicl
 941  859          lduba   [%o0 + %o3]ASI_USER, %o4
 942  860          !
 943  861          ! %o0 and %o1 point at the end and remain pointing at the end
 944  862          ! of their buffers. We pull things out by adding %o3 (which is
 945  863          ! the negation of the length) to the buffer end which gives us
  946  864          ! the current location in the buffers. By incrementing %o3 we walk
 947  865          ! through both buffers without having to bump each buffer's
 948  866          ! pointer. A very fast 4 instruction loop.
 949  867          !
 950  868          .align 16
 951  869  .dcicl:
 952  870          stb     %o4, [%o1 + %o3]
 953  871          inccc   %o3
 954  872          bl,a,pt %ncc, .dcicl
 955  873          lduba   [%o0 + %o3]ASI_USER, %o4
 956  874          !
 957  875          ! We're done. Go home.
 958  876          !       
 959  877          membar  #Sync
 960  878          stn     SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
 961  879          retl
 962  880          clr     %o0
 963  881          !
 964  882          ! Try aligned copies from here.
 965  883          !
 966  884  .dci_ns:
 967  885          !
 968  886          ! See if we're single byte aligned. If we are, check the
 969  887          ! limit for single byte copies. If we're smaller, or equal,
 970  888          ! bounce to the byte for byte copy loop. Otherwise do it in
 971  889          ! HW (if enabled).
 972  890          !
 973  891          btst    1, %o3
 974  892          bz,a,pt %icc, .dcih8
 975  893          btst    7, %o3
 976  894          ba      .dcibcp
 977  895          nop
 978  896  
 979  897  .dcih8:
 980  898          !
 981  899          ! 8 byte aligned?
 982  900          !
 983  901          bnz,a   %ncc, .dcih4
 984  902          btst    3, %o3
 985  903  .dcis8:
 986  904          !
 987  905          ! Housekeeping for copy loops. Uses same idea as in the byte for
 988  906          ! byte copy loop above.
 989  907          !
 990  908          add     %o0, %o2, %o0
 991  909          add     %o1, %o2, %o1
 992  910          sub     %g0, %o2, %o3
 993  911          ba,pt   %ncc, .didebc
 994  912          srl     %o2, 3, %o2             ! Number of 8 byte chunks to copy
 995  913          !
 996  914          ! 4 byte aligned?
 997  915          !
 998  916  .dcih4:
 999  917          bnz     %ncc, .dcih2
1000  918          nop
1001  919  .dcis4:
1002  920          !
1003  921          ! Housekeeping for copy loops. Uses same idea as in the byte
1004  922          ! for byte copy loop above.
1005  923          !
1006  924          add     %o0, %o2, %o0
1007  925          add     %o1, %o2, %o1
1008  926          sub     %g0, %o2, %o3
1009  927          ba,pt   %ncc, .didfbc
1010  928          srl     %o2, 2, %o2             ! Number of 4 byte chunks to copy
1011  929  .dcih2:
1012  930  .dcis2:
1013  931          add     %o0, %o2, %o0
1014  932          add     %o1, %o2, %o1
1015  933          sub     %g0, %o2, %o3
1016  934          ba,pt   %ncc, .didtbc
1017  935          srl     %o2, 1, %o2             ! Number of 2 byte chunks to copy
1018  936  
1019  937  .didebc:
1020  938          ldxa    [%o0 + %o3]ASI_USER, %o4
1021  939          deccc   %o2
1022  940          stx     %o4, [%o1 + %o3]
1023  941          bg,pt   %ncc, .didebc
1024  942          addcc   %o3, 8, %o3
1025  943          !
1026  944          ! End of copy loop. Most 8 byte aligned copies end here.
1027  945          !
1028  946          bz,pt   %ncc, .dcifh
1029  947          nop
1030  948          !
1031  949          ! Something is left. Do it byte for byte.
1032  950          !
1033  951          ba,pt   %ncc, .dcicl
1034  952          lduba   [%o0 + %o3]ASI_USER, %o4
1035  953          !
1036  954          ! 4 byte copy loop. %o2 is number of 4 byte chunks to copy.
1037  955          !
1038  956          .align 32
1039  957  .didfbc:
1040  958          lduwa   [%o0 + %o3]ASI_USER, %o4
1041  959          deccc   %o2
1042  960          st      %o4, [%o1 + %o3]
1043  961          bg,pt   %ncc, .didfbc
1044  962          addcc   %o3, 4, %o3
1045  963          !
1046  964          ! End of copy loop. Most 4 byte aligned copies end here.
1047  965          !
1048  966          bz,pt   %ncc, .dcifh
1049  967          nop
1050  968          !
1051  969          ! Something is left. Do it byte for byte.
1052  970          !
1053  971          ba,pt   %ncc, .dcicl
1054  972          lduba   [%o0 + %o3]ASI_USER, %o4
1055  973          !
1056  974          ! 2 byte aligned copy loop. %o2 is number of 2 byte chunks to
1057  975          ! copy.
1058  976          !
1059  977          .align 32
1060  978  .didtbc:
1061  979          lduha   [%o0 + %o3]ASI_USER, %o4
1062  980          deccc   %o2
1063  981          sth     %o4, [%o1 + %o3]
1064  982          bg,pt   %ncc, .didtbc
1065  983          addcc   %o3, 2, %o3
1066  984          !
1067  985          ! End of copy loop. Most 2 byte aligned copies end here.
1068  986          !
1069  987          bz,pt   %ncc, .dcifh
1070  988          nop
1071  989          !
1072  990          ! Deal with the last byte
1073  991          !
1074  992          lduba   [%o0 + %o3]ASI_USER, %o4
1075  993          stb     %o4, [%o1 + %o3]
1076  994  .dcifh:
1077  995          membar  #Sync
1078  996          stn     SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]   ! restore old t_lofault
1079  997          retl
1080  998          clr     %o0
1081  999  
1082 1000  .copyin_err:
1083 1001          ldn     [THREAD_REG + T_COPYOPS], %o4
  
1084 1002          brz     %o4, 2f
1085 1003          nop
1086 1004          ldn     [%o4 + CP_COPYIN], %g2
1087 1005          jmp     %g2
1088 1006          nop
1089 1007  2:
1090 1008          retl
1091 1009          mov     -1, %o0
1092 1010          SET_SIZE(copyin)
1093 1011  
1094      -#endif  /* lint */
1095      -
1096      -#ifdef  lint
1097      -
1098      -/*ARGSUSED*/
1099      -int
1100      -xcopyin(const void *uaddr, void *kaddr, size_t count)
1101      -{ return (0); }
1102      -
1103      -#else   /* lint */
1104      -
1105 1012          ENTRY(xcopyin)
1106 1013          sethi   %hi(.xcopyin_err), REAL_LOFAULT
1107 1014          b       .do_copyin
1108 1015            or    REAL_LOFAULT, %lo(.xcopyin_err), REAL_LOFAULT
1109 1016  .xcopyin_err:
1110 1017          ldn     [THREAD_REG + T_COPYOPS], %o4
1111 1018          brz     %o4, 2f
1112 1019          nop
1113 1020          ldn     [%o4 + CP_XCOPYIN], %g2
1114 1021          jmp     %g2
1115 1022          nop
1116 1023  2:
1117 1024          retl
1118 1025          mov     %g1, %o0
1119 1026          SET_SIZE(xcopyin)
1120 1027  
1121      -#endif  /* lint */
1122      -
1123      -#ifdef  lint
1124      -
1125      -/*ARGSUSED*/
1126      -int
1127      -xcopyin_little(const void *uaddr, void *kaddr, size_t count)
1128      -{ return (0); }
1129      -
1130      -#else   /* lint */
1131      -
1132 1028          ENTRY(xcopyin_little)
1133 1029          sethi   %hi(.little_err), %o4
1134 1030          ldn     [THREAD_REG + T_LOFAULT], %o5
1135 1031          or      %o4, %lo(.little_err), %o4
1136 1032          membar  #Sync                           ! sync error barrier
1137 1033          stn     %o4, [THREAD_REG + T_LOFAULT]   
1138 1034  
1139 1035          subcc   %g0, %o2, %o3
1140 1036          add     %o0, %o2, %o0
1141 1037          bz,pn   %ncc, 2f                ! check for zero bytes
1142 1038          sub     %o2, 1, %o4
1143 1039          add     %o0, %o4, %o0           ! start w/last byte     
1144 1040          add     %o1, %o2, %o1
1145 1041          lduba   [%o0+%o3]ASI_AIUSL, %o4
1146 1042  
1147 1043  1:      stb     %o4, [%o1+%o3]
1148 1044          inccc   %o3
1149 1045          sub     %o0, 2, %o0             ! get next byte
1150 1046          bcc,a,pt %ncc, 1b
1151 1047            lduba [%o0+%o3]ASI_AIUSL, %o4
1152 1048  
1153 1049  2:      membar  #Sync                           ! sync error barrier
1154 1050          stn     %o5, [THREAD_REG + T_LOFAULT]   ! restore old t_lofault
  
1155 1051          retl
1156 1052          mov     %g0, %o0                ! return (0)
1157 1053  
1158 1054  .little_err:
1159 1055          membar  #Sync                           ! sync error barrier
1160 1056          stn     %o5, [THREAD_REG + T_LOFAULT]   ! restore old t_lofault
1161 1057          retl
1162 1058          mov     %g1, %o0
1163 1059          SET_SIZE(xcopyin_little)
1164 1060  
1165      -#endif  /* lint */
1166 1061  
1167      -
1168 1062  /*
1169 1063   * Copy a block of storage - must not overlap (from + len <= to).
1170 1064   * No fault handler installed (to be called under on_fault())
1171 1065   */
1172      -#if defined(lint)
1173 1066  
1174      -/* ARGSUSED */
1175      -void
1176      -copyin_noerr(const void *ufrom, void *kto, size_t count)
1177      -{}
1178      -
1179      -#else   /* lint */
1180      -
1181 1067          ENTRY(copyin_noerr)
1182 1068          sethi   %hi(.copyio_noerr), REAL_LOFAULT
1183 1069          b       .do_copyin
1184 1070            or    REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
1185 1071  .copyio_noerr:
1186 1072          jmp     SAVED_LOFAULT
1187 1073            nop
1188 1074          SET_SIZE(copyin_noerr)
1189 1075  
1190      -#endif /* lint */
1191      -
1192 1076  /*
1193 1077   * Copy a block of storage - must not overlap (from + len <= to).
1194 1078   * No fault handler installed (to be called under on_fault())
1195 1079   */
1196 1080  
1197      -#if defined(lint)
1198      -
1199      -/* ARGSUSED */
1200      -void
1201      -copyout_noerr(const void *kfrom, void *uto, size_t count)
1202      -{}
1203      -
1204      -#else   /* lint */
1205      -
1206 1081          ENTRY(copyout_noerr)
1207 1082          sethi   %hi(.copyio_noerr), REAL_LOFAULT
1208 1083          b       .do_copyout
1209 1084            or    REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
1210 1085          SET_SIZE(copyout_noerr)
1211 1086  
1212      -#endif /* lint */
1213      -
1214      -#if defined(lint)
1215      -
1216      -int use_hw_bcopy = 1;
1217      -int use_hw_bzero = 1;
1218      -
1219      -#else /* !lint */
1220      -
1221 1087          .align  4
1222 1088          DGDEF(use_hw_bcopy)
1223 1089          .word   1
1224 1090          DGDEF(use_hw_bzero)
1225 1091          .word   1
1226 1092  
1227 1093          .align  64
1228 1094          .section ".text"
1229      -#endif /* !lint */
1230 1095  
1231 1096  
1232 1097  /*
1233 1098   * hwblkclr - clears block-aligned, block-multiple-sized regions that are
1234 1099   * longer than 256 bytes in length. For the generic module we will simply
1235 1100   * call bzero and return 1 to ensure that the pages in cache should be
1236 1101   * flushed to ensure integrity.
1237 1102   * Caller is responsible for ensuring use_hw_bzero is true and that
1238 1103   * kpreempt_disable() has been called.
1239 1104   */
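
The caller-side contract, sketched: a nonzero return from hwblkclr() (as this
generic version always gives) means the data went through bzero and the
caller must still flush the cached lines. Names other than hwblkclr/bzero
are illustrative:

    #include <stddef.h>
    #include <string.h>

    static int
    hwblkclr_sketch(void *addr, size_t len)
    {
            memset(addr, 0, len);   /* stand-in for the bzero call below */
            return (1);             /* 1: block ops not used, flush needed */
    }

    int
    main(void)
    {
            char region[512];

            if (hwblkclr_sketch(region, sizeof (region)) != 0) {
                    /* caller would flush/synchronize the cached lines here */
            }
            return (region[0]);
    }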
1240      -#ifdef lint
1241      -/*ARGSUSED*/
1242      -int
1243      -hwblkclr(void *addr, size_t len)
1244      -{ 
1245      -        return(0);
1246      -}
1247      -#else /* lint */
1248 1105          ! %i0 - start address
1249 1106          ! %i1 - length of region (multiple of 64)
1250 1107  
1251 1108          ENTRY(hwblkclr)
1252 1109          save    %sp, -SA(MINFRAME), %sp
1253 1110  
1254 1111          ! Simply call bzero and notify the caller that bzero was used
1255 1112          mov     %i0, %o0
1256 1113          call    bzero
1257 1114            mov   %i1, %o1
1258 1115          ret
1259 1116          restore %g0, 1, %o0     ! return (1) - did not use block operations
1260 1117  
1261 1118          SET_SIZE(hwblkclr)
1262      -#endif  /* lint */
1263 1119  
1264      -#ifdef  lint
1265      -/* Copy 32 bytes of data from src to dst using physical addresses */
1266      -/*ARGSUSED*/
1267      -void
1268      -hw_pa_bcopy32(uint64_t src, uint64_t dst)
1269      -{}
1270      -#else   /*!lint */
1271      -
1272 1120          /*
1273 1121           * Copy 32 bytes of data from src (%o0) to dst (%o1)
1274 1122           * using physical addresses.
1275 1123           */
1276 1124          ENTRY_NP(hw_pa_bcopy32)
1277 1125          rdpr    %pstate, %g1
1278 1126          andn    %g1, PSTATE_IE, %g2
1279 1127          wrpr    %g0, %g2, %pstate
1280 1128  
1281 1129          ldxa    [%o0]ASI_MEM, %o2
1282 1130          add     %o0, 8, %o0
1283 1131          ldxa    [%o0]ASI_MEM, %o3
1284 1132          add     %o0, 8, %o0
1285 1133          ldxa    [%o0]ASI_MEM, %o4
1286 1134          add     %o0, 8, %o0
1287 1135          ldxa    [%o0]ASI_MEM, %o5
1288 1136          stxa    %o2, [%o1]ASI_MEM
1289 1137          add     %o1, 8, %o1
  
1290 1138          stxa    %o3, [%o1]ASI_MEM
1291 1139          add     %o1, 8, %o1
1292 1140          stxa    %o4, [%o1]ASI_MEM
1293 1141          add     %o1, 8, %o1
1294 1142          stxa    %o5, [%o1]ASI_MEM
1295 1143  
1296 1144          membar  #Sync
1297 1145          retl
1298 1146            wrpr    %g0, %g1, %pstate
1299 1147          SET_SIZE(hw_pa_bcopy32)
1300      -#endif /* lint */
1301 1148  
1302 1149  /*
1303 1150   * Zero a block of storage.
1304 1151   *
1305 1152   * uzero is used by the kernel to zero a block in user address space.
1306 1153   */
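
A minimal sketch of kzero's contract as implemented below (always installs a
fault handler; returns an errno on fault, 0 otherwise); the stub is
hypothetical so the sketch compiles outside the kernel:

    #include <string.h>

    static int
    kzero(void *addr, size_t count)
    {
            memset(addr, 0, count); /* stand-in: never faults in user space */
            return (0);             /* 0 ok; errno from %g1 on a fault */
    }

    int
    main(void)
    {
            char buf[32];

            return (kzero(buf, sizeof (buf)));
    }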
1307 1154  
1308 1155  
1309      -#if defined(lint)
1310      -
1311      -/* ARGSUSED */
1312      -int
1313      -kzero(void *addr, size_t count)
1314      -{ return(0); }
1315      -
1316      -/* ARGSUSED */
1317      -void
1318      -uzero(void *addr, size_t count)
1319      -{}
1320      -
1321      -#else   /* lint */
1322      -
1323 1156          ENTRY(uzero)
1324 1157          !
1325 1158          ! Set a new lo_fault handler only if we came in with one
1326 1159          ! already specified.
1327 1160          !
1328 1161          wr      %g0, ASI_USER, %asi
1329 1162          ldn     [THREAD_REG + T_LOFAULT], %o5
1330 1163          tst     %o5
1331 1164          bz,pt   %ncc, .do_zero
1332 1165          sethi   %hi(.zeroerr), %o2
1333 1166          or      %o2, %lo(.zeroerr), %o2
1334 1167          membar  #Sync
1335 1168          ba,pt   %ncc, .do_zero
1336 1169          stn     %o2, [THREAD_REG + T_LOFAULT]
1337 1170  
1338 1171          ENTRY(kzero)
1339 1172          !
1340 1173          ! Always set a lo_fault handler
1341 1174          !
1342 1175          wr      %g0, ASI_P, %asi
1343 1176          ldn     [THREAD_REG + T_LOFAULT], %o5
1344 1177          sethi   %hi(.zeroerr), %o2
1345 1178          or      %o5, LOFAULT_SET, %o5
1346 1179          or      %o2, %lo(.zeroerr), %o2
1347 1180          membar  #Sync
1348 1181          ba,pt   %ncc, .do_zero
1349 1182          stn     %o2, [THREAD_REG + T_LOFAULT]
1350 1183  
1351 1184  /*
1352 1185   * We got here because of a fault during kzero or if
1353 1186   * uzero or bzero was called with t_lofault non-zero.
1354 1187   * Otherwise we've already run screaming from the room.
1355 1188   * Errno value is in %g1. Note that we're here iff
1356 1189   * we did set t_lofault.
1357 1190   */
1358 1191  .zeroerr:
1359 1192          !
1360 1193          ! Undo asi register setting. Just set it to be the
1361 1194          ! kernel default without checking.
1362 1195          !
1363 1196          wr      %g0, ASI_P, %asi
1364 1197  
1365 1198          !
1366 1199          ! We did set t_lofault. It may well have been zero coming in.
1367 1200          !
1368 1201  1:
1369 1202          tst     %o5
1370 1203          membar #Sync
1371 1204          bne,pn  %ncc, 3f                
1372 1205          andncc  %o5, LOFAULT_SET, %o5
1373 1206  2:
1374 1207          !
1375 1208          ! Old handler was zero. Just return the error.
1376 1209          !
1377 1210          retl                            ! return
1378 1211          mov     %g1, %o0                ! error code from %g1
1379 1212  3:
1380 1213          !
1381 1214          ! We're here because %o5 was non-zero. It was non-zero
1382 1215          ! because either LOFAULT_SET was present, a previous fault
1383 1216          ! handler was present or both. In all cases we need to reset
1384 1217          ! T_LOFAULT to the value of %o5 after clearing LOFAULT_SET
  
1385 1218          ! before we either simply return the error or we invoke the
1386 1219          ! previously specified handler.
1387 1220          !
1388 1221          be      %ncc, 2b
1389 1222          stn     %o5, [THREAD_REG + T_LOFAULT]
1390 1223          jmp     %o5                     ! goto real handler
1391 1224            nop
1392 1225          SET_SIZE(kzero)
1393 1226          SET_SIZE(uzero)
1394 1227  
1395      -#endif  /* lint */
1396      -
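The .zeroerr bookkeeping above is compact, so here is the same fault path in C-like pseudocode. This is a sketch only: o5 and g1 mirror the registers, set_asi() is a hypothetical stand-in for the wr %asi instruction, and the indirect call at the end stands in for jmp %o5.

        set_asi(ASI_P);                 /* back to the kernel default */
        if (o5 == 0)
                return (g1);            /* no handler recorded: return errno */
        o5 &= ~LOFAULT_SET;             /* strip kzero's marker flag */
        curthread->t_lofault = o5;      /* restore the pre-call handler */
        if (o5 == 0)
                return (g1);            /* only LOFAULT_SET had been set */
        ((void (*)(void))o5)();         /* chain to the previous handler */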
1397 1228  /*
1398 1229   * Zero a block of storage.
1399 1230   */
1400 1231  
1401      -#if defined(lint)
1402      -
1403      -/* ARGSUSED */
1404      -void
1405      -bzero(void *addr, size_t count)
1406      -{}
1407      -
1408      -#else   /* lint */
1409      -
1410 1232          ENTRY(bzero)
1411 1233          wr      %g0, ASI_P, %asi
1412 1234  
1413 1235          ldn     [THREAD_REG + T_LOFAULT], %o5   ! save old vector
1414 1236          tst     %o5
1415 1237          bz,pt   %ncc, .do_zero
1416 1238          sethi   %hi(.zeroerr), %o2
1417 1239          or      %o2, %lo(.zeroerr), %o2
1418 1240          membar  #Sync                           ! sync error barrier
1419 1241          stn     %o2, [THREAD_REG + T_LOFAULT]   ! install new vector
1420 1242  
1421 1243  .do_zero:
1422 1244          cmp     %o1, 7                  ! fewer than 7 bytes?
1423 1245          blu,pn  %ncc, .byteclr          ! then clear byte-for-byte
1424 1246          nop
1425 1247  
1426 1248          cmp     %o1, 15                 ! fewer than 15 bytes?
1427 1249          blu,pn  %ncc, .wdalign          ! then word-align and clear words
1428 1250          nop
1429 1251  
1430 1252          andcc   %o0, 7, %o3             ! is addr aligned on an 8-byte bound
1431 1253          bz,pt   %ncc, .blkalign         ! already double aligned
1432 1254          sub     %o3, 8, %o3             ! -(bytes till double aligned)
1433 1255          add     %o1, %o3, %o1           ! update o1 with new count
1434 1256  
1435 1257  1:
1436 1258          stba    %g0, [%o0]%asi
1437 1259          inccc   %o3
1438 1260          bl,pt   %ncc, 1b
1439 1261          inc     %o0
1440 1262  
1441 1263          ! Now address is double aligned
1442 1264  .blkalign:
1443 1265          cmp     %o1, 0x80               ! are there at least 128 bytes to set?
1444 1266          blu,pn  %ncc, .bzero_small
1445 1267          mov     %o1, %o3
1446 1268  
1447 1269          andcc   %o0, 0x3f, %o3          ! is block aligned?
1448 1270          bz,pt   %ncc, .bzero_blk
1449 1271          sub     %o3, 0x40, %o3          ! -(bytes till block aligned)
1450 1272          add     %o1, %o3, %o1           ! o1 is the remainder
1451 1273          
1452 1274          ! Clear -(%o3) bytes till block aligned
1453 1275  1:
1454 1276          stxa    %g0, [%o0]%asi
1455 1277          addcc   %o3, 8, %o3
1456 1278          bl,pt   %ncc, 1b
1457 1279          add     %o0, 8, %o0
1458 1280  
1459 1281  .bzero_blk:
1460 1282          and     %o1, 0x3f, %o3          ! calc bytes left after blk clear
1461 1283          andn    %o1, 0x3f, %o4          ! calc size of blocks in bytes
1462 1284  
1463 1285          cmp     %o4, 0x100              ! 256 bytes or more
1464 1286          blu,pn  %ncc, 3f
1465 1287          nop
1466 1288  
1467 1289  2:
1468 1290          stxa    %g0, [%o0+0x0]%asi
1469 1291          stxa    %g0, [%o0+0x40]%asi
1470 1292          stxa    %g0, [%o0+0x80]%asi
1471 1293          stxa    %g0, [%o0+0xc0]%asi
1472 1294  
1473 1295          stxa    %g0, [%o0+0x8]%asi
1474 1296          stxa    %g0, [%o0+0x10]%asi
1475 1297          stxa    %g0, [%o0+0x18]%asi
1476 1298          stxa    %g0, [%o0+0x20]%asi
1477 1299          stxa    %g0, [%o0+0x28]%asi
1478 1300          stxa    %g0, [%o0+0x30]%asi
1479 1301          stxa    %g0, [%o0+0x38]%asi
1480 1302  
1481 1303          stxa    %g0, [%o0+0x48]%asi
1482 1304          stxa    %g0, [%o0+0x50]%asi
1483 1305          stxa    %g0, [%o0+0x58]%asi
1484 1306          stxa    %g0, [%o0+0x60]%asi
1485 1307          stxa    %g0, [%o0+0x68]%asi
1486 1308          stxa    %g0, [%o0+0x70]%asi
1487 1309          stxa    %g0, [%o0+0x78]%asi
1488 1310  
1489 1311          stxa    %g0, [%o0+0x88]%asi
1490 1312          stxa    %g0, [%o0+0x90]%asi
1491 1313          stxa    %g0, [%o0+0x98]%asi
1492 1314          stxa    %g0, [%o0+0xa0]%asi
1493 1315          stxa    %g0, [%o0+0xa8]%asi
1494 1316          stxa    %g0, [%o0+0xb0]%asi
1495 1317          stxa    %g0, [%o0+0xb8]%asi
1496 1318  
1497 1319          stxa    %g0, [%o0+0xc8]%asi
1498 1320          stxa    %g0, [%o0+0xd0]%asi
1499 1321          stxa    %g0, [%o0+0xd8]%asi
1500 1322          stxa    %g0, [%o0+0xe0]%asi
1501 1323          stxa    %g0, [%o0+0xe8]%asi
1502 1324          stxa    %g0, [%o0+0xf0]%asi
1503 1325          stxa    %g0, [%o0+0xf8]%asi
1504 1326  
1505 1327          sub     %o4, 0x100, %o4
1506 1328          cmp     %o4, 0x100
1507 1329          bgu,pt  %ncc, 2b
1508 1330          add     %o0, 0x100, %o0
1509 1331  
1510 1332  3:
1511 1333          ! check if at least 64 bytes remain to set
1512 1334          cmp     %o4, 0x40
1513 1335          blu     %ncc, .bzero_blk_done
1514 1336          nop
1515 1337  
1516 1338  4:
1517 1339          stxa    %g0, [%o0+0x0]%asi
1518 1340          stxa    %g0, [%o0+0x8]%asi
1519 1341          stxa    %g0, [%o0+0x10]%asi
1520 1342          stxa    %g0, [%o0+0x18]%asi
1521 1343          stxa    %g0, [%o0+0x20]%asi
1522 1344          stxa    %g0, [%o0+0x28]%asi
1523 1345          stxa    %g0, [%o0+0x30]%asi
1524 1346          stxa    %g0, [%o0+0x38]%asi
1525 1347  
1526 1348          subcc   %o4, 0x40, %o4
1527 1349          bgu,pt  %ncc, 3b
1528 1350          add     %o0, 0x40, %o0
1529 1351  
1530 1352  .bzero_blk_done:
1531 1353          membar  #Sync
1532 1354  
1533 1355  .bzero_small:
1534 1356          ! Set the remaining doubles
1535 1357          subcc   %o3, 8, %o3             ! Can we store any doubles?
1536 1358          blu,pn  %ncc, .byteclr
1537 1359          and     %o1, 7, %o1             ! calc bytes left after doubles
1538 1360  
1539 1361  .dbclr:
1540 1362          stxa    %g0, [%o0]%asi          ! Clear the doubles
1541 1363          subcc   %o3, 8, %o3
1542 1364          bgeu,pt %ncc, .dbclr
1543 1365          add     %o0, 8, %o0
1544 1366  
1545 1367          ba      .byteclr
1546 1368          nop
1547 1369  
1548 1370  .wdalign:                       
1549 1371          andcc   %o0, 3, %o3             ! is addr aligned on a word boundary
1550 1372          bz,pn   %ncc, .wdclr
1551 1373          andn    %o1, 3, %o3             ! create word sized count in %o3
1552 1374  
1553 1375          dec     %o1                     ! decrement count
1554 1376          stba    %g0, [%o0]%asi          ! clear a byte
1555 1377          ba      .wdalign
1556 1378          inc     %o0                     ! next byte
1557 1379  
1558 1380  .wdclr:
1559 1381          sta     %g0, [%o0]%asi          ! 4-byte clearing loop
1560 1382          subcc   %o3, 4, %o3
1561 1383          bnz,pt  %ncc, .wdclr
1562 1384          inc     4, %o0
1563 1385  
1564 1386          and     %o1, 3, %o1             ! leftover count, if any
1565 1387  
1566 1388  .byteclr:
1567 1389          ! Set the leftover bytes
1568 1390          brz     %o1, .bzero_exit
1569 1391          nop
1570 1392  
1571 1393  7:
1572 1394          deccc   %o1                     ! byte clearing loop
1573 1395          stba    %g0, [%o0]%asi
1574 1396          bgu,pt  %ncc, 7b
1575 1397          inc     %o0
1576 1398  
1577 1399  .bzero_exit:
1578 1400          !
1579 1401          ! We're just concerned with whether t_lofault was set
1580 1402          ! when we came in. We end up here from either kzero()
1581 1403          ! or bzero(). kzero() *always* sets a lofault handler.
1582 1404          ! It ors LOFAULT_SET into %o5 to indicate it has done
1583 1405          ! this even if the value of %o5 is otherwise zero.
1584 1406          ! bzero() sets a lofault handler *only* if one was
1585 1407          ! previously set. Accordingly we need to examine
1586 1408          ! %o5 and if it is non-zero be sure to clear LOFAULT_SET
1587 1409          ! before resetting the error handler.
1588 1410          !
  
1589 1411          tst     %o5
1590 1412          bz      %ncc, 1f
1591 1413          andn    %o5, LOFAULT_SET, %o5
1592 1414          membar  #Sync                           ! sync error barrier
1593 1415          stn     %o5, [THREAD_REG + T_LOFAULT]   ! restore old t_lofault
1594 1416  1:
1595 1417          retl
1596 1418          clr     %o0                     ! return (0)
1597 1419  
1598 1420          SET_SIZE(bzero)
1599      -#endif  /* lint */
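Stepping back, .do_zero is a conventional tiered clear. A hedged C restatement of the strategy (illustrative only; the real routine issues its stores through %asi so one body serves bzero, kzero, and uzero, and it additionally unrolls four 64-byte groups per iteration while at least 256 bytes remain):

        #include <stddef.h>
        #include <stdint.h>

        /* Hedged sketch of the .do_zero tiers; not a drop-in replacement. */
        static void
        do_zero(char *p, size_t n)
        {
                if (n < 7)                              /* tiny: bytes only */
                        goto bytes;
                if (n < 15)                             /* small: words */
                        goto words;
                for (; (uintptr_t)p & 7; n--)           /* align to 8 bytes */
                        *p++ = 0;
                for (; n >= 64; p += 64, n -= 64)       /* 64-byte groups */
                        for (int i = 0; i < 64; i += 8)
                                *(uint64_t *)(p + i) = 0;
                for (; n >= 8; p += 8, n -= 8)          /* leftover doubles */
                        *(uint64_t *)p = 0;
                goto bytes;
        words:
                for (; (uintptr_t)p & 3; n--)           /* align to 4 bytes */
                        *p++ = 0;
                for (; n >= 4; p += 4, n -= 4)
                        *(uint32_t *)p = 0;
        bytes:
                while (n > 0) {                         /* trailing bytes */
                        *p++ = 0;
                        n--;
                }
        }

On the success path (.bzero_exit) the routine performs the same %o5/LOFAULT_SET examination as the fault path before returning 0, so the caller's t_lofault is restored whether or not a handler had been installed.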
    