OS-7125 Need mitigation of L1TF (CVE-2018-3646)
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
    
      
          --- old/usr/src/uts/intel/ia32/ml/copy.s
          +++ new/usr/src/uts/intel/ia32/ml/copy.s
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
  24   24   */
  25   25  
  26   26  /*
  27   27   * Copyright (c) 2009, Intel Corporation
  28   28   * All rights reserved.
  
  29   29   */
  30   30  
  31   31  /*       Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.        */
  32   32  /*       Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T          */
  33   33  /*         All Rights Reserved                                          */
  34   34  
  35   35  /*       Copyright (c) 1987, 1988 Microsoft Corporation                 */
  36   36  /*         All Rights Reserved                                          */
  37   37  
  38   38  /*
  39      - * Copyright 2016 Joyent, Inc.
       39 + * Copyright (c) 2018 Joyent, Inc.
  40   40   */
  41   41  
  42   42  #include <sys/errno.h>
  43   43  #include <sys/asm_linkage.h>
  44   44  
  45   45  #if defined(__lint)
  46   46  #include <sys/types.h>
  47   47  #include <sys/systm.h>
  48   48  #else   /* __lint */
  49   49  #include "assym.h"
  50   50  #endif  /* __lint */
  51   51  
  52   52  #define KCOPY_MIN_SIZE  128     /* Must be >= 16 bytes */
  53   53  #define XCOPY_MIN_SIZE  128     /* Must be >= 16 bytes */
  54   54  /*
   55   55   * Non-temporal access (NTA) alignment requirement
  56   56   */
  57   57  #define NTA_ALIGN_SIZE  4       /* Must be at least 4-byte aligned */
  58   58  #define NTA_ALIGN_MASK  _CONST(NTA_ALIGN_SIZE-1)
  59   59  #define COUNT_ALIGN_SIZE        16      /* Must be at least 16-byte aligned */
  60   60  #define COUNT_ALIGN_MASK        _CONST(COUNT_ALIGN_SIZE-1)
  61   61  
  62   62  /*
  63   63   * With the introduction of Broadwell, Intel has introduced supervisor mode
  64   64   * access protection -- SMAP. SMAP forces the kernel to set certain bits to
   65   65   * enable access of user pages (AC in rflags, defined as PS_ACHK in
  66   66   * <sys/psw.h>). One of the challenges is that the implementation of many of the
   67   67   * userland copy routines directly uses the kernel ones. For example, copyin and
  68   68   * copyout simply go and jump to the do_copy_fault label and traditionally let
  69   69   * those deal with the return for them. In fact, changing that is a can of frame
  70   70   * pointers.
  71   71   *
  72   72   * Rules and Constraints:
  73   73   *
  74   74   * 1. For anything that's not in copy.s, we have it do explicit calls to the
  75   75   * smap related code. It usually is in a position where it is able to. This is
  76   76   * restricted to the following three places: DTrace, resume() in swtch.s and
  77   77   * on_fault/no_fault. If you want to add it somewhere else, we should be
  78   78   * thinking twice.
  79   79   *
  80   80   * 2. We try to toggle this at the smallest window possible. This means that if
  81   81   * we take a fault, need to try to use a copyop in copyin() or copyout(), or any
  82   82   * other function, we will always leave with SMAP enabled (the kernel cannot
  83   83   * access user pages).
  84   84   *
  85   85   * 3. None of the *_noerr() or ucopy/uzero routines should toggle SMAP. They are
  86   86   * explicitly only allowed to be called while in an on_fault()/no_fault() handler,
  87   87   * which already takes care of ensuring that SMAP is enabled and disabled. Note
  88   88   * this means that when under an on_fault()/no_fault() handler, one must not
   89   89   * call the non-*_noerr() routines.
  90   90   *
  91   91   * 4. The first thing we should do after coming out of an lofault handler is to
  92   92   * make sure that we call smap_enable again to ensure that we are safely
  93   93   * protected, as more often than not, we will have disabled smap to get there.
  94   94   *
  95   95   * 5. The SMAP functions, smap_enable and smap_disable may not touch any
  96   96   * registers beyond those done by the call and ret. These routines may be called
  97   97   * from arbitrary contexts in copy.s where we have slightly more special ABIs in
  98   98   * place.
  99   99   *
 100  100   * 6. For any inline user of SMAP, the appropriate SMAP_ENABLE_INSTR and
 101  101   * SMAP_DISABLE_INSTR macro should be used (except for smap_enable() and
 102  102   * smap_disable()). If the number of these is changed, you must update the
 103  103   * constants SMAP_ENABLE_COUNT and SMAP_DISABLE_COUNT below.
 104  104   *
 105  105   * 7. Note, at this time SMAP is not implemented for the 32-bit kernel. There is
 106  106   * no known technical reason preventing it from being enabled.
 107  107   *
 108  108   * 8. Generally this .s file is processed by a K&R style cpp. This means that it
 109  109   * really has a lot of feelings about whitespace. In particular, if you have a
 110  110   * macro FOO with the arguments FOO(1, 3), the second argument is in fact ' 3'.
 111  111   *
 112  112   * 9. The smap_enable and smap_disable functions should not generally be called.
 113  113   * They exist such that DTrace and on_trap() may use them, that's it.
 114  114   *
 115  115   * 10. In general, the kernel has its own value for rflags that gets used. This
 116  116   * is maintained in a few different places which vary based on how the thread
 117  117   * comes into existence and whether it's a user thread. In general, when the
  118  118   * kernel takes a trap, it will always set rflags to a known set of flags,
 119  119   * mainly as part of ENABLE_INTR_FLAGS and F_OFF and F_ON. These ensure that
 120  120   * PS_ACHK is cleared for us. In addition, when using the sysenter instruction,
  121  121   * we mask PS_ACHK off via the AMD_SFMASK MSR. See init_cpu_syscall() for
 122  122   * where that gets masked off.
 123  123   */
 124  124  
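As a rough illustration of rules 2 and 3 above, the caller-side contract looks like the hedged C sketch below: a plain copyin() manages its own SMAP window, while the *_noerr() variants may only be reached under an on_fault()/no_fault() pair, which is what owns the SMAP state for them. The wrapper function read_user_word() is hypothetical; on_fault(), no_fault(), copyin_noerr() and label_t are the existing kernel interfaces.

    #include <sys/types.h>
    #include <sys/systm.h>
    #include <sys/errno.h>

    /* Hypothetical helper: read one 32-bit word from userland. */
    static int
    read_user_word(const void *uaddr, uint32_t *out)
    {
            label_t ljb;
            uint32_t val;

            if (on_fault(&ljb)) {
                    /* Fault path: per rule 2, SMAP protection is already back on. */
                    no_fault();
                    return (EFAULT);
            }
            /* Rule 3: *_noerr() is legal only under on_fault()/no_fault(). */
            copyin_noerr(uaddr, &val, sizeof (val));
            no_fault();
            *out = val;
            return (0);
    }
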
 125  125  /*
 126  126   * The optimal 64-bit bcopy and kcopy for modern x86 processors uses
 127  127   * "rep smovq" for large sizes. Performance data shows that many calls to
 128  128   * bcopy/kcopy/bzero/kzero operate on small buffers. For best performance for
 129  129   * these small sizes unrolled code is used. For medium sizes loops writing
 130  130   * 64-bytes per loop are used. Transition points were determined experimentally.
 131  131   */ 
 132  132  #define BZERO_USE_REP   (1024)
 133  133  #define BCOPY_DFLT_REP  (128)
 134  134  #define BCOPY_NHM_REP   (768)
 135  135  
 136  136  /*
 137  137   * Copy a block of storage, returning an error code if `from' or
 138  138   * `to' takes a kernel pagefault which cannot be resolved.
 139  139   * Returns errno value on pagefault error, 0 if all ok
 140  140   */
 141  141  
 142  142  /*
 143  143   * I'm sorry about these macros, but copy.s is unsurprisingly sensitive to
 144  144   * additional call instructions.
 145  145   */
 146  146  #if defined(__amd64)
 147  147  #define SMAP_DISABLE_COUNT      16
 148  148  #define SMAP_ENABLE_COUNT       26
 149  149  #elif defined(__i386)
 150  150  #define SMAP_DISABLE_COUNT      0
 151  151  #define SMAP_ENABLE_COUNT       0
 152  152  #endif
 153  153  
 154  154  #define SMAP_DISABLE_INSTR(ITER)                \
 155  155          .globl  _smap_disable_patch_/**/ITER;   \
 156  156          _smap_disable_patch_/**/ITER/**/:;      \
 157  157          nop; nop; nop;
 158  158  
 159  159  #define SMAP_ENABLE_INSTR(ITER)                 \
 160  160          .globl  _smap_enable_patch_/**/ITER;    \
 161  161          _smap_enable_patch_/**/ITER/**/:;       \
 162  162          nop; nop; nop;
 163  163  
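The three nops reserved by each macro above are exactly the size of the clac/stac instructions (both are 3-byte opcodes, 0f 01 ca and 0f 01 cb), so a SMAP-capable boot path can patch the real instruction over each _smap_*_patch_ site in place; SMAP_ENABLE_COUNT and SMAP_DISABLE_COUNT tell that code how many sites to expect. A hedged sketch of what one such patch step amounts to follows; the function and array names are hypothetical, not the kernel's actual patch routine, and real patching must also cope with write-protected kernel text.

    #include <sys/types.h>
    #include <sys/systm.h>

    #define SMAP_INSTR_LEN  3                       /* clac/stac are 3 bytes each */

    static const uint8_t clac_insn[SMAP_INSTR_LEN] = { 0x0f, 0x01, 0xca };
    static const uint8_t stac_insn[SMAP_INSTR_LEN] = { 0x0f, 0x01, 0xcb };

    /*
     * Hypothetical patcher: overwrite one reserved nop;nop;nop site.
     * "enable" sites get clac (clear AC: kernel may not touch user pages);
     * "disable" sites get stac (set AC: user access temporarily allowed).
     */
    static void
    smap_patch_site(uint8_t *site, boolean_t is_enable_site)
    {
            bcopy(is_enable_site ? clac_insn : stac_insn, site, SMAP_INSTR_LEN);
    }
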
 164  164  #if defined(__lint)
 165  165  
 166  166  /* ARGSUSED */
 167  167  int
 168  168  kcopy(const void *from, void *to, size_t count)
 169  169  { return (0); }
 170  170  
 171  171  #else   /* __lint */
 172  172  
 173  173          .globl  kernelbase
 174  174          .globl  postbootkernelbase
 175  175  
 176  176  #if defined(__amd64)
 177  177  
 178  178          ENTRY(kcopy)
 179  179          pushq   %rbp
 180  180          movq    %rsp, %rbp
 181  181  #ifdef DEBUG
 182  182          cmpq    postbootkernelbase(%rip), %rdi          /* %rdi = from */
 183  183          jb      0f
 184  184          cmpq    postbootkernelbase(%rip), %rsi          /* %rsi = to */
 185  185          jnb     1f
 186  186  0:      leaq    .kcopy_panic_msg(%rip), %rdi
 187  187          xorl    %eax, %eax
 188  188          call    panic
 189  189  1:
 190  190  #endif
 191  191          /*
 192  192           * pass lofault value as 4th argument to do_copy_fault
 193  193           */
 194  194          leaq    _kcopy_copyerr(%rip), %rcx
 195  195          movq    %gs:CPU_THREAD, %r9     /* %r9 = thread addr */
 196  196  
 197  197  do_copy_fault:
 198  198          movq    T_LOFAULT(%r9), %r11    /* save the current lofault */
 199  199          movq    %rcx, T_LOFAULT(%r9)    /* new lofault */
 200  200          call    bcopy_altentry
 201  201          xorl    %eax, %eax              /* return 0 (success) */
 202  202          SMAP_ENABLE_INSTR(0)
 203  203  
 204  204          /*
 205  205           * A fault during do_copy_fault is indicated through an errno value
 206  206           * in %rax and we iretq from the trap handler to here.
 207  207           */
 208  208  _kcopy_copyerr:
 209  209          movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
 210  210          leave
 211  211          ret
 212  212          SET_SIZE(kcopy)
 213  213  
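In C terms, the kcopy()/do_copy_fault sequence above behaves roughly like the hedged model below: install a fault-return address in t_lofault, do the copy, and restore the previous value whether the copy completes or the trap handler redirects control to _kcopy_copyerr with an errno in %rax. The model leans on on_fault()/no_fault() and always reports EFAULT; the real routine returns whatever errno the fault handler chose and also restores the SMAP state. kcopy_model() is a name invented for this sketch.

    #include <sys/types.h>
    #include <sys/systm.h>
    #include <sys/errno.h>

    /* Conceptual model only; the real kcopy() is the assembly above. */
    static int
    kcopy_model(const void *from, void *to, size_t count)
    {
            label_t ljb;
            int err = 0;

            if (on_fault(&ljb))
                    err = EFAULT;           /* unresolved pagefault on either buffer */
            else
                    bcopy(from, to, count);
            no_fault();                     /* restore the previous lofault state */
            return (err);
    }
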
 214  214  #elif defined(__i386)
 215  215  
 216  216  #define ARG_FROM        8
 217  217  #define ARG_TO          12
 218  218  #define ARG_COUNT       16
 219  219  
 220  220          ENTRY(kcopy)
 221  221  #ifdef DEBUG
 222  222          pushl   %ebp
 223  223          movl    %esp, %ebp
 224  224          movl    postbootkernelbase, %eax
 225  225          cmpl    %eax, ARG_FROM(%ebp)
 226  226          jb      0f
 227  227          cmpl    %eax, ARG_TO(%ebp)
 228  228          jnb     1f
 229  229  0:      pushl   $.kcopy_panic_msg
 230  230          call    panic
 231  231  1:      popl    %ebp
 232  232  #endif
 233  233          lea     _kcopy_copyerr, %eax    /* lofault value */
 234  234          movl    %gs:CPU_THREAD, %edx    
 235  235  
 236  236  do_copy_fault:
 237  237          pushl   %ebp
 238  238          movl    %esp, %ebp              /* setup stack frame */
 239  239          pushl   %esi
 240  240          pushl   %edi                    /* save registers */
 241  241  
 242  242          movl    T_LOFAULT(%edx), %edi
 243  243          pushl   %edi                    /* save the current lofault */
 244  244          movl    %eax, T_LOFAULT(%edx)   /* new lofault */
 245  245  
 246  246          movl    ARG_COUNT(%ebp), %ecx
 247  247          movl    ARG_FROM(%ebp), %esi
 248  248          movl    ARG_TO(%ebp), %edi
 249  249          shrl    $2, %ecx                /* word count */
 250  250          rep
 251  251            smovl
 252  252          movl    ARG_COUNT(%ebp), %ecx
 253  253          andl    $3, %ecx                /* bytes left over */
 254  254          rep
 255  255            smovb
 256  256          xorl    %eax, %eax
 257  257  
 258  258          /*
 259  259           * A fault during do_copy_fault is indicated through an errno value
 260  260           * in %eax and we iret from the trap handler to here.
 261  261           */
 262  262  _kcopy_copyerr:
 263  263          popl    %ecx
 264  264          popl    %edi
 265  265          movl    %ecx, T_LOFAULT(%edx)   /* restore the original lofault */
 266  266          popl    %esi
 267  267          popl    %ebp
 268  268          ret
 269  269          SET_SIZE(kcopy)
 270  270  
 271  271  #undef  ARG_FROM
 272  272  #undef  ARG_TO
 273  273  #undef  ARG_COUNT
 274  274  
 275  275  #endif  /* __i386 */
 276  276  #endif  /* __lint */
 277  277  
 278  278  #if defined(__lint)
 279  279  
 280  280  /*
 281  281   * Copy a block of storage.  Similar to kcopy but uses non-temporal
 282  282   * instructions.
 283  283   */
 284  284  
 285  285  /* ARGSUSED */
 286  286  int
 287  287  kcopy_nta(const void *from, void *to, size_t count, int copy_cached)
 288  288  { return (0); }
 289  289  
 290  290  #else   /* __lint */
 291  291  
 292  292  #if defined(__amd64)
 293  293  
 294  294  #define COPY_LOOP_INIT(src, dst, cnt)   \
 295  295          addq    cnt, src;                       \
 296  296          addq    cnt, dst;                       \
 297  297          shrq    $3, cnt;                        \
 298  298          neg     cnt
 299  299  
 300  300          /* Copy 16 bytes per loop.  Uses %rax and %r8 */
 301  301  #define COPY_LOOP_BODY(src, dst, cnt)   \
 302  302          prefetchnta     0x100(src, cnt, 8);     \
 303  303          movq    (src, cnt, 8), %rax;            \
 304  304          movq    0x8(src, cnt, 8), %r8;          \
 305  305          movnti  %rax, (dst, cnt, 8);            \
 306  306          movnti  %r8, 0x8(dst, cnt, 8);          \
 307  307          addq    $2, cnt
 308  308  
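COPY_LOOP_INIT biases both pointers to the ends of the buffers and turns the byte count into a negative count of 8-byte words, so COPY_LOOP_BODY can use that counter directly in its scaled addressing mode and simply count up toward zero, two quadwords (16 bytes) per iteration. A hedged, userland-compilable C model of the same loop is below; nta_copy_model() is a name invented here, count is assumed to be a non-zero multiple of 16 (as kcopy_nta guarantees on this path), and movnti's cache-bypassing stores are approximated by ordinary assignments.

    #include <sys/types.h>
    #include <stdint.h>

    /* Model of COPY_LOOP_INIT/COPY_LOOP_BODY: 16 bytes per iteration. */
    static void
    nta_copy_model(const void *from, void *to, size_t count)
    {
            const uint64_t *src = (const uint64_t *)from + (count >> 3);
            uint64_t *dst = (uint64_t *)to + (count >> 3);
            ssize_t i = -(ssize_t)(count >> 3);     /* negative word count */

            do {
                    dst[i] = src[i];                /* movq/movnti pair #1 */
                    dst[i + 1] = src[i + 1];        /* movq/movnti pair #2 */
                    i += 2;
            } while (i != 0);
    }
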
 309  309          ENTRY(kcopy_nta)
 310  310          pushq   %rbp
 311  311          movq    %rsp, %rbp
 312  312  #ifdef DEBUG
 313  313          cmpq    postbootkernelbase(%rip), %rdi          /* %rdi = from */
 314  314          jb      0f
 315  315          cmpq    postbootkernelbase(%rip), %rsi          /* %rsi = to */
 316  316          jnb     1f
 317  317  0:      leaq    .kcopy_panic_msg(%rip), %rdi
 318  318          xorl    %eax, %eax
 319  319          call    panic
 320  320  1:
 321  321  #endif
 322  322  
 323  323          movq    %gs:CPU_THREAD, %r9
 324  324          cmpq    $0, %rcx                /* No non-temporal access? */
 325  325          /*
 326  326           * pass lofault value as 4th argument to do_copy_fault
 327  327           */
 328  328          leaq    _kcopy_nta_copyerr(%rip), %rcx  /* doesn't set rflags */
 329  329          jnz     do_copy_fault           /* use regular access */
 330  330          /*
 331  331           * Make sure cnt is >= KCOPY_MIN_SIZE
 332  332           */
 333  333          cmpq    $KCOPY_MIN_SIZE, %rdx
 334  334          jb      do_copy_fault
 335  335  
 336  336          /*
 337  337           * Make sure src and dst are NTA_ALIGN_SIZE aligned,
 338  338           * count is COUNT_ALIGN_SIZE aligned.
 339  339           */
 340  340          movq    %rdi, %r10
 341  341          orq     %rsi, %r10
 342  342          andq    $NTA_ALIGN_MASK, %r10
 343  343          orq     %rdx, %r10
 344  344          andq    $COUNT_ALIGN_MASK, %r10
 345  345          jnz     do_copy_fault
 346  346  
 347  347          ALTENTRY(do_copy_fault_nta)
 348  348          movq    %gs:CPU_THREAD, %r9     /* %r9 = thread addr */
 349  349          movq    T_LOFAULT(%r9), %r11    /* save the current lofault */
 350  350          movq    %rcx, T_LOFAULT(%r9)    /* new lofault */
 351  351  
 352  352          /*
 353  353           * COPY_LOOP_BODY uses %rax and %r8
 354  354           */
 355  355          COPY_LOOP_INIT(%rdi, %rsi, %rdx)
 356  356  2:      COPY_LOOP_BODY(%rdi, %rsi, %rdx)
 357  357          jnz     2b
 358  358  
 359  359          mfence
 360  360          xorl    %eax, %eax              /* return 0 (success) */
 361  361          SMAP_ENABLE_INSTR(1)
 362  362  
 363  363  _kcopy_nta_copyerr:
 364  364          movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
 365  365          leave
 366  366          ret
 367  367          SET_SIZE(do_copy_fault_nta)
 368  368          SET_SIZE(kcopy_nta)
 369  369  
 370  370  #elif defined(__i386)
 371  371  
 372  372  #define ARG_FROM        8
 373  373  #define ARG_TO          12
 374  374  #define ARG_COUNT       16
 375  375  
 376  376  #define COPY_LOOP_INIT(src, dst, cnt)   \
 377  377          addl    cnt, src;                       \
 378  378          addl    cnt, dst;                       \
 379  379          shrl    $3, cnt;                        \
 380  380          neg     cnt
 381  381  
 382  382  #define COPY_LOOP_BODY(src, dst, cnt)   \
 383  383          prefetchnta     0x100(src, cnt, 8);     \
 384  384          movl    (src, cnt, 8), %esi;            \
 385  385          movnti  %esi, (dst, cnt, 8);            \
 386  386          movl    0x4(src, cnt, 8), %esi;         \
 387  387          movnti  %esi, 0x4(dst, cnt, 8);         \
 388  388          movl    0x8(src, cnt, 8), %esi;         \
 389  389          movnti  %esi, 0x8(dst, cnt, 8);         \
 390  390          movl    0xc(src, cnt, 8), %esi;         \
 391  391          movnti  %esi, 0xc(dst, cnt, 8);         \
 392  392          addl    $2, cnt
 393  393  
 394  394          /*
 395  395           * kcopy_nta is not implemented for 32-bit as no performance
 396  396           * improvement was shown.  We simply jump directly to kcopy
 397  397           * and discard the 4 arguments.
 398  398           */
 399  399          ENTRY(kcopy_nta)
 400  400          jmp     kcopy
 401  401  
 402  402          lea     _kcopy_nta_copyerr, %eax        /* lofault value */
 403  403          ALTENTRY(do_copy_fault_nta)
 404  404          pushl   %ebp
 405  405          movl    %esp, %ebp              /* setup stack frame */
 406  406          pushl   %esi
 407  407          pushl   %edi
 408  408  
 409  409          movl    %gs:CPU_THREAD, %edx    
 410  410          movl    T_LOFAULT(%edx), %edi
 411  411          pushl   %edi                    /* save the current lofault */
 412  412          movl    %eax, T_LOFAULT(%edx)   /* new lofault */
 413  413  
 414  414          /* COPY_LOOP_BODY needs to use %esi */
 415  415          movl    ARG_COUNT(%ebp), %ecx
 416  416          movl    ARG_FROM(%ebp), %edi
 417  417          movl    ARG_TO(%ebp), %eax
 418  418          COPY_LOOP_INIT(%edi, %eax, %ecx)
 419  419  1:      COPY_LOOP_BODY(%edi, %eax, %ecx)
 420  420          jnz     1b
 421  421          mfence
 422  422  
 423  423          xorl    %eax, %eax
 424  424  _kcopy_nta_copyerr:
 425  425          popl    %ecx
 426  426          popl    %edi
 427  427          movl    %ecx, T_LOFAULT(%edx)   /* restore the original lofault */
 428  428          popl    %esi
 429  429          leave
 430  430          ret
 431  431          SET_SIZE(do_copy_fault_nta)
 432  432          SET_SIZE(kcopy_nta)
 433  433  
 434  434  #undef  ARG_FROM
 435  435  #undef  ARG_TO
 436  436  #undef  ARG_COUNT
 437  437  
 438  438  #endif  /* __i386 */
 439  439  #endif  /* __lint */
 440  440  
 441  441  #if defined(__lint)
 442  442  
 443  443  /* ARGSUSED */
 444  444  void
 445  445  bcopy(const void *from, void *to, size_t count)
 446  446  {}
 447  447  
 448  448  #else   /* __lint */
 449  449  
 450  450  #if defined(__amd64)
 451  451  
 452  452          ENTRY(bcopy)
 453  453  #ifdef DEBUG
 454  454          orq     %rdx, %rdx              /* %rdx = count */
 455  455          jz      1f
 456  456          cmpq    postbootkernelbase(%rip), %rdi          /* %rdi = from */
 457  457          jb      0f
 458  458          cmpq    postbootkernelbase(%rip), %rsi          /* %rsi = to */         
 459  459          jnb     1f
 460  460  0:      leaq    .bcopy_panic_msg(%rip), %rdi
 461  461          jmp     call_panic              /* setup stack and call panic */
 462  462  1:
 463  463  #endif
 464  464          /*
 465  465           * bcopy_altentry() is called from kcopy, i.e., do_copy_fault.
 466  466           * kcopy assumes that bcopy doesn't touch %r9 and %r11. If bcopy
 467  467           * uses these registers in future they must be saved and restored.
 468  468           */
 469  469          ALTENTRY(bcopy_altentry)
 470  470  do_copy:
 471  471  #define L(s) .bcopy/**/s
 472  472          cmpq    $0x50, %rdx             /* 80 */
 473  473          jae     bcopy_ck_size
 474  474  
 475  475          /*
  476  476           * Performance data shows many callers copy small buffers. So for
 477  477           * best perf for these sizes unrolled code is used. Store data without
 478  478           * worrying about alignment.
 479  479           */
 480  480          leaq    L(fwdPxQx)(%rip), %r10
 481  481          addq    %rdx, %rdi
 482  482          addq    %rdx, %rsi
 483  483          movslq  (%r10,%rdx,4), %rcx
 484  484          leaq    (%rcx,%r10,1), %r10
 485  485          jmpq    *%r10
 486  486  
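The dispatch above implements a relative jump table: both pointers are first biased by the count, L(fwdPxQx) holds the 32-bit offset of each L(PxQy) target from the start of the table, and target L(PxQy) is straight-line code that copies y quadwords plus x trailing bytes, all addressed backwards from the biased pointers. A hedged, userland-compilable C model of what any one table entry does for a given count < 80 follows; small_bcopy_model() is a name invented for this sketch.

    #include <sys/types.h>
    #include <stdint.h>
    #include <string.h>

    /* Model of the L(PxQy) targets: copy `count` (< 80) bytes, working
     * backwards from the ends of both buffers, quadwords first. */
    static void
    small_bcopy_model(const void *from, void *to, size_t count)
    {
            const uint8_t *s = (const uint8_t *)from + count;   /* addq %rdx, %rdi */
            uint8_t *d = (uint8_t *)to + count;                 /* addq %rdx, %rsi */
            ssize_t off = -(ssize_t)count;

            while (off <= -8) {             /* the Qy quadword moves */
                    uint64_t w;
                    (void) memcpy(&w, s + off, sizeof (w));
                    (void) memcpy(d + off, &w, sizeof (w));
                    off += 8;
            }
            while (off < 0) {               /* the Px trailing 1/2/4-byte moves */
                    d[off] = s[off];
                    off++;
            }
    }
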
 487  487          .p2align 4
 488  488  L(fwdPxQx):
 489  489          .int       L(P0Q0)-L(fwdPxQx)   /* 0 */
 490  490          .int       L(P1Q0)-L(fwdPxQx)
 491  491          .int       L(P2Q0)-L(fwdPxQx)
 492  492          .int       L(P3Q0)-L(fwdPxQx)
 493  493          .int       L(P4Q0)-L(fwdPxQx)
 494  494          .int       L(P5Q0)-L(fwdPxQx)
 495  495          .int       L(P6Q0)-L(fwdPxQx)
 496  496          .int       L(P7Q0)-L(fwdPxQx) 
 497  497  
 498  498          .int       L(P0Q1)-L(fwdPxQx)   /* 8 */
 499  499          .int       L(P1Q1)-L(fwdPxQx)
 500  500          .int       L(P2Q1)-L(fwdPxQx)
 501  501          .int       L(P3Q1)-L(fwdPxQx)
 502  502          .int       L(P4Q1)-L(fwdPxQx)
 503  503          .int       L(P5Q1)-L(fwdPxQx)
 504  504          .int       L(P6Q1)-L(fwdPxQx)
 505  505          .int       L(P7Q1)-L(fwdPxQx) 
 506  506  
 507  507          .int       L(P0Q2)-L(fwdPxQx)   /* 16 */
 508  508          .int       L(P1Q2)-L(fwdPxQx)
 509  509          .int       L(P2Q2)-L(fwdPxQx)
 510  510          .int       L(P3Q2)-L(fwdPxQx)
 511  511          .int       L(P4Q2)-L(fwdPxQx)
 512  512          .int       L(P5Q2)-L(fwdPxQx)
 513  513          .int       L(P6Q2)-L(fwdPxQx)
 514  514          .int       L(P7Q2)-L(fwdPxQx) 
 515  515  
 516  516          .int       L(P0Q3)-L(fwdPxQx)   /* 24 */
 517  517          .int       L(P1Q3)-L(fwdPxQx)
 518  518          .int       L(P2Q3)-L(fwdPxQx)
 519  519          .int       L(P3Q3)-L(fwdPxQx)
 520  520          .int       L(P4Q3)-L(fwdPxQx)
 521  521          .int       L(P5Q3)-L(fwdPxQx)
 522  522          .int       L(P6Q3)-L(fwdPxQx)
 523  523          .int       L(P7Q3)-L(fwdPxQx) 
 524  524  
 525  525          .int       L(P0Q4)-L(fwdPxQx)   /* 32 */
 526  526          .int       L(P1Q4)-L(fwdPxQx)
 527  527          .int       L(P2Q4)-L(fwdPxQx)
 528  528          .int       L(P3Q4)-L(fwdPxQx)
 529  529          .int       L(P4Q4)-L(fwdPxQx)
 530  530          .int       L(P5Q4)-L(fwdPxQx)
 531  531          .int       L(P6Q4)-L(fwdPxQx)
 532  532          .int       L(P7Q4)-L(fwdPxQx) 
 533  533  
 534  534          .int       L(P0Q5)-L(fwdPxQx)   /* 40 */
 535  535          .int       L(P1Q5)-L(fwdPxQx)
 536  536          .int       L(P2Q5)-L(fwdPxQx)
 537  537          .int       L(P3Q5)-L(fwdPxQx)
 538  538          .int       L(P4Q5)-L(fwdPxQx)
 539  539          .int       L(P5Q5)-L(fwdPxQx)
 540  540          .int       L(P6Q5)-L(fwdPxQx)
 541  541          .int       L(P7Q5)-L(fwdPxQx) 
 542  542  
 543  543          .int       L(P0Q6)-L(fwdPxQx)   /* 48 */
 544  544          .int       L(P1Q6)-L(fwdPxQx)
 545  545          .int       L(P2Q6)-L(fwdPxQx)
 546  546          .int       L(P3Q6)-L(fwdPxQx)
 547  547          .int       L(P4Q6)-L(fwdPxQx)
 548  548          .int       L(P5Q6)-L(fwdPxQx)
 549  549          .int       L(P6Q6)-L(fwdPxQx)
 550  550          .int       L(P7Q6)-L(fwdPxQx) 
 551  551  
 552  552          .int       L(P0Q7)-L(fwdPxQx)   /* 56 */
 553  553          .int       L(P1Q7)-L(fwdPxQx)
 554  554          .int       L(P2Q7)-L(fwdPxQx)
 555  555          .int       L(P3Q7)-L(fwdPxQx)
 556  556          .int       L(P4Q7)-L(fwdPxQx)
 557  557          .int       L(P5Q7)-L(fwdPxQx)
 558  558          .int       L(P6Q7)-L(fwdPxQx)
 559  559          .int       L(P7Q7)-L(fwdPxQx) 
 560  560  
 561  561          .int       L(P0Q8)-L(fwdPxQx)   /* 64 */
 562  562          .int       L(P1Q8)-L(fwdPxQx)
 563  563          .int       L(P2Q8)-L(fwdPxQx)
 564  564          .int       L(P3Q8)-L(fwdPxQx)
 565  565          .int       L(P4Q8)-L(fwdPxQx)
 566  566          .int       L(P5Q8)-L(fwdPxQx)
 567  567          .int       L(P6Q8)-L(fwdPxQx)
 568  568          .int       L(P7Q8)-L(fwdPxQx)
 569  569  
 570  570          .int       L(P0Q9)-L(fwdPxQx)   /* 72 */
 571  571          .int       L(P1Q9)-L(fwdPxQx)
 572  572          .int       L(P2Q9)-L(fwdPxQx)
 573  573          .int       L(P3Q9)-L(fwdPxQx)
 574  574          .int       L(P4Q9)-L(fwdPxQx)
 575  575          .int       L(P5Q9)-L(fwdPxQx)
 576  576          .int       L(P6Q9)-L(fwdPxQx)
 577  577          .int       L(P7Q9)-L(fwdPxQx)   /* 79 */
 578  578  
 579  579          .p2align 4
 580  580  L(P0Q9):
 581  581          mov    -0x48(%rdi), %rcx
 582  582          mov    %rcx, -0x48(%rsi)
 583  583  L(P0Q8):
 584  584          mov    -0x40(%rdi), %r10
 585  585          mov    %r10, -0x40(%rsi)
 586  586  L(P0Q7):
 587  587          mov    -0x38(%rdi), %r8
 588  588          mov    %r8, -0x38(%rsi)
 589  589  L(P0Q6):
 590  590          mov    -0x30(%rdi), %rcx
 591  591          mov    %rcx, -0x30(%rsi)
 592  592  L(P0Q5):
 593  593          mov    -0x28(%rdi), %r10
 594  594          mov    %r10, -0x28(%rsi)
 595  595  L(P0Q4):
 596  596          mov    -0x20(%rdi), %r8
 597  597          mov    %r8, -0x20(%rsi)
 598  598  L(P0Q3):
 599  599          mov    -0x18(%rdi), %rcx
 600  600          mov    %rcx, -0x18(%rsi)
 601  601  L(P0Q2):
 602  602          mov    -0x10(%rdi), %r10
 603  603          mov    %r10, -0x10(%rsi)
 604  604  L(P0Q1):
 605  605          mov    -0x8(%rdi), %r8
 606  606          mov    %r8, -0x8(%rsi)
 607  607  L(P0Q0):                                   
 608  608          ret   
 609  609  
 610  610          .p2align 4
 611  611  L(P1Q9):
 612  612          mov    -0x49(%rdi), %r8
 613  613          mov    %r8, -0x49(%rsi)
 614  614  L(P1Q8):
 615  615          mov    -0x41(%rdi), %rcx
 616  616          mov    %rcx, -0x41(%rsi)
 617  617  L(P1Q7):
 618  618          mov    -0x39(%rdi), %r10
 619  619          mov    %r10, -0x39(%rsi)
 620  620  L(P1Q6):
 621  621          mov    -0x31(%rdi), %r8
 622  622          mov    %r8, -0x31(%rsi)
 623  623  L(P1Q5):
 624  624          mov    -0x29(%rdi), %rcx
 625  625          mov    %rcx, -0x29(%rsi)
 626  626  L(P1Q4):
 627  627          mov    -0x21(%rdi), %r10
 628  628          mov    %r10, -0x21(%rsi)
 629  629  L(P1Q3):
 630  630          mov    -0x19(%rdi), %r8
 631  631          mov    %r8, -0x19(%rsi)
 632  632  L(P1Q2):
 633  633          mov    -0x11(%rdi), %rcx
 634  634          mov    %rcx, -0x11(%rsi)
 635  635  L(P1Q1):
 636  636          mov    -0x9(%rdi), %r10
 637  637          mov    %r10, -0x9(%rsi)
 638  638  L(P1Q0):
 639  639          movzbq -0x1(%rdi), %r8
 640  640          mov    %r8b, -0x1(%rsi)
 641  641          ret   
 642  642  
 643  643          .p2align 4
 644  644  L(P2Q9):
 645  645          mov    -0x4a(%rdi), %r8
 646  646          mov    %r8, -0x4a(%rsi)
 647  647  L(P2Q8):
 648  648          mov    -0x42(%rdi), %rcx
 649  649          mov    %rcx, -0x42(%rsi)
 650  650  L(P2Q7):
 651  651          mov    -0x3a(%rdi), %r10
 652  652          mov    %r10, -0x3a(%rsi)
 653  653  L(P2Q6):
 654  654          mov    -0x32(%rdi), %r8
 655  655          mov    %r8, -0x32(%rsi)
 656  656  L(P2Q5):
 657  657          mov    -0x2a(%rdi), %rcx
 658  658          mov    %rcx, -0x2a(%rsi)
 659  659  L(P2Q4):
 660  660          mov    -0x22(%rdi), %r10
 661  661          mov    %r10, -0x22(%rsi)
 662  662  L(P2Q3):
 663  663          mov    -0x1a(%rdi), %r8
 664  664          mov    %r8, -0x1a(%rsi)
 665  665  L(P2Q2):
 666  666          mov    -0x12(%rdi), %rcx
 667  667          mov    %rcx, -0x12(%rsi)
 668  668  L(P2Q1):
 669  669          mov    -0xa(%rdi), %r10
 670  670          mov    %r10, -0xa(%rsi)
 671  671  L(P2Q0):
 672  672          movzwq -0x2(%rdi), %r8
 673  673          mov    %r8w, -0x2(%rsi)
 674  674          ret   
 675  675  
 676  676          .p2align 4
 677  677  L(P3Q9):
 678  678          mov    -0x4b(%rdi), %r8
 679  679          mov    %r8, -0x4b(%rsi)
 680  680  L(P3Q8):
 681  681          mov    -0x43(%rdi), %rcx
 682  682          mov    %rcx, -0x43(%rsi)
 683  683  L(P3Q7):
 684  684          mov    -0x3b(%rdi), %r10
 685  685          mov    %r10, -0x3b(%rsi)
 686  686  L(P3Q6):
 687  687          mov    -0x33(%rdi), %r8
 688  688          mov    %r8, -0x33(%rsi)
 689  689  L(P3Q5):
 690  690          mov    -0x2b(%rdi), %rcx
 691  691          mov    %rcx, -0x2b(%rsi)
 692  692  L(P3Q4):
 693  693          mov    -0x23(%rdi), %r10
 694  694          mov    %r10, -0x23(%rsi)
 695  695  L(P3Q3):
 696  696          mov    -0x1b(%rdi), %r8
 697  697          mov    %r8, -0x1b(%rsi)
 698  698  L(P3Q2):
 699  699          mov    -0x13(%rdi), %rcx
 700  700          mov    %rcx, -0x13(%rsi)
 701  701  L(P3Q1):
 702  702          mov    -0xb(%rdi), %r10
 703  703          mov    %r10, -0xb(%rsi)
 704  704          /*
 705  705           * These trailing loads/stores have to do all their loads 1st, 
 706  706           * then do the stores.
 707  707           */
 708  708  L(P3Q0):
 709  709          movzwq -0x3(%rdi), %r8
 710  710          movzbq -0x1(%rdi), %r10
 711  711          mov    %r8w, -0x3(%rsi)
 712  712          mov    %r10b, -0x1(%rsi)
 713  713          ret   
 714  714  
 715  715          .p2align 4
 716  716  L(P4Q9):
 717  717          mov    -0x4c(%rdi), %r8
 718  718          mov    %r8, -0x4c(%rsi)
 719  719  L(P4Q8):
 720  720          mov    -0x44(%rdi), %rcx
 721  721          mov    %rcx, -0x44(%rsi)
 722  722  L(P4Q7):
 723  723          mov    -0x3c(%rdi), %r10
 724  724          mov    %r10, -0x3c(%rsi)
 725  725  L(P4Q6):
 726  726          mov    -0x34(%rdi), %r8
 727  727          mov    %r8, -0x34(%rsi)
 728  728  L(P4Q5):
 729  729          mov    -0x2c(%rdi), %rcx
 730  730          mov    %rcx, -0x2c(%rsi)
 731  731  L(P4Q4):
 732  732          mov    -0x24(%rdi), %r10
 733  733          mov    %r10, -0x24(%rsi)
 734  734  L(P4Q3):
 735  735          mov    -0x1c(%rdi), %r8
 736  736          mov    %r8, -0x1c(%rsi)
 737  737  L(P4Q2):
 738  738          mov    -0x14(%rdi), %rcx
 739  739          mov    %rcx, -0x14(%rsi)
 740  740  L(P4Q1):
 741  741          mov    -0xc(%rdi), %r10
 742  742          mov    %r10, -0xc(%rsi)
 743  743  L(P4Q0):
 744  744          mov    -0x4(%rdi), %r8d
 745  745          mov    %r8d, -0x4(%rsi)
 746  746          ret   
 747  747  
 748  748          .p2align 4
 749  749  L(P5Q9):
 750  750          mov    -0x4d(%rdi), %r8
 751  751          mov    %r8, -0x4d(%rsi)
 752  752  L(P5Q8):
 753  753          mov    -0x45(%rdi), %rcx
 754  754          mov    %rcx, -0x45(%rsi)
 755  755  L(P5Q7):
 756  756          mov    -0x3d(%rdi), %r10
 757  757          mov    %r10, -0x3d(%rsi)
 758  758  L(P5Q6):
 759  759          mov    -0x35(%rdi), %r8
 760  760          mov    %r8, -0x35(%rsi)
 761  761  L(P5Q5):
 762  762          mov    -0x2d(%rdi), %rcx
 763  763          mov    %rcx, -0x2d(%rsi)
 764  764  L(P5Q4):
 765  765          mov    -0x25(%rdi), %r10
 766  766          mov    %r10, -0x25(%rsi)
 767  767  L(P5Q3):
 768  768          mov    -0x1d(%rdi), %r8
 769  769          mov    %r8, -0x1d(%rsi)
 770  770  L(P5Q2):
 771  771          mov    -0x15(%rdi), %rcx
 772  772          mov    %rcx, -0x15(%rsi)
 773  773  L(P5Q1):
 774  774          mov    -0xd(%rdi), %r10
 775  775          mov    %r10, -0xd(%rsi)
 776  776  L(P5Q0):
 777  777          mov    -0x5(%rdi), %r8d
 778  778          movzbq -0x1(%rdi), %r10
 779  779          mov    %r8d, -0x5(%rsi)
 780  780          mov    %r10b, -0x1(%rsi)
 781  781          ret   
 782  782  
 783  783          .p2align 4
 784  784  L(P6Q9):
 785  785          mov    -0x4e(%rdi), %r8
 786  786          mov    %r8, -0x4e(%rsi)
 787  787  L(P6Q8):
 788  788          mov    -0x46(%rdi), %rcx
 789  789          mov    %rcx, -0x46(%rsi)
 790  790  L(P6Q7):
 791  791          mov    -0x3e(%rdi), %r10
 792  792          mov    %r10, -0x3e(%rsi)
 793  793  L(P6Q6):
 794  794          mov    -0x36(%rdi), %r8
 795  795          mov    %r8, -0x36(%rsi)
 796  796  L(P6Q5):
 797  797          mov    -0x2e(%rdi), %rcx
 798  798          mov    %rcx, -0x2e(%rsi)
 799  799  L(P6Q4):
 800  800          mov    -0x26(%rdi), %r10
 801  801          mov    %r10, -0x26(%rsi)
 802  802  L(P6Q3):
 803  803          mov    -0x1e(%rdi), %r8
 804  804          mov    %r8, -0x1e(%rsi)
 805  805  L(P6Q2):
 806  806          mov    -0x16(%rdi), %rcx
 807  807          mov    %rcx, -0x16(%rsi)
 808  808  L(P6Q1):
 809  809          mov    -0xe(%rdi), %r10
 810  810          mov    %r10, -0xe(%rsi)
 811  811  L(P6Q0):
 812  812          mov    -0x6(%rdi), %r8d
 813  813          movzwq -0x2(%rdi), %r10
 814  814          mov    %r8d, -0x6(%rsi)
 815  815          mov    %r10w, -0x2(%rsi)
 816  816          ret   
 817  817  
 818  818          .p2align 4
 819  819  L(P7Q9):
 820  820          mov    -0x4f(%rdi), %r8
 821  821          mov    %r8, -0x4f(%rsi)
 822  822  L(P7Q8):
 823  823          mov    -0x47(%rdi), %rcx
 824  824          mov    %rcx, -0x47(%rsi)
 825  825  L(P7Q7):
 826  826          mov    -0x3f(%rdi), %r10
 827  827          mov    %r10, -0x3f(%rsi)
 828  828  L(P7Q6):
 829  829          mov    -0x37(%rdi), %r8
 830  830          mov    %r8, -0x37(%rsi)
 831  831  L(P7Q5):
 832  832          mov    -0x2f(%rdi), %rcx
 833  833          mov    %rcx, -0x2f(%rsi)
 834  834  L(P7Q4):
 835  835          mov    -0x27(%rdi), %r10
 836  836          mov    %r10, -0x27(%rsi)
 837  837  L(P7Q3):
 838  838          mov    -0x1f(%rdi), %r8
 839  839          mov    %r8, -0x1f(%rsi)
 840  840  L(P7Q2):
 841  841          mov    -0x17(%rdi), %rcx
 842  842          mov    %rcx, -0x17(%rsi)
 843  843  L(P7Q1):
 844  844          mov    -0xf(%rdi), %r10
 845  845          mov    %r10, -0xf(%rsi)
 846  846  L(P7Q0):
 847  847          mov    -0x7(%rdi), %r8d
 848  848          movzwq -0x3(%rdi), %r10
 849  849          movzbq -0x1(%rdi), %rcx
 850  850          mov    %r8d, -0x7(%rsi)
 851  851          mov    %r10w, -0x3(%rsi)
 852  852          mov    %cl, -0x1(%rsi)
 853  853          ret   
 854  854  
 855  855          /*
 856  856           * For large sizes rep smovq is fastest.
 857  857           * Transition point determined experimentally as measured on
 858  858           * Intel Xeon processors (incl. Nehalem and previous generations) and
  
 859  859           * AMD Opteron. The transition value is patched at boot time to avoid
  860  860           * a memory reference hit.
 861  861           */
 862  862          .globl bcopy_patch_start
 863  863  bcopy_patch_start:
 864  864          cmpq    $BCOPY_NHM_REP, %rdx
 865  865          .globl bcopy_patch_end
 866  866  bcopy_patch_end:
 867  867  
 868  868          .p2align 4
 869      -        .globl bcopy_ck_size
 870      -bcopy_ck_size:
      869 +        ALTENTRY(bcopy_ck_size)
      870 +
 871  871          cmpq    $BCOPY_DFLT_REP, %rdx
 872  872          jae     L(use_rep)
 873  873  
 874  874          /*
  875  875           * Align to an 8-byte boundary. Avoids penalties from unaligned stores
 876  876           * as well as from stores spanning cachelines.
 877  877           */
 878  878          test    $0x7, %rsi
 879  879          jz      L(aligned_loop)
 880  880          test    $0x1, %rsi
 881  881          jz      2f
 882  882          movzbq  (%rdi), %r8
 883  883          dec     %rdx
 884  884          inc     %rdi
 885  885          mov     %r8b, (%rsi)
 886  886          inc     %rsi
 887  887  2:
 888  888          test    $0x2, %rsi
 889  889          jz      4f
 890  890          movzwq  (%rdi), %r8
 891  891          sub     $0x2, %rdx
 892  892          add     $0x2, %rdi
 893  893          mov     %r8w, (%rsi)
 894  894          add     $0x2, %rsi
 895  895  4:
 896  896          test    $0x4, %rsi
 897  897          jz      L(aligned_loop)
 898  898          mov     (%rdi), %r8d
 899  899          sub     $0x4, %rdx
 900  900          add     $0x4, %rdi
 901  901          mov     %r8d, (%rsi)
 902  902          add     $0x4, %rsi
 903  903  
 904  904          /*
 905  905           * Copy 64-bytes per loop
 906  906           */
 907  907          .p2align 4
 908  908  L(aligned_loop):
 909  909          mov     (%rdi), %r8
 910  910          mov     0x8(%rdi), %r10
 911  911          lea     -0x40(%rdx), %rdx
 912  912          mov     %r8, (%rsi)
 913  913          mov     %r10, 0x8(%rsi)
 914  914          mov     0x10(%rdi), %rcx
 915  915          mov     0x18(%rdi), %r8
 916  916          mov     %rcx, 0x10(%rsi)
 917  917          mov     %r8, 0x18(%rsi)
 918  918  
 919  919          cmp     $0x40, %rdx
 920  920          mov     0x20(%rdi), %r10
 921  921          mov     0x28(%rdi), %rcx
 922  922          mov     %r10, 0x20(%rsi)
 923  923          mov     %rcx, 0x28(%rsi)
 924  924          mov     0x30(%rdi), %r8
 925  925          mov     0x38(%rdi), %r10
 926  926          lea     0x40(%rdi), %rdi
 927  927          mov     %r8, 0x30(%rsi)
 928  928          mov     %r10, 0x38(%rsi)
 929  929          lea     0x40(%rsi), %rsi
 930  930          jae     L(aligned_loop)
 931  931  
 932  932          /*
 933  933           * Copy remaining bytes (0-63)
 934  934           */
 935  935  L(do_remainder):
 936  936          leaq    L(fwdPxQx)(%rip), %r10
 937  937          addq    %rdx, %rdi
 938  938          addq    %rdx, %rsi
 939  939          movslq  (%r10,%rdx,4), %rcx
 940  940          leaq    (%rcx,%r10,1), %r10
 941  941          jmpq    *%r10
 942  942  
 943  943          /*
 944  944           * Use rep smovq. Clear remainder via unrolled code
 945  945           */
 946  946          .p2align 4
 947  947  L(use_rep):
 948  948          xchgq   %rdi, %rsi              /* %rsi = source, %rdi = destination */
  
 949  949          movq    %rdx, %rcx              /* %rcx = count */
 950  950          shrq    $3, %rcx                /* 8-byte word count */
 951  951          rep
 952  952            smovq
 953  953  
 954  954          xchgq   %rsi, %rdi              /* %rdi = src, %rsi = destination */
 955  955          andq    $7, %rdx                /* remainder */
 956  956          jnz     L(do_remainder)
 957  957          ret
 958  958  #undef  L
      959 +        SET_SIZE(bcopy_ck_size)
 959  960  
 960  961  #ifdef DEBUG
 961  962          /*
 962  963           * Setup frame on the run-time stack. The end of the input argument
  963  964           * area must be aligned on a 16-byte boundary. The stack pointer %rsp
 964  965           * always points to the end of the latest allocated stack frame.
 965  966           * panic(const char *format, ...) is a varargs function. When a
 966  967           * function taking variable arguments is called, %rax must be set
 967  968           * to eight times the number of floating point parameters passed
 968  969           * to the function in SSE registers.
 969  970           */
 970  971  call_panic:
 971  972          pushq   %rbp                    /* align stack properly */
 972  973          movq    %rsp, %rbp
 973  974          xorl    %eax, %eax              /* no variable arguments */
 974  975          call    panic                   /* %rdi = format string */
 975  976  #endif
 976  977          SET_SIZE(bcopy_altentry)
 977  978          SET_SIZE(bcopy)
 978  979  
 979  980  #elif defined(__i386)
 980  981  
 981  982  #define ARG_FROM        4
 982  983  #define ARG_TO          8
 983  984  #define ARG_COUNT       12
 984  985  
 985  986          ENTRY(bcopy)
 986  987  #ifdef DEBUG
 987  988          movl    ARG_COUNT(%esp), %eax
 988  989          orl     %eax, %eax
 989  990          jz      1f
 990  991          movl    postbootkernelbase, %eax
 991  992          cmpl    %eax, ARG_FROM(%esp)
 992  993          jb      0f
 993  994          cmpl    %eax, ARG_TO(%esp)
 994  995          jnb     1f
 995  996  0:      pushl   %ebp
 996  997          movl    %esp, %ebp
 997  998          pushl   $.bcopy_panic_msg
 998  999          call    panic
 999 1000  1:
1000 1001  #endif
1001 1002  do_copy:
1002 1003          movl    %esi, %eax              /* save registers */
1003 1004          movl    %edi, %edx
1004 1005          movl    ARG_COUNT(%esp), %ecx
1005 1006          movl    ARG_FROM(%esp), %esi
1006 1007          movl    ARG_TO(%esp), %edi
1007 1008  
1008 1009          shrl    $2, %ecx                /* word count */
1009 1010          rep
1010 1011            smovl
1011 1012          movl    ARG_COUNT(%esp), %ecx
1012 1013          andl    $3, %ecx                /* bytes left over */
1013 1014          rep
1014 1015            smovb
1015 1016          movl    %eax, %esi              /* restore registers */
1016 1017          movl    %edx, %edi
1017 1018          ret
1018 1019          SET_SIZE(bcopy)
1019 1020  
1020 1021  #undef  ARG_COUNT
1021 1022  #undef  ARG_FROM
1022 1023  #undef  ARG_TO
1023 1024  
1024 1025  #endif  /* __i386 */
1025 1026  #endif  /* __lint */
1026 1027  
1027 1028  
1028 1029  /*
1029 1030   * Zero a block of storage, returning an error code if we
1030 1031   * take a kernel pagefault which cannot be resolved.
1031 1032   * Returns errno value on pagefault error, 0 if all ok
1032 1033   */
1033 1034  
1034 1035  #if defined(__lint)
1035 1036  
1036 1037  /* ARGSUSED */
1037 1038  int
1038 1039  kzero(void *addr, size_t count)
1039 1040  { return (0); }
1040 1041  
1041 1042  #else   /* __lint */
1042 1043  
1043 1044  #if defined(__amd64)
1044 1045  
1045 1046          ENTRY(kzero)
1046 1047  #ifdef DEBUG
1047 1048          cmpq    postbootkernelbase(%rip), %rdi  /* %rdi = addr */
1048 1049          jnb     0f
1049 1050          leaq    .kzero_panic_msg(%rip), %rdi
1050 1051          jmp     call_panic              /* setup stack and call panic */
1051 1052  0:
1052 1053  #endif
1053 1054          /*
1054 1055           * pass lofault value as 3rd argument for fault return 
1055 1056           */
1056 1057          leaq    _kzeroerr(%rip), %rdx
1057 1058  
1058 1059          movq    %gs:CPU_THREAD, %r9     /* %r9 = thread addr */
1059 1060          movq    T_LOFAULT(%r9), %r11    /* save the current lofault */
1060 1061          movq    %rdx, T_LOFAULT(%r9)    /* new lofault */
1061 1062          call    bzero_altentry
1062 1063          xorl    %eax, %eax
1063 1064          movq    %r11, T_LOFAULT(%r9)    /* restore the original lofault */
1064 1065          ret
1065 1066          /*
1066 1067           * A fault during bzero is indicated through an errno value
1067 1068           * in %rax when we iretq to here.
1068 1069           */
1069 1070  _kzeroerr:
1070 1071          addq    $8, %rsp                /* pop bzero_altentry call ret addr */
1071 1072          movq    %r11, T_LOFAULT(%r9)    /* restore the original lofault */
1072 1073          ret
1073 1074          SET_SIZE(kzero)
1074 1075  
1075 1076  #elif defined(__i386)
1076 1077  
1077 1078  #define ARG_ADDR        8
1078 1079  #define ARG_COUNT       12
1079 1080  
1080 1081          ENTRY(kzero)
1081 1082  #ifdef DEBUG
1082 1083          pushl   %ebp
1083 1084          movl    %esp, %ebp
1084 1085          movl    postbootkernelbase, %eax
1085 1086          cmpl    %eax, ARG_ADDR(%ebp)
1086 1087          jnb     0f
1087 1088          pushl   $.kzero_panic_msg
1088 1089          call    panic
1089 1090  0:      popl    %ebp
1090 1091  #endif
1091 1092          lea     _kzeroerr, %eax         /* kzeroerr is lofault value */
1092 1093  
1093 1094          pushl   %ebp                    /* save stack base */
1094 1095          movl    %esp, %ebp              /* set new stack base */
1095 1096          pushl   %edi                    /* save %edi */
1096 1097  
1097 1098          mov     %gs:CPU_THREAD, %edx    
1098 1099          movl    T_LOFAULT(%edx), %edi
1099 1100          pushl   %edi                    /* save the current lofault */
1100 1101          movl    %eax, T_LOFAULT(%edx)   /* new lofault */
1101 1102  
1102 1103          movl    ARG_COUNT(%ebp), %ecx   /* get size in bytes */
1103 1104          movl    ARG_ADDR(%ebp), %edi    /* %edi <- address of bytes to clear */
1104 1105          shrl    $2, %ecx                /* Count of double words to zero */
1105 1106          xorl    %eax, %eax              /* sstol val */
1106 1107          rep
1107 1108            sstol                 /* %ecx contains words to clear (%eax=0) */
1108 1109  
1109 1110          movl    ARG_COUNT(%ebp), %ecx   /* get size in bytes */
1110 1111          andl    $3, %ecx                /* do mod 4 */
1111 1112          rep
1112 1113            sstob                 /* %ecx contains residual bytes to clear */
1113 1114  
1114 1115          /*
1115 1116           * A fault during kzero is indicated through an errno value
1116 1117           * in %eax when we iret to here.
1117 1118           */
1118 1119  _kzeroerr:
1119 1120          popl    %edi
1120 1121          movl    %edi, T_LOFAULT(%edx)   /* restore the original lofault */
1121 1122          popl    %edi
1122 1123          popl    %ebp
1123 1124          ret
1124 1125          SET_SIZE(kzero)
1125 1126  
1126 1127  #undef  ARG_ADDR
1127 1128  #undef  ARG_COUNT
1128 1129  
1129 1130  #endif  /* __i386 */
1130 1131  #endif  /* __lint */
1131 1132  
1132 1133  /*
1133 1134   * Zero a block of storage.
1134 1135   */
1135 1136  
1136 1137  #if defined(__lint)
1137 1138  
1138 1139  /* ARGSUSED */
1139 1140  void
1140 1141  bzero(void *addr, size_t count)
1141 1142  {}
1142 1143  
1143 1144  #else   /* __lint */
1144 1145  
1145 1146  #if defined(__amd64)
1146 1147  
1147 1148          ENTRY(bzero)
1148 1149  #ifdef DEBUG
1149 1150          cmpq    postbootkernelbase(%rip), %rdi  /* %rdi = addr */
1150 1151          jnb     0f
1151 1152          leaq    .bzero_panic_msg(%rip), %rdi
1152 1153          jmp     call_panic              /* setup stack and call panic */
1153 1154  0:
1154 1155  #endif
1155 1156          ALTENTRY(bzero_altentry)
1156 1157  do_zero:
1157 1158  #define L(s) .bzero/**/s
1158 1159          xorl    %eax, %eax
1159 1160  
1160 1161          cmpq    $0x50, %rsi             /* 80 */
1161 1162          jae     L(ck_align)
1162 1163  
1163 1164          /*
 1164 1165           * Performance data shows many callers are zeroing small buffers. So
1165 1166           * for best perf for these sizes unrolled code is used. Store zeros
1166 1167           * without worrying about alignment.
1167 1168           */
1168 1169          leaq    L(setPxQx)(%rip), %r10
1169 1170          addq    %rsi, %rdi
1170 1171          movslq  (%r10,%rsi,4), %rcx
1171 1172          leaq    (%rcx,%r10,1), %r10
1172 1173          jmpq    *%r10
1173 1174  
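As in bcopy above, this is a relative jump table: %rdi is biased by the count, and target L(PxQy) stores y quadwords of zero plus an x-byte tail, addressed backwards from the biased pointer. A hedged, userland-compilable C model of one table entry (count < 80) follows; small_bzero_model() is a name invented for this sketch.

    #include <sys/types.h>
    #include <stdint.h>
    #include <string.h>

    /* Model of the L(setPxQx) targets: zero `count` (< 80) bytes,
     * working backwards from the end of the buffer. */
    static void
    small_bzero_model(void *addr, size_t count)
    {
            uint8_t *d = (uint8_t *)addr + count;   /* addq %rsi, %rdi */
            ssize_t off = -(ssize_t)count;
            const uint64_t zero = 0;

            while (off <= -8) {                     /* Qy quadword stores */
                    (void) memcpy(d + off, &zero, sizeof (zero));
                    off += 8;
            }
            while (off < 0)                         /* Px trailing byte/word/dword tail */
                    d[off++] = 0;
    }
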
1174 1175          .p2align 4
1175 1176  L(setPxQx):
1176 1177          .int       L(P0Q0)-L(setPxQx)   /* 0 */
1177 1178          .int       L(P1Q0)-L(setPxQx)
1178 1179          .int       L(P2Q0)-L(setPxQx)
1179 1180          .int       L(P3Q0)-L(setPxQx)
1180 1181          .int       L(P4Q0)-L(setPxQx)
1181 1182          .int       L(P5Q0)-L(setPxQx)
1182 1183          .int       L(P6Q0)-L(setPxQx)
1183 1184          .int       L(P7Q0)-L(setPxQx) 
1184 1185  
1185 1186          .int       L(P0Q1)-L(setPxQx)   /* 8 */
1186 1187          .int       L(P1Q1)-L(setPxQx)
1187 1188          .int       L(P2Q1)-L(setPxQx)
1188 1189          .int       L(P3Q1)-L(setPxQx)
1189 1190          .int       L(P4Q1)-L(setPxQx)
1190 1191          .int       L(P5Q1)-L(setPxQx)
1191 1192          .int       L(P6Q1)-L(setPxQx)
1192 1193          .int       L(P7Q1)-L(setPxQx) 
1193 1194  
1194 1195          .int       L(P0Q2)-L(setPxQx)   /* 16 */
1195 1196          .int       L(P1Q2)-L(setPxQx)
1196 1197          .int       L(P2Q2)-L(setPxQx)
1197 1198          .int       L(P3Q2)-L(setPxQx)
1198 1199          .int       L(P4Q2)-L(setPxQx)
1199 1200          .int       L(P5Q2)-L(setPxQx)
1200 1201          .int       L(P6Q2)-L(setPxQx)
1201 1202          .int       L(P7Q2)-L(setPxQx) 
1202 1203  
1203 1204          .int       L(P0Q3)-L(setPxQx)   /* 24 */
1204 1205          .int       L(P1Q3)-L(setPxQx)
1205 1206          .int       L(P2Q3)-L(setPxQx)
1206 1207          .int       L(P3Q3)-L(setPxQx)
1207 1208          .int       L(P4Q3)-L(setPxQx)
1208 1209          .int       L(P5Q3)-L(setPxQx)
1209 1210          .int       L(P6Q3)-L(setPxQx)
1210 1211          .int       L(P7Q3)-L(setPxQx) 
1211 1212  
1212 1213          .int       L(P0Q4)-L(setPxQx)   /* 32 */
1213 1214          .int       L(P1Q4)-L(setPxQx)
1214 1215          .int       L(P2Q4)-L(setPxQx)
1215 1216          .int       L(P3Q4)-L(setPxQx)
1216 1217          .int       L(P4Q4)-L(setPxQx)
1217 1218          .int       L(P5Q4)-L(setPxQx)
1218 1219          .int       L(P6Q4)-L(setPxQx)
1219 1220          .int       L(P7Q4)-L(setPxQx) 
1220 1221  
1221 1222          .int       L(P0Q5)-L(setPxQx)   /* 40 */
1222 1223          .int       L(P1Q5)-L(setPxQx)
1223 1224          .int       L(P2Q5)-L(setPxQx)
1224 1225          .int       L(P3Q5)-L(setPxQx)
1225 1226          .int       L(P4Q5)-L(setPxQx)
1226 1227          .int       L(P5Q5)-L(setPxQx)
1227 1228          .int       L(P6Q5)-L(setPxQx)
1228 1229          .int       L(P7Q5)-L(setPxQx) 
1229 1230  
1230 1231          .int       L(P0Q6)-L(setPxQx)   /* 48 */
1231 1232          .int       L(P1Q6)-L(setPxQx)
1232 1233          .int       L(P2Q6)-L(setPxQx)
1233 1234          .int       L(P3Q6)-L(setPxQx)
1234 1235          .int       L(P4Q6)-L(setPxQx)
1235 1236          .int       L(P5Q6)-L(setPxQx)
1236 1237          .int       L(P6Q6)-L(setPxQx)
1237 1238          .int       L(P7Q6)-L(setPxQx) 
1238 1239  
1239 1240          .int       L(P0Q7)-L(setPxQx)   /* 56 */
1240 1241          .int       L(P1Q7)-L(setPxQx)
1241 1242          .int       L(P2Q7)-L(setPxQx)
1242 1243          .int       L(P3Q7)-L(setPxQx)
1243 1244          .int       L(P4Q7)-L(setPxQx)
1244 1245          .int       L(P5Q7)-L(setPxQx)
1245 1246          .int       L(P6Q7)-L(setPxQx)
1246 1247          .int       L(P7Q7)-L(setPxQx) 
1247 1248  
1248 1249          .int       L(P0Q8)-L(setPxQx)   /* 64 */
1249 1250          .int       L(P1Q8)-L(setPxQx)
1250 1251          .int       L(P2Q8)-L(setPxQx)
1251 1252          .int       L(P3Q8)-L(setPxQx)
1252 1253          .int       L(P4Q8)-L(setPxQx)
1253 1254          .int       L(P5Q8)-L(setPxQx)
1254 1255          .int       L(P6Q8)-L(setPxQx)
1255 1256          .int       L(P7Q8)-L(setPxQx)
1256 1257  
1257 1258          .int       L(P0Q9)-L(setPxQx)   /* 72 */
1258 1259          .int       L(P1Q9)-L(setPxQx)
1259 1260          .int       L(P2Q9)-L(setPxQx)
1260 1261          .int       L(P3Q9)-L(setPxQx)
1261 1262          .int       L(P4Q9)-L(setPxQx)
1262 1263          .int       L(P5Q9)-L(setPxQx)
1263 1264          .int       L(P6Q9)-L(setPxQx)
1264 1265          .int       L(P7Q9)-L(setPxQx)   /* 79 */
1265 1266  
1266 1267          .p2align 4
1267 1268  L(P0Q9): mov    %rax, -0x48(%rdi)
1268 1269  L(P0Q8): mov    %rax, -0x40(%rdi)
1269 1270  L(P0Q7): mov    %rax, -0x38(%rdi)
1270 1271  L(P0Q6): mov    %rax, -0x30(%rdi)
1271 1272  L(P0Q5): mov    %rax, -0x28(%rdi)
1272 1273  L(P0Q4): mov    %rax, -0x20(%rdi)
1273 1274  L(P0Q3): mov    %rax, -0x18(%rdi)
1274 1275  L(P0Q2): mov    %rax, -0x10(%rdi)
1275 1276  L(P0Q1): mov    %rax, -0x8(%rdi)
1276 1277  L(P0Q0): 
1277 1278           ret
1278 1279  
1279 1280          .p2align 4
1280 1281  L(P1Q9): mov    %rax, -0x49(%rdi)
1281 1282  L(P1Q8): mov    %rax, -0x41(%rdi)
1282 1283  L(P1Q7): mov    %rax, -0x39(%rdi)
1283 1284  L(P1Q6): mov    %rax, -0x31(%rdi)
1284 1285  L(P1Q5): mov    %rax, -0x29(%rdi)
1285 1286  L(P1Q4): mov    %rax, -0x21(%rdi)
1286 1287  L(P1Q3): mov    %rax, -0x19(%rdi)
1287 1288  L(P1Q2): mov    %rax, -0x11(%rdi)
1288 1289  L(P1Q1): mov    %rax, -0x9(%rdi)
1289 1290  L(P1Q0): mov    %al, -0x1(%rdi)
1290 1291           ret
1291 1292  
1292 1293          .p2align 4
1293 1294  L(P2Q9): mov    %rax, -0x4a(%rdi)
1294 1295  L(P2Q8): mov    %rax, -0x42(%rdi)
1295 1296  L(P2Q7): mov    %rax, -0x3a(%rdi)
1296 1297  L(P2Q6): mov    %rax, -0x32(%rdi)
1297 1298  L(P2Q5): mov    %rax, -0x2a(%rdi)
1298 1299  L(P2Q4): mov    %rax, -0x22(%rdi)
1299 1300  L(P2Q3): mov    %rax, -0x1a(%rdi)
1300 1301  L(P2Q2): mov    %rax, -0x12(%rdi)
1301 1302  L(P2Q1): mov    %rax, -0xa(%rdi)
1302 1303  L(P2Q0): mov    %ax, -0x2(%rdi)
1303 1304           ret
1304 1305  
1305 1306          .p2align 4
1306 1307  L(P3Q9): mov    %rax, -0x4b(%rdi)
1307 1308  L(P3Q8): mov    %rax, -0x43(%rdi)
1308 1309  L(P3Q7): mov    %rax, -0x3b(%rdi)
1309 1310  L(P3Q6): mov    %rax, -0x33(%rdi)
1310 1311  L(P3Q5): mov    %rax, -0x2b(%rdi)
1311 1312  L(P3Q4): mov    %rax, -0x23(%rdi)
1312 1313  L(P3Q3): mov    %rax, -0x1b(%rdi)
1313 1314  L(P3Q2): mov    %rax, -0x13(%rdi)
1314 1315  L(P3Q1): mov    %rax, -0xb(%rdi)
1315 1316  L(P3Q0): mov    %ax, -0x3(%rdi)
1316 1317           mov    %al, -0x1(%rdi)
1317 1318           ret
1318 1319  
1319 1320          .p2align 4
1320 1321  L(P4Q9): mov    %rax, -0x4c(%rdi)
1321 1322  L(P4Q8): mov    %rax, -0x44(%rdi)
1322 1323  L(P4Q7): mov    %rax, -0x3c(%rdi)
1323 1324  L(P4Q6): mov    %rax, -0x34(%rdi)
1324 1325  L(P4Q5): mov    %rax, -0x2c(%rdi)
1325 1326  L(P4Q4): mov    %rax, -0x24(%rdi)
1326 1327  L(P4Q3): mov    %rax, -0x1c(%rdi)
1327 1328  L(P4Q2): mov    %rax, -0x14(%rdi)
1328 1329  L(P4Q1): mov    %rax, -0xc(%rdi)
1329 1330  L(P4Q0): mov    %eax, -0x4(%rdi)
1330 1331           ret
1331 1332  
1332 1333          .p2align 4
1333 1334  L(P5Q9): mov    %rax, -0x4d(%rdi)
1334 1335  L(P5Q8): mov    %rax, -0x45(%rdi)
1335 1336  L(P5Q7): mov    %rax, -0x3d(%rdi)
1336 1337  L(P5Q6): mov    %rax, -0x35(%rdi)
1337 1338  L(P5Q5): mov    %rax, -0x2d(%rdi)
1338 1339  L(P5Q4): mov    %rax, -0x25(%rdi)
1339 1340  L(P5Q3): mov    %rax, -0x1d(%rdi)
1340 1341  L(P5Q2): mov    %rax, -0x15(%rdi)
1341 1342  L(P5Q1): mov    %rax, -0xd(%rdi)
1342 1343  L(P5Q0): mov    %eax, -0x5(%rdi)
1343 1344           mov    %al, -0x1(%rdi)
1344 1345           ret
1345 1346  
1346 1347          .p2align 4
1347 1348  L(P6Q9): mov    %rax, -0x4e(%rdi)
1348 1349  L(P6Q8): mov    %rax, -0x46(%rdi)
1349 1350  L(P6Q7): mov    %rax, -0x3e(%rdi)
1350 1351  L(P6Q6): mov    %rax, -0x36(%rdi)
1351 1352  L(P6Q5): mov    %rax, -0x2e(%rdi)
1352 1353  L(P6Q4): mov    %rax, -0x26(%rdi)
1353 1354  L(P6Q3): mov    %rax, -0x1e(%rdi)
1354 1355  L(P6Q2): mov    %rax, -0x16(%rdi)
1355 1356  L(P6Q1): mov    %rax, -0xe(%rdi)
1356 1357  L(P6Q0): mov    %eax, -0x6(%rdi)
1357 1358           mov    %ax, -0x2(%rdi)
1358 1359           ret
1359 1360  
1360 1361          .p2align 4
1361 1362  L(P7Q9): mov    %rax, -0x4f(%rdi)
1362 1363  L(P7Q8): mov    %rax, -0x47(%rdi)
1363 1364  L(P7Q7): mov    %rax, -0x3f(%rdi)
1364 1365  L(P7Q6): mov    %rax, -0x37(%rdi)
1365 1366  L(P7Q5): mov    %rax, -0x2f(%rdi)
1366 1367  L(P7Q4): mov    %rax, -0x27(%rdi)
1367 1368  L(P7Q3): mov    %rax, -0x1f(%rdi)
1368 1369  L(P7Q2): mov    %rax, -0x17(%rdi)
1369 1370  L(P7Q1): mov    %rax, -0xf(%rdi)
1370 1371  L(P7Q0): mov    %eax, -0x7(%rdi)
1371 1372           mov    %ax, -0x3(%rdi)
1372 1373           mov    %al, -0x1(%rdi)
1373 1374           ret
1374 1375  
1375 1376          /*
1376 1377           * Align to a 16-byte boundary. Avoids penalties from unaligned stores
1377 1378           * as well as from stores spanning cachelines. Note that 16-byte
1378 1379           * alignment is better in the case where rep sstoq is used.
1379 1380           */
1380 1381          .p2align 4
1381 1382  L(ck_align):
1382 1383          test    $0xf, %rdi
1383 1384          jz      L(aligned_now)
1384 1385          test    $1, %rdi
1385 1386          jz      2f
1386 1387          mov     %al, (%rdi)
1387 1388          dec     %rsi
1388 1389          lea     1(%rdi),%rdi
1389 1390  2:
1390 1391          test    $2, %rdi
1391 1392          jz      4f
1392 1393          mov     %ax, (%rdi)
1393 1394          sub     $2, %rsi
1394 1395          lea     2(%rdi),%rdi
1395 1396  4:
1396 1397          test    $4, %rdi
1397 1398          jz      8f
1398 1399          mov     %eax, (%rdi)
1399 1400          sub     $4, %rsi
1400 1401          lea     4(%rdi),%rdi
1401 1402  8:
1402 1403          test    $8, %rdi
1403 1404          jz      L(aligned_now)
1404 1405          mov     %rax, (%rdi)
1405 1406          sub     $8, %rsi
1406 1407          lea     8(%rdi),%rdi
1407 1408  
1408 1409          /*
1409 1410           * For large sizes rep sstoq is fastest.
1410 1411           * Transition point determined experimentally as measured on
1411 1412           * Intel Xeon processors (incl. Nehalem) and AMD Opteron.
1412 1413           */
1413 1414  L(aligned_now):
1414 1415          cmp     $BZERO_USE_REP, %rsi
1415 1416          ja      L(use_rep)
1416 1417  
1417 1418          /*
1418 1419           * zero 64-bytes per loop
1419 1420           */
1420 1421          .p2align 4
1421 1422  L(bzero_loop):
1422 1423          leaq    -0x40(%rsi), %rsi
1423 1424          cmpq    $0x40, %rsi
1424 1425          movq    %rax, (%rdi) 
1425 1426          movq    %rax, 0x8(%rdi) 
1426 1427          movq    %rax, 0x10(%rdi) 
1427 1428          movq    %rax, 0x18(%rdi) 
1428 1429          movq    %rax, 0x20(%rdi) 
1429 1430          movq    %rax, 0x28(%rdi) 
1430 1431          movq    %rax, 0x30(%rdi) 
1431 1432          movq    %rax, 0x38(%rdi) 
1432 1433          leaq    0x40(%rdi), %rdi
1433 1434          jae     L(bzero_loop)
1434 1435  
1435 1436          /*
1436 1437           * Clear any remaining bytes.
1437 1438           */
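        /*
         * For illustration (not part of this change): the tail dispatch at
         * 9: below indexes the L(setPxQx) jump table by the remaining byte
         * count.  For example, with 37 bytes left, entry 37 selects L(P5Q4):
         * four quadword stores (32 bytes at -0x25..-0xd(%rdi)) followed by
         * the 5-byte P5 tail (a dword at -0x5(%rdi) and a byte at
         * -0x1(%rdi)), %rdi having already been advanced past the end.
         */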
1438 1439  9:
1439 1440          leaq    L(setPxQx)(%rip), %r10
1440 1441          addq    %rsi, %rdi
1441 1442          movslq  (%r10,%rsi,4), %rcx
1442 1443          leaq    (%rcx,%r10,1), %r10
1443 1444          jmpq    *%r10
1444 1445  
1445 1446          /*
1446 1447           * Use rep sstoq. Clear any remainder via unrolled code
1447 1448           */
1448 1449          .p2align 4
1449 1450  L(use_rep):
1450 1451          movq    %rsi, %rcx              /* get size in bytes */
1451 1452          shrq    $3, %rcx                /* count of 8-byte words to zero */
1452 1453          rep
1453 1454            sstoq                         /* %rcx = words to clear (%rax=0) */
1454 1455          andq    $7, %rsi                /* remaining bytes */
1455 1456          jnz     9b
1456 1457          ret
1457 1458  #undef  L
1458 1459          SET_SIZE(bzero_altentry)
1459 1460          SET_SIZE(bzero)
1460 1461  
1461 1462  #elif defined(__i386)
1462 1463  
1463 1464  #define ARG_ADDR        4
1464 1465  #define ARG_COUNT       8
1465 1466  
1466 1467          ENTRY(bzero)
1467 1468  #ifdef DEBUG
1468 1469          movl    postbootkernelbase, %eax
1469 1470          cmpl    %eax, ARG_ADDR(%esp)
1470 1471          jnb     0f
1471 1472          pushl   %ebp
1472 1473          movl    %esp, %ebp
1473 1474          pushl   $.bzero_panic_msg
1474 1475          call    panic
1475 1476  0:
1476 1477  #endif
1477 1478  do_zero:
1478 1479          movl    %edi, %edx
1479 1480          movl    ARG_COUNT(%esp), %ecx
1480 1481          movl    ARG_ADDR(%esp), %edi
1481 1482          shrl    $2, %ecx
1482 1483          xorl    %eax, %eax
1483 1484          rep
1484 1485            sstol
1485 1486          movl    ARG_COUNT(%esp), %ecx
1486 1487          andl    $3, %ecx
1487 1488          rep
1488 1489            sstob
1489 1490          movl    %edx, %edi
1490 1491          ret
1491 1492          SET_SIZE(bzero)
1492 1493  
1493 1494  #undef  ARG_ADDR
1494 1495  #undef  ARG_COUNT
1495 1496  
1496 1497  #endif  /* __i386 */
1497 1498  #endif  /* __lint */
1498 1499  
1499 1500  /*
1500 1501   * Transfer data to and from user space.
1501 1502   * Note that these routines can cause faults.
1502 1503   * It is assumed that the kernel has nothing mapped
1503 1504   * below KERNELBASE in the virtual address space.
1504 1505   *
1505 1506   * Note that copyin(9F) and copyout(9F) are part of the
1506 1507   * DDI/DKI which specifies that they return '-1' on "errors."
1507 1508   *
1508 1509   * Sigh.
1509 1510   *
1510 1511   * So there are two extremely similar routines - xcopyin_nta() and
1511 1512   * xcopyout_nta() which return the errno that we've faithfully computed.
1512 1513   * This allows other callers (e.g. uiomove(9F)) to work correctly.
1513 1514   * Given that these are used pretty heavily, we expand the calling
1514 1515   * sequences inline for all flavours (rather than making wrappers).
1515 1516   */
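
For illustration only (not part of this change): a minimal C sketch of the two return conventions described above.  copyin(9F) reports a fault as -1 and leaves the choice of errno to its caller, while xcopyin_nta() hands back an errno (e.g. EFAULT) directly, which is what uiomove(9F)-style callers want to propagate.  The wrapper names below are invented for the example; a copy_cached argument of 0 merely allows a non-temporal copy when size and alignment permit.

    /* Hypothetical callers; kernel context and headers assumed. */
    static int
    read_user_buf(const void *uaddr, void *kbuf, size_t len)
    {
            if (copyin(uaddr, kbuf, len) != 0)      /* DDI/DKI: -1 on fault */
                    return (EFAULT);                /* caller picks the errno */
            return (0);
    }

    static int
    read_user_buf_nta(const void *uaddr, void *kbuf, size_t len)
    {
            /* returns 0 or an errno computed by the copy path */
            return (xcopyin_nta(uaddr, kbuf, len, 0));
    }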
1516 1517  
1517 1518  /*
1518 1519   * Copy user data to kernel space.
1519 1520   */
1520 1521  
1521 1522  #if defined(__lint)
1522 1523  
1523 1524  /* ARGSUSED */
1524 1525  int
1525 1526  copyin(const void *uaddr, void *kaddr, size_t count)
1526 1527  { return (0); }
1527 1528  
1528 1529  #else   /* lint */
1529 1530  
1530 1531  #if defined(__amd64)
1531 1532  
1532 1533          ENTRY(copyin)
1533 1534          pushq   %rbp
1534 1535          movq    %rsp, %rbp
1535 1536          subq    $24, %rsp
1536 1537  
1537 1538          /*
1538 1539           * save args in case we trap and need to rerun as a copyop
1539 1540           */
1540 1541          movq    %rdi, (%rsp)
1541 1542          movq    %rsi, 0x8(%rsp)
1542 1543          movq    %rdx, 0x10(%rsp)
1543 1544  
1544 1545          movq    kernelbase(%rip), %rax
1545 1546  #ifdef DEBUG
1546 1547          cmpq    %rax, %rsi              /* %rsi = kaddr */
1547 1548          jnb     1f
1548 1549          leaq    .copyin_panic_msg(%rip), %rdi
1549 1550          xorl    %eax, %eax
1550 1551          call    panic
1551 1552  1:
1552 1553  #endif
1553 1554          /*
1554 1555           * pass lofault value as 4th argument to do_copy_fault
1555 1556           */
1556 1557          leaq    _copyin_err(%rip), %rcx
1557 1558  
1558 1559          movq    %gs:CPU_THREAD, %r9
1559 1560          cmpq    %rax, %rdi              /* test uaddr < kernelbase */
1560 1561          jae     3f                      /* take copyop if uaddr >= kernelbase */
1561 1562          SMAP_DISABLE_INSTR(0)
1562 1563          jmp     do_copy_fault           /* Takes care of leave for us */
1563 1564  
1564 1565  _copyin_err:
1565 1566          SMAP_ENABLE_INSTR(2)
1566 1567          movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */  
1567 1568          addq    $8, %rsp                /* pop bcopy_altentry call ret addr */
1568 1569  3:
1569 1570          movq    T_COPYOPS(%r9), %rax
1570 1571          cmpq    $0, %rax
1571 1572          jz      2f
1572 1573          /*
1573 1574           * reload args for the copyop
1574 1575           */
1575 1576          movq    (%rsp), %rdi
1576 1577          movq    0x8(%rsp), %rsi
1577 1578          movq    0x10(%rsp), %rdx
1578 1579          leave
1579 1580          jmp     *CP_COPYIN(%rax)
1580 1581  
1581 1582  2:      movl    $-1, %eax       
1582 1583          leave
1583 1584          ret
1584 1585          SET_SIZE(copyin)
1585 1586  
1586 1587  #elif defined(__i386)
1587 1588  
1588 1589  #define ARG_UADDR       4
1589 1590  #define ARG_KADDR       8
1590 1591  
1591 1592          ENTRY(copyin)
1592 1593          movl    kernelbase, %ecx
1593 1594  #ifdef DEBUG
1594 1595          cmpl    %ecx, ARG_KADDR(%esp)
1595 1596          jnb     1f
1596 1597          pushl   %ebp
1597 1598          movl    %esp, %ebp
1598 1599          pushl   $.copyin_panic_msg
1599 1600          call    panic
1600 1601  1:
1601 1602  #endif
1602 1603          lea     _copyin_err, %eax
1603 1604  
1604 1605          movl    %gs:CPU_THREAD, %edx
1605 1606          cmpl    %ecx, ARG_UADDR(%esp)   /* test uaddr < kernelbase */
1606 1607          jb      do_copy_fault
1607 1608          jmp     3f
1608 1609  
1609 1610  _copyin_err:
1610 1611          popl    %ecx
1611 1612          popl    %edi
1612 1613          movl    %ecx, T_LOFAULT(%edx)   /* restore original lofault */
1613 1614          popl    %esi
1614 1615          popl    %ebp
1615 1616  3:
1616 1617          movl    T_COPYOPS(%edx), %eax
1617 1618          cmpl    $0, %eax
1618 1619          jz      2f
1619 1620          jmp     *CP_COPYIN(%eax)
1620 1621  
1621 1622  2:      movl    $-1, %eax
1622 1623          ret
1623 1624          SET_SIZE(copyin)
1624 1625  
1625 1626  #undef  ARG_UADDR
1626 1627  #undef  ARG_KADDR
1627 1628  
1628 1629  #endif  /* __i386 */
1629 1630  #endif  /* __lint */
1630 1631  
1631 1632  #if defined(__lint)
1632 1633  
1633 1634  /* ARGSUSED */
1634 1635  int
1635 1636  xcopyin_nta(const void *uaddr, void *kaddr, size_t count, int copy_cached)
1636 1637  { return (0); }
1637 1638  
1638 1639  #else   /* __lint */
1639 1640  
1640 1641  #if defined(__amd64)
1641 1642  
1642 1643          ENTRY(xcopyin_nta)
1643 1644          pushq   %rbp
1644 1645          movq    %rsp, %rbp
1645 1646          subq    $24, %rsp
1646 1647  
1647 1648          /*
1648 1649           * save args in case we trap and need to rerun as a copyop
1649 1650           * %rcx is consumed in this routine so we don't need to save
1650 1651           * it.
1651 1652           */
1652 1653          movq    %rdi, (%rsp)
1653 1654          movq    %rsi, 0x8(%rsp)
1654 1655          movq    %rdx, 0x10(%rsp)
1655 1656  
1656 1657          movq    kernelbase(%rip), %rax
1657 1658  #ifdef DEBUG
1658 1659          cmpq    %rax, %rsi              /* %rsi = kaddr */
1659 1660          jnb     1f
1660 1661          leaq    .xcopyin_panic_msg(%rip), %rdi
1661 1662          xorl    %eax, %eax
1662 1663          call    panic
1663 1664  1:
1664 1665  #endif
1665 1666          movq    %gs:CPU_THREAD, %r9
1666 1667          cmpq    %rax, %rdi              /* test uaddr < kernelbase */
1667 1668          jae     4f
1668 1669          cmpq    $0, %rcx                /* No non-temporal access? */
1669 1670          /*
1670 1671           * pass lofault value as 4th argument to do_copy_fault
1671 1672           */
1672 1673          leaq    _xcopyin_err(%rip), %rcx        /* doesn't set rflags */
1673 1674          jnz     6f                      /* use regular access */
1674 1675          /*
1675 1676           * Make sure cnt is >= XCOPY_MIN_SIZE bytes
1676 1677           */
1677 1678          cmpq    $XCOPY_MIN_SIZE, %rdx
1678 1679          jae     5f
1679 1680  6:
1680 1681          SMAP_DISABLE_INSTR(1)
1681 1682          jmp     do_copy_fault
1682 1683          
1683 1684          /*
1684 1685           * Make sure src and dst are NTA_ALIGN_SIZE aligned,
1685 1686           * count is COUNT_ALIGN_SIZE aligned.
1686 1687           */
1687 1688  5:
1688 1689          movq    %rdi, %r10
1689 1690          orq     %rsi, %r10
1690 1691          andq    $NTA_ALIGN_MASK, %r10
1691 1692          orq     %rdx, %r10
1692 1693          andq    $COUNT_ALIGN_MASK, %r10
1693 1694          jnz     6b      
1694 1695          leaq    _xcopyin_nta_err(%rip), %rcx    /* doesn't set rflags */
1695 1696          SMAP_DISABLE_INSTR(2)
1696 1697          jmp     do_copy_fault_nta       /* use non-temporal access */
1697 1698          
1698 1699  4:
1699 1700          movl    $EFAULT, %eax
1700 1701          jmp     3f
1701 1702  
1702 1703          /*
1703 1704           * A fault during do_copy_fault or do_copy_fault_nta is
1704 1705           * indicated through an errno value in %rax and we iret from the
1705 1706           * trap handler to here.
1706 1707           */
1707 1708  _xcopyin_err:
1708 1709          addq    $8, %rsp                /* pop bcopy_altentry call ret addr */
1709 1710  _xcopyin_nta_err:
1710 1711          SMAP_ENABLE_INSTR(3)
1711 1712          movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
1712 1713  3:
1713 1714          movq    T_COPYOPS(%r9), %r8
1714 1715          cmpq    $0, %r8
1715 1716          jz      2f
1716 1717  
1717 1718          /*
1718 1719           * reload args for the copyop
1719 1720           */
1720 1721          movq    (%rsp), %rdi
1721 1722          movq    0x8(%rsp), %rsi
1722 1723          movq    0x10(%rsp), %rdx
1723 1724          leave
1724 1725          jmp     *CP_XCOPYIN(%r8)
1725 1726  
1726 1727  2:      leave
1727 1728          ret
1728 1729          SET_SIZE(xcopyin_nta)
1729 1730  
1730 1731  #elif defined(__i386)
1731 1732  
1732 1733  #define ARG_UADDR       4
1733 1734  #define ARG_KADDR       8
1734 1735  #define ARG_COUNT       12
1735 1736  #define ARG_CACHED      16
1736 1737  
1737 1738          .globl  use_sse_copy
1738 1739  
1739 1740          ENTRY(xcopyin_nta)
1740 1741          movl    kernelbase, %ecx
1741 1742          lea     _xcopyin_err, %eax
1742 1743          movl    %gs:CPU_THREAD, %edx
1743 1744          cmpl    %ecx, ARG_UADDR(%esp)   /* test uaddr < kernelbase */
1744 1745          jae     4f
1745 1746  
1746 1747          cmpl    $0, use_sse_copy        /* no sse support */
1747 1748          jz      do_copy_fault
1748 1749  
1749 1750          cmpl    $0, ARG_CACHED(%esp)    /* copy_cached hint set? */
1750 1751          jnz     do_copy_fault
1751 1752  
1752 1753          /*
1753 1754           * Make sure cnt is >= XCOPY_MIN_SIZE bytes
1754 1755           */
1755 1756          cmpl    $XCOPY_MIN_SIZE, ARG_COUNT(%esp)
1756 1757          jb      do_copy_fault
1757 1758          
1758 1759          /*
1759 1760           * Make sure src and dst are NTA_ALIGN_SIZE aligned,
1760 1761           * count is COUNT_ALIGN_SIZE aligned.
1761 1762           */
1762 1763          movl    ARG_UADDR(%esp), %ecx
1763 1764          orl     ARG_KADDR(%esp), %ecx
1764 1765          andl    $NTA_ALIGN_MASK, %ecx
1765 1766          orl     ARG_COUNT(%esp), %ecx
1766 1767          andl    $COUNT_ALIGN_MASK, %ecx
1767 1768          jnz     do_copy_fault
1768 1769  
1769 1770          jmp     do_copy_fault_nta       /* use non-temporal access */
1770 1771  
1771 1772  4:
1772 1773          movl    $EFAULT, %eax
1773 1774          jmp     3f
1774 1775  
1775 1776          /*
1776 1777           * A fault during do_copy_fault or do_copy_fault_nta is
1777 1778           * indicated through an errno value in %eax and we iret from the
1778 1779           * trap handler to here.
1779 1780           */
1780 1781  _xcopyin_err:
1781 1782          popl    %ecx
1782 1783          popl    %edi
1783 1784          movl    %ecx, T_LOFAULT(%edx)   /* restore original lofault */
1784 1785          popl    %esi
1785 1786          popl    %ebp
1786 1787  3:
1787 1788          cmpl    $0, T_COPYOPS(%edx)
1788 1789          jz      2f
1789 1790          movl    T_COPYOPS(%edx), %eax
1790 1791          jmp     *CP_XCOPYIN(%eax)
1791 1792  
1792 1793  2:      rep;    ret     /* use 2 byte return instruction when branch target */
1793 1794                          /* AMD Software Optimization Guide - Section 6.2 */
1794 1795          SET_SIZE(xcopyin_nta)
1795 1796  
1796 1797  #undef  ARG_UADDR
1797 1798  #undef  ARG_KADDR
1798 1799  #undef  ARG_COUNT
1799 1800  #undef  ARG_CACHED
1800 1801  
1801 1802  #endif  /* __i386 */
1802 1803  #endif  /* __lint */
1803 1804  
1804 1805  /*
1805 1806   * Copy kernel data to user space.
1806 1807   */
1807 1808  
1808 1809  #if defined(__lint)
1809 1810  
1810 1811  /* ARGSUSED */
1811 1812  int
1812 1813  copyout(const void *kaddr, void *uaddr, size_t count)
1813 1814  { return (0); }
1814 1815  
1815 1816  #else   /* __lint */
1816 1817  
1817 1818  #if defined(__amd64)
1818 1819  
1819 1820          ENTRY(copyout)
1820 1821          pushq   %rbp
1821 1822          movq    %rsp, %rbp
1822 1823          subq    $24, %rsp
1823 1824  
1824 1825          /*
1825 1826           * save args in case we trap and need to rerun as a copyop
1826 1827           */
1827 1828          movq    %rdi, (%rsp)
1828 1829          movq    %rsi, 0x8(%rsp)
1829 1830          movq    %rdx, 0x10(%rsp)
1830 1831  
1831 1832          movq    kernelbase(%rip), %rax
1832 1833  #ifdef DEBUG
1833 1834          cmpq    %rax, %rdi              /* %rdi = kaddr */
1834 1835          jnb     1f
1835 1836          leaq    .copyout_panic_msg(%rip), %rdi
1836 1837          xorl    %eax, %eax
1837 1838          call    panic
1838 1839  1:
1839 1840  #endif
1840 1841          /*
1841 1842           * pass lofault value as 4th argument to do_copy_fault
1842 1843           */
1843 1844          leaq    _copyout_err(%rip), %rcx
1844 1845  
1845 1846          movq    %gs:CPU_THREAD, %r9
1846 1847          cmpq    %rax, %rsi              /* test uaddr < kernelbase */
1847 1848          jae     3f                      /* take copyop if uaddr >= kernelbase */
1848 1849          SMAP_DISABLE_INSTR(3)
1849 1850          jmp     do_copy_fault           /* Calls leave for us */
1850 1851  
1851 1852  _copyout_err:
1852 1853          SMAP_ENABLE_INSTR(4)
1853 1854          movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
1854 1855          addq    $8, %rsp                /* pop bcopy_altentry call ret addr */
1855 1856  3:
1856 1857          movq    T_COPYOPS(%r9), %rax
1857 1858          cmpq    $0, %rax
1858 1859          jz      2f
1859 1860  
1860 1861          /*
1861 1862           * reload args for the copyop
1862 1863           */
1863 1864          movq    (%rsp), %rdi
1864 1865          movq    0x8(%rsp), %rsi
1865 1866          movq    0x10(%rsp), %rdx
1866 1867          leave
1867 1868          jmp     *CP_COPYOUT(%rax)
1868 1869  
1869 1870  2:      movl    $-1, %eax
1870 1871          leave
1871 1872          ret
1872 1873          SET_SIZE(copyout)
1873 1874  
1874 1875  #elif defined(__i386)
1875 1876  
1876 1877  #define ARG_KADDR       4
1877 1878  #define ARG_UADDR       8
1878 1879  
1879 1880          ENTRY(copyout)
1880 1881          movl    kernelbase, %ecx
1881 1882  #ifdef DEBUG
1882 1883          cmpl    %ecx, ARG_KADDR(%esp)
1883 1884          jnb     1f
1884 1885          pushl   %ebp
1885 1886          movl    %esp, %ebp
1886 1887          pushl   $.copyout_panic_msg
1887 1888          call    panic
1888 1889  1:
1889 1890  #endif
1890 1891          lea     _copyout_err, %eax
1891 1892          movl    %gs:CPU_THREAD, %edx
1892 1893          cmpl    %ecx, ARG_UADDR(%esp)   /* test uaddr < kernelbase */
1893 1894          jb      do_copy_fault
1894 1895          jmp     3f
1895 1896          
1896 1897  _copyout_err:
1897 1898          popl    %ecx
1898 1899          popl    %edi
1899 1900          movl    %ecx, T_LOFAULT(%edx)   /* restore original lofault */
1900 1901          popl    %esi
1901 1902          popl    %ebp
1902 1903  3:
1903 1904          movl    T_COPYOPS(%edx), %eax
1904 1905          cmpl    $0, %eax
1905 1906          jz      2f
1906 1907          jmp     *CP_COPYOUT(%eax)
1907 1908  
1908 1909  2:      movl    $-1, %eax
1909 1910          ret
1910 1911          SET_SIZE(copyout)
1911 1912  
1912 1913  #undef  ARG_UADDR
1913 1914  #undef  ARG_KADDR
1914 1915  
1915 1916  #endif  /* __i386 */
1916 1917  #endif  /* __lint */
1917 1918  
1918 1919  #if defined(__lint)
1919 1920  
1920 1921  /* ARGSUSED */
1921 1922  int
1922 1923  xcopyout_nta(const void *kaddr, void *uaddr, size_t count, int copy_cached)
1923 1924  { return (0); }
1924 1925  
1925 1926  #else   /* __lint */
1926 1927  
1927 1928  #if defined(__amd64)
1928 1929  
1929 1930          ENTRY(xcopyout_nta)
1930 1931          pushq   %rbp
1931 1932          movq    %rsp, %rbp
1932 1933          subq    $24, %rsp
1933 1934  
1934 1935          /*
1935 1936           * save args in case we trap and need to rerun as a copyop
1936 1937           */
1937 1938          movq    %rdi, (%rsp)
1938 1939          movq    %rsi, 0x8(%rsp)
1939 1940          movq    %rdx, 0x10(%rsp)
1940 1941  
1941 1942          movq    kernelbase(%rip), %rax
1942 1943  #ifdef DEBUG
1943 1944          cmpq    %rax, %rdi              /* %rdi = kaddr */
1944 1945          jnb     1f
1945 1946          leaq    .xcopyout_panic_msg(%rip), %rdi
1946 1947          xorl    %eax, %eax
1947 1948          call    panic
1948 1949  1:
1949 1950  #endif
1950 1951          movq    %gs:CPU_THREAD, %r9
1951 1952          cmpq    %rax, %rsi              /* test uaddr < kernelbase */
1952 1953          jae     4f
1953 1954  
1954 1955          cmpq    $0, %rcx                /* No non-temporal access? */
1955 1956          /*
1956 1957           * pass lofault value as 4th argument to do_copy_fault
1957 1958           */
1958 1959          leaq    _xcopyout_err(%rip), %rcx
1959 1960          jnz     6f
1960 1961          /*
1961 1962           * Make sure cnt is >= XCOPY_MIN_SIZE bytes
1962 1963           */
1963 1964          cmpq    $XCOPY_MIN_SIZE, %rdx
1964 1965          jae     5f
1965 1966  6:
1966 1967          SMAP_DISABLE_INSTR(4)
1967 1968          jmp     do_copy_fault
1968 1969          
1969 1970          /*
1970 1971           * Make sure src and dst are NTA_ALIGN_SIZE aligned,
1971 1972           * count is COUNT_ALIGN_SIZE aligned.
1972 1973           */
1973 1974  5:
1974 1975          movq    %rdi, %r10
1975 1976          orq     %rsi, %r10
1976 1977          andq    $NTA_ALIGN_MASK, %r10
1977 1978          orq     %rdx, %r10
1978 1979          andq    $COUNT_ALIGN_MASK, %r10
1979 1980          jnz     6b      
1980 1981          leaq    _xcopyout_nta_err(%rip), %rcx
1981 1982          SMAP_DISABLE_INSTR(5)
1982 1983          call    do_copy_fault_nta
1983 1984          SMAP_ENABLE_INSTR(5)
1984 1985          ret
1985 1986  
1986 1987  4:
1987 1988          movl    $EFAULT, %eax
1988 1989          jmp     3f
1989 1990  
1990 1991          /*
1991 1992           * A fault during do_copy_fault or do_copy_fault_nta is
1992 1993           * indicated through an errno value in %rax and we iret from the
1993 1994           * trap handler to here.
1994 1995           */
1995 1996  _xcopyout_err:
1996 1997          addq    $8, %rsp                /* pop bcopy_altentry call ret addr */
1997 1998  _xcopyout_nta_err:
1998 1999          SMAP_ENABLE_INSTR(6)
1999 2000          movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
2000 2001  3:
2001 2002          movq    T_COPYOPS(%r9), %r8
2002 2003          cmpq    $0, %r8
2003 2004          jz      2f
2004 2005  
2005 2006          /*
2006 2007           * reload args for the copyop
2007 2008           */
2008 2009          movq    (%rsp), %rdi
2009 2010          movq    0x8(%rsp), %rsi
2010 2011          movq    0x10(%rsp), %rdx
2011 2012          leave
2012 2013          jmp     *CP_XCOPYOUT(%r8)
2013 2014  
2014 2015  2:      leave
2015 2016          ret
2016 2017          SET_SIZE(xcopyout_nta)
2017 2018  
2018 2019  #elif defined(__i386)
2019 2020  
2020 2021  #define ARG_KADDR       4
2021 2022  #define ARG_UADDR       8
2022 2023  #define ARG_COUNT       12
2023 2024  #define ARG_CACHED      16
2024 2025  
2025 2026          ENTRY(xcopyout_nta)
2026 2027          movl    kernelbase, %ecx
2027 2028          lea     _xcopyout_err, %eax
2028 2029          movl    %gs:CPU_THREAD, %edx
2029 2030          cmpl    %ecx, ARG_UADDR(%esp)   /* test uaddr < kernelbase */
2030 2031          jae     4f
2031 2032  
2032 2033          cmpl    $0, use_sse_copy        /* no sse support */
2033 2034          jz      do_copy_fault
2034 2035  
2035 2036          cmpl    $0, ARG_CACHED(%esp)    /* copy_cached hint set? */
2036 2037          jnz     do_copy_fault
2037 2038  
2038 2039          /*
2039 2040           * Make sure cnt is >= XCOPY_MIN_SIZE bytes
2040 2041           */
2041 2042          cmpl    $XCOPY_MIN_SIZE, ARG_COUNT(%esp)
2042 2043          jb      do_copy_fault
2043 2044          
2044 2045          /*
2045 2046           * Make sure src and dst are NTA_ALIGN_SIZE aligned,
2046 2047           * count is COUNT_ALIGN_SIZE aligned.
2047 2048           */
2048 2049          movl    ARG_UADDR(%esp), %ecx
2049 2050          orl     ARG_KADDR(%esp), %ecx
2050 2051          andl    $NTA_ALIGN_MASK, %ecx
2051 2052          orl     ARG_COUNT(%esp), %ecx
2052 2053          andl    $COUNT_ALIGN_MASK, %ecx
2053 2054          jnz     do_copy_fault
2054 2055          jmp     do_copy_fault_nta
2055 2056  
2056 2057  4:
2057 2058          movl    $EFAULT, %eax
2058 2059          jmp     3f
2059 2060  
2060 2061          /*
2061 2062           * A fault during do_copy_fault or do_copy_fault_nta is
2062 2063           * indicated through an errno value in %eax and we iret from the
2063 2064           * trap handler to here.
2064 2065           */
2065 2066  _xcopyout_err:
2066 2067          /* restore the original lofault */
2067 2068          popl    %ecx
2068 2069          popl    %edi
2069 2070          movl    %ecx, T_LOFAULT(%edx)   /* original lofault */
2070 2071          popl    %esi
2071 2072          popl    %ebp
2072 2073  3:
2073 2074          cmpl    $0, T_COPYOPS(%edx)
2074 2075          jz      2f
2075 2076          movl    T_COPYOPS(%edx), %eax
2076 2077          jmp     *CP_XCOPYOUT(%eax)
2077 2078  
2078 2079  2:      rep;    ret     /* use 2 byte return instruction when branch target */
2079 2080                          /* AMD Software Optimization Guide - Section 6.2 */
2080 2081          SET_SIZE(xcopyout_nta)
2081 2082  
2082 2083  #undef  ARG_UADDR
2083 2084  #undef  ARG_KADDR
2084 2085  #undef  ARG_COUNT
2085 2086  #undef  ARG_CACHED
2086 2087  
2087 2088  #endif  /* __i386 */
2088 2089  #endif  /* __lint */
2089 2090  
2090 2091  /*
2091 2092   * Copy a null terminated string from one point to another in
2092 2093   * the kernel address space.
2093 2094   */
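
As an aside (not part of this change), a hedged sketch of a copystr() caller: per the copy loop below, the length returned through lencopied counts the terminating NUL, and ENAMETOOLONG is returned when maxlength is exhausted before the NUL is found.  The helper name and buffers are assumptions for the example.

    /* Hypothetical kernel-to-kernel string copy. */
    static int
    dup_kernel_name(const char *src, char *dst, size_t dstlen)
    {
            size_t copied;
            int err;

            err = copystr(src, dst, dstlen, &copied);
            if (err != 0)
                    return (err);   /* ENAMETOOLONG: dst too small for src + NUL */
            /* success: 'copied' counts the bytes stored, including the NUL */
            return (0);
    }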
2094 2095  
2095 2096  #if defined(__lint)
2096 2097  
2097 2098  /* ARGSUSED */
2098 2099  int
2099 2100  copystr(const char *from, char *to, size_t maxlength, size_t *lencopied)
2100 2101  { return (0); }
2101 2102  
2102 2103  #else   /* __lint */
2103 2104  
2104 2105  #if defined(__amd64)
2105 2106  
2106 2107          ENTRY(copystr)
2107 2108          pushq   %rbp
2108 2109          movq    %rsp, %rbp
2109 2110  #ifdef DEBUG
2110 2111          movq    kernelbase(%rip), %rax
2111 2112          cmpq    %rax, %rdi              /* %rdi = from */
2112 2113          jb      0f
2113 2114          cmpq    %rax, %rsi              /* %rsi = to */
2114 2115          jnb     1f
2115 2116  0:      leaq    .copystr_panic_msg(%rip), %rdi
2116 2117          xorl    %eax, %eax
2117 2118          call    panic
2118 2119  1:
2119 2120  #endif
2120 2121          movq    %gs:CPU_THREAD, %r9
2121 2122          movq    T_LOFAULT(%r9), %r8     /* pass current lofault value as */
2122 2123                                          /* 5th argument to do_copystr */
2123 2124          xorl    %r10d,%r10d             /* pass smap restore need in %r10d */
2124 2125                                          /* as a non-ABI 6th arg */
2125 2126  do_copystr:
2126 2127          movq    %gs:CPU_THREAD, %r9     /* %r9 = thread addr */
2127 2128          movq    T_LOFAULT(%r9), %r11    /* save the current lofault */
2128 2129          movq    %r8, T_LOFAULT(%r9)     /* new lofault */
2129 2130  
2130 2131          movq    %rdx, %r8               /* save maxlength */
2131 2132  
2132 2133          cmpq    $0, %rdx                /* %rdx = maxlength */
2133 2134          je      copystr_enametoolong    /* maxlength == 0 */
2134 2135  
2135 2136  copystr_loop:
2136 2137          decq    %r8
2137 2138          movb    (%rdi), %al
2138 2139          incq    %rdi
2139 2140          movb    %al, (%rsi)
2140 2141          incq    %rsi
2141 2142          cmpb    $0, %al
2142 2143          je      copystr_null            /* null char */
2143 2144          cmpq    $0, %r8
2144 2145          jne     copystr_loop
2145 2146  
2146 2147  copystr_enametoolong:
2147 2148          movl    $ENAMETOOLONG, %eax
2148 2149          jmp     copystr_out
2149 2150  
2150 2151  copystr_null:
2151 2152          xorl    %eax, %eax              /* no error */
2152 2153  
2153 2154  copystr_out:
2154 2155          cmpq    $0, %rcx                /* want length? */
2155 2156          je      copystr_smap            /* no */
2156 2157          subq    %r8, %rdx               /* compute length and store it */
2157 2158          movq    %rdx, (%rcx)
2158 2159  
2159 2160  copystr_smap:
2160 2161          cmpl    $0, %r10d
2161 2162          jz      copystr_done
2162 2163          SMAP_ENABLE_INSTR(7)
2163 2164  
2164 2165  copystr_done:
2165 2166          movq    %r11, T_LOFAULT(%r9)    /* restore the original lofault */
2166 2167          leave
2167 2168          ret
2168 2169          SET_SIZE(copystr)
2169 2170  
2170 2171  #elif defined(__i386)
2171 2172  
2172 2173  #define ARG_FROM        8
2173 2174  #define ARG_TO          12
2174 2175  #define ARG_MAXLEN      16
2175 2176  #define ARG_LENCOPIED   20
2176 2177  
2177 2178          ENTRY(copystr)
2178 2179  #ifdef DEBUG
2179 2180          pushl   %ebp
2180 2181          movl    %esp, %ebp
2181 2182          movl    kernelbase, %eax
2182 2183          cmpl    %eax, ARG_FROM(%esp)
2183 2184          jb      0f
2184 2185          cmpl    %eax, ARG_TO(%esp)
2185 2186          jnb     1f
2186 2187  0:      pushl   $.copystr_panic_msg
2187 2188          call    panic
2188 2189  1:      popl    %ebp
2189 2190  #endif
2190 2191          /* get the current lofault address */
2191 2192          movl    %gs:CPU_THREAD, %eax
2192 2193          movl    T_LOFAULT(%eax), %eax
2193 2194  do_copystr:
2194 2195          pushl   %ebp                    /* setup stack frame */
2195 2196          movl    %esp, %ebp
2196 2197          pushl   %ebx                    /* save registers */
2197 2198          pushl   %edi
2198 2199  
2199 2200          movl    %gs:CPU_THREAD, %ebx    
2200 2201          movl    T_LOFAULT(%ebx), %edi
2201 2202          pushl   %edi                    /* save the current lofault */
2202 2203          movl    %eax, T_LOFAULT(%ebx)   /* new lofault */
2203 2204  
2204 2205          movl    ARG_MAXLEN(%ebp), %ecx
2205 2206          cmpl    $0, %ecx
2206 2207          je      copystr_enametoolong    /* maxlength == 0 */
2207 2208  
2208 2209          movl    ARG_FROM(%ebp), %ebx    /* source address */
2209 2210          movl    ARG_TO(%ebp), %edx      /* destination address */
2210 2211  
2211 2212  copystr_loop:
2212 2213          decl    %ecx
2213 2214          movb    (%ebx), %al
2214 2215          incl    %ebx    
2215 2216          movb    %al, (%edx)
2216 2217          incl    %edx
2217 2218          cmpb    $0, %al
2218 2219          je      copystr_null            /* null char */
2219 2220          cmpl    $0, %ecx
2220 2221          jne     copystr_loop
2221 2222  
2222 2223  copystr_enametoolong:
2223 2224          movl    $ENAMETOOLONG, %eax
2224 2225          jmp     copystr_out
2225 2226  
2226 2227  copystr_null:
2227 2228          xorl    %eax, %eax              /* no error */
2228 2229  
2229 2230  copystr_out:
2230 2231          cmpl    $0, ARG_LENCOPIED(%ebp) /* want length? */
2231 2232          je      copystr_done            /* no */
2232 2233          movl    ARG_MAXLEN(%ebp), %edx
2233 2234          subl    %ecx, %edx              /* compute length and store it */
2234 2235          movl    ARG_LENCOPIED(%ebp), %ecx
2235 2236          movl    %edx, (%ecx)
2236 2237  
2237 2238  copystr_done:
2238 2239          popl    %edi
2239 2240          movl    %gs:CPU_THREAD, %ebx    
2240 2241          movl    %edi, T_LOFAULT(%ebx)   /* restore the original lofault */
2241 2242  
2242 2243          popl    %edi
2243 2244          popl    %ebx
2244 2245          popl    %ebp
2245 2246          ret     
2246 2247          SET_SIZE(copystr)
2247 2248  
2248 2249  #undef  ARG_FROM
2249 2250  #undef  ARG_TO
2250 2251  #undef  ARG_MAXLEN
2251 2252  #undef  ARG_LENCOPIED
2252 2253  
2253 2254  #endif  /* __i386 */
2254 2255  #endif  /* __lint */
2255 2256  
2256 2257  /*
2257 2258   * Copy a null terminated string from the user address space into
2258 2259   * the kernel address space.
2259 2260   */
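
Illustration only (not part of this change): unlike copyin() above, copyinstr() reports a bad user address as EFAULT directly rather than -1, alongside 0 on success and ENAMETOOLONG when the string does not fit.  The helper below is a hypothetical caller.

    static int
    get_user_path(const char *upath, char *kpath, size_t kpathlen)
    {
            size_t len;
            int err;

            err = copyinstr(upath, kpath, kpathlen, &len);
            if (err != 0)
                    return (err);   /* EFAULT or ENAMETOOLONG */
            /* 'len' includes the terminating NUL copied from user space */
            return (0);
    }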
2260 2261  
2261 2262  #if defined(__lint)
2262 2263  
2263 2264  /* ARGSUSED */
2264 2265  int
2265 2266  copyinstr(const char *uaddr, char *kaddr, size_t maxlength,
2266 2267      size_t *lencopied)
2267 2268  { return (0); }
2268 2269  
2269 2270  #else   /* __lint */
2270 2271  
2271 2272  #if defined(__amd64)
2272 2273  
2273 2274          ENTRY(copyinstr)
2274 2275          pushq   %rbp
2275 2276          movq    %rsp, %rbp
2276 2277          subq    $32, %rsp
2277 2278  
2278 2279          /*
2279 2280           * save args in case we trap and need to rerun as a copyop
2280 2281           */
2281 2282          movq    %rdi, (%rsp)
2282 2283          movq    %rsi, 0x8(%rsp)
2283 2284          movq    %rdx, 0x10(%rsp)
2284 2285          movq    %rcx, 0x18(%rsp)
2285 2286  
2286 2287          movq    kernelbase(%rip), %rax
2287 2288  #ifdef DEBUG
2288 2289          cmpq    %rax, %rsi              /* %rsi = kaddr */
2289 2290          jnb     1f
2290 2291          leaq    .copyinstr_panic_msg(%rip), %rdi
2291 2292          xorl    %eax, %eax
2292 2293          call    panic
2293 2294  1:
2294 2295  #endif
2295 2296          /*
2296 2297           * pass lofault value as 5th argument to do_copystr
2297 2298           * do_copystr expects the SMAP re-enable flag (nonzero) in %r10d
2298 2299           */
2299 2300          leaq    _copyinstr_error(%rip), %r8
2300 2301          movl    $1, %r10d
2301 2302  
2302 2303          cmpq    %rax, %rdi              /* test uaddr < kernelbase */
2303 2304          jae     4f
2304 2305          SMAP_DISABLE_INSTR(6)
2305 2306          jmp     do_copystr
2306 2307  4:
2307 2308          movq    %gs:CPU_THREAD, %r9
2308 2309          jmp     3f
2309 2310  
2310 2311  _copyinstr_error:
2311 2312          SMAP_ENABLE_INSTR(8)
2312 2313          movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
2313 2314  3:
2314 2315          movq    T_COPYOPS(%r9), %rax
2315 2316          cmpq    $0, %rax
2316 2317          jz      2f
2317 2318  
2318 2319          /*
2319 2320           * reload args for the copyop
2320 2321           */
2321 2322          movq    (%rsp), %rdi
2322 2323          movq    0x8(%rsp), %rsi
2323 2324          movq    0x10(%rsp), %rdx
2324 2325          movq    0x18(%rsp), %rcx
2325 2326          leave
2326 2327          jmp     *CP_COPYINSTR(%rax)
2327 2328          
2328 2329  2:      movl    $EFAULT, %eax           /* return EFAULT */
2329 2330          leave
2330 2331          ret
2331 2332          SET_SIZE(copyinstr)
2332 2333  
2333 2334  #elif defined(__i386)
2334 2335  
2335 2336  #define ARG_UADDR       4
2336 2337  #define ARG_KADDR       8
2337 2338  
2338 2339          ENTRY(copyinstr)
2339 2340          movl    kernelbase, %ecx
2340 2341  #ifdef DEBUG
2341 2342          cmpl    %ecx, ARG_KADDR(%esp)
2342 2343          jnb     1f
2343 2344          pushl   %ebp
2344 2345          movl    %esp, %ebp
2345 2346          pushl   $.copyinstr_panic_msg
2346 2347          call    panic
2347 2348  1:
2348 2349  #endif
2349 2350          lea     _copyinstr_error, %eax
2350 2351          cmpl    %ecx, ARG_UADDR(%esp)   /* test uaddr < kernelbase */
2351 2352          jb      do_copystr
2352 2353          movl    %gs:CPU_THREAD, %edx
2353 2354          jmp     3f
2354 2355  
2355 2356  _copyinstr_error:
2356 2357          popl    %edi
2357 2358          movl    %gs:CPU_THREAD, %edx    
2358 2359          movl    %edi, T_LOFAULT(%edx)   /* original lofault */
2359 2360  
2360 2361          popl    %edi
2361 2362          popl    %ebx
2362 2363          popl    %ebp
2363 2364  3:
2364 2365          movl    T_COPYOPS(%edx), %eax
2365 2366          cmpl    $0, %eax
2366 2367          jz      2f
2367 2368          jmp     *CP_COPYINSTR(%eax)
2368 2369          
2369 2370  2:      movl    $EFAULT, %eax           /* return EFAULT */
2370 2371          ret
2371 2372          SET_SIZE(copyinstr)
2372 2373  
2373 2374  #undef  ARG_UADDR
2374 2375  #undef  ARG_KADDR
2375 2376  
2376 2377  #endif  /* __i386 */
2377 2378  #endif  /* __lint */
2378 2379  
2379 2380  /*
2380 2381   * Copy a null terminated string from the kernel
2381 2382   * address space to the user address space.
2382 2383   */
2383 2384  
2384 2385  #if defined(__lint)
2385 2386  
2386 2387  /* ARGSUSED */
2387 2388  int
2388 2389  copyoutstr(const char *kaddr, char *uaddr, size_t maxlength,
2389 2390      size_t *lencopied)
2390 2391  { return (0); }
2391 2392  
2392 2393  #else   /* __lint */
2393 2394  
2394 2395  #if defined(__amd64)
2395 2396  
2396 2397          ENTRY(copyoutstr)
2397 2398          pushq   %rbp
2398 2399          movq    %rsp, %rbp
2399 2400          subq    $32, %rsp
2400 2401  
2401 2402          /*
2402 2403           * save args in case we trap and need to rerun as a copyop
2403 2404           */
2404 2405          movq    %rdi, (%rsp)
2405 2406          movq    %rsi, 0x8(%rsp)
2406 2407          movq    %rdx, 0x10(%rsp)
2407 2408          movq    %rcx, 0x18(%rsp)
2408 2409  
2409 2410          movq    kernelbase(%rip), %rax
2410 2411  #ifdef DEBUG
2411 2412          cmpq    %rax, %rdi              /* %rdi = kaddr */
2412 2413          jnb     1f
2413 2414          leaq    .copyoutstr_panic_msg(%rip), %rdi
2414 2415          jmp     call_panic              /* setup stack and call panic */
2415 2416  1:
2416 2417  #endif
2417 2418          /*
2418 2419           * pass lofault value as 5th argument to do_copystr
2419 2420           * pass one as 6th argument to do_copystr in %r10d
2420 2421           */
2421 2422          leaq    _copyoutstr_error(%rip), %r8
2422 2423          movl    $1, %r10d
2423 2424  
2424 2425          cmpq    %rax, %rsi              /* test uaddr < kernelbase */
2425 2426          jae     4f
2426 2427          SMAP_DISABLE_INSTR(7)
2427 2428          jmp     do_copystr
2428 2429  4:
2429 2430          movq    %gs:CPU_THREAD, %r9
2430 2431          jmp     3f
2431 2432  
2432 2433  _copyoutstr_error:
2433 2434          SMAP_ENABLE_INSTR(9)
2434 2435          movq    %r11, T_LOFAULT(%r9)    /* restore the original lofault */
2435 2436  3:
2436 2437          movq    T_COPYOPS(%r9), %rax
2437 2438          cmpq    $0, %rax
2438 2439          jz      2f
2439 2440  
2440 2441          /*
2441 2442           * reload args for the copyop
2442 2443           */
2443 2444          movq    (%rsp), %rdi
2444 2445          movq    0x8(%rsp), %rsi
2445 2446          movq    0x10(%rsp), %rdx
2446 2447          movq    0x18(%rsp), %rcx
2447 2448          leave
2448 2449          jmp     *CP_COPYOUTSTR(%rax)
2449 2450          
2450 2451  2:      movl    $EFAULT, %eax           /* return EFAULT */
2451 2452          leave
2452 2453          ret
2453 2454          SET_SIZE(copyoutstr)    
2454 2455          
2455 2456  #elif defined(__i386)
2456 2457  
2457 2458  #define ARG_KADDR       4
2458 2459  #define ARG_UADDR       8
2459 2460  
2460 2461          ENTRY(copyoutstr)
2461 2462          movl    kernelbase, %ecx
2462 2463  #ifdef DEBUG
2463 2464          cmpl    %ecx, ARG_KADDR(%esp)
2464 2465          jnb     1f
2465 2466          pushl   %ebp
2466 2467          movl    %esp, %ebp
2467 2468          pushl   $.copyoutstr_panic_msg
2468 2469          call    panic
2469 2470  1:
2470 2471  #endif
2471 2472          lea     _copyoutstr_error, %eax
2472 2473          cmpl    %ecx, ARG_UADDR(%esp)   /* test uaddr < kernelbase */
2473 2474          jb      do_copystr
2474 2475          movl    %gs:CPU_THREAD, %edx
2475 2476          jmp     3f
2476 2477  
2477 2478  _copyoutstr_error:
2478 2479          popl    %edi
2479 2480          movl    %gs:CPU_THREAD, %edx    
2480 2481          movl    %edi, T_LOFAULT(%edx)   /* restore the original lofault */
2481 2482  
2482 2483          popl    %edi
2483 2484          popl    %ebx
2484 2485          popl    %ebp
2485 2486  3:
2486 2487          movl    T_COPYOPS(%edx), %eax
2487 2488          cmpl    $0, %eax
2488 2489          jz      2f
2489 2490          jmp     *CP_COPYOUTSTR(%eax)
2490 2491  
2491 2492  2:      movl    $EFAULT, %eax           /* return EFAULT */
2492 2493          ret
2493 2494          SET_SIZE(copyoutstr)
2494 2495          
2495 2496  #undef  ARG_KADDR
2496 2497  #undef  ARG_UADDR
2497 2498  
2498 2499  #endif  /* __i386 */
2499 2500  #endif  /* __lint */
2500 2501  
2501 2502  /*
2502 2503   * Since all of the fuword() variants are so similar, we have a macro to spit
2503 2504   * them out.  This allows us to create DTrace-unobservable functions easily.
2504 2505   */
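
For illustration (not part of this change): the macro-generated fuword*() routines fetch a naturally sized word from a user address and return 0 on success or -1 on a fault, falling back to the thread's copyops when present.  The wrapper below is hypothetical; the choice of EFAULT is the caller's.

    static int
    peek_user_u32(const void *uaddr, uint32_t *valp)
    {
            if (fuword32(uaddr, valp) == -1)
                    return (EFAULT);        /* fault, or address above kernelbase */
            return (0);
    }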
2505 2506          
2506 2507  #if defined(__lint)
2507 2508  
2508 2509  #if defined(__amd64)
2509 2510  
2510 2511  /* ARGSUSED */
2511 2512  int
2512 2513  fuword64(const void *addr, uint64_t *dst)
2513 2514  { return (0); }
2514 2515  
2515 2516  #endif
2516 2517  
2517 2518  /* ARGSUSED */
2518 2519  int
2519 2520  fuword32(const void *addr, uint32_t *dst)
2520 2521  { return (0); }
2521 2522  
2522 2523  /* ARGSUSED */
2523 2524  int
2524 2525  fuword16(const void *addr, uint16_t *dst)
2525 2526  { return (0); }
2526 2527  
2527 2528  /* ARGSUSED */
2528 2529  int
2529 2530  fuword8(const void *addr, uint8_t *dst)
2530 2531  { return (0); }
2531 2532  
2532 2533  #else   /* __lint */
2533 2534  
2534 2535  #if defined(__amd64)
2535 2536  
2536 2537  /*
2537 2538   * Note that we don't save and reload the arguments here
2538 2539   * because their values are not altered in the copy path.
2539 2540   * Additionally, when successful, the smap_enable jmp will
2540 2541   * actually return us to our original caller.
2541 2542   */
2542 2543  
2543 2544  #define FUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2)      \
2544 2545          ENTRY(NAME)                             \
2545 2546          movq    %gs:CPU_THREAD, %r9;            \
2546 2547          cmpq    kernelbase(%rip), %rdi;         \
2547 2548          jae     1f;                             \
2548 2549          leaq    _flt_/**/NAME, %rdx;            \
2549 2550          movq    %rdx, T_LOFAULT(%r9);           \
2550 2551          SMAP_DISABLE_INSTR(DISNUM)              \
2551 2552          INSTR   (%rdi), REG;                    \
2552 2553          movq    $0, T_LOFAULT(%r9);             \
2553 2554          INSTR   REG, (%rsi);                    \
2554 2555          xorl    %eax, %eax;                     \
2555 2556          SMAP_ENABLE_INSTR(EN1)                  \
2556 2557          ret;                                    \
2557 2558  _flt_/**/NAME:                                  \
2558 2559          SMAP_ENABLE_INSTR(EN2)                  \
2559 2560          movq    $0, T_LOFAULT(%r9);             \
2560 2561  1:                                              \
2561 2562          movq    T_COPYOPS(%r9), %rax;           \
2562 2563          cmpq    $0, %rax;                       \
2563 2564          jz      2f;                             \
2564 2565          jmp     *COPYOP(%rax);                  \
2565 2566  2:                                              \
2566 2567          movl    $-1, %eax;                      \
2567 2568          ret;                                    \
2568 2569          SET_SIZE(NAME)
2569 2570          
2570 2571          FUWORD(fuword64, movq, %rax, CP_FUWORD64,8,10,11)
2571 2572          FUWORD(fuword32, movl, %eax, CP_FUWORD32,9,12,13)
2572 2573          FUWORD(fuword16, movw, %ax, CP_FUWORD16,10,14,15)
2573 2574          FUWORD(fuword8, movb, %al, CP_FUWORD8,11,16,17)
2574 2575  
2575 2576  #elif defined(__i386)
2576 2577  
2577 2578  #define FUWORD(NAME, INSTR, REG, COPYOP)        \
2578 2579          ENTRY(NAME)                             \
2579 2580          movl    %gs:CPU_THREAD, %ecx;           \
2580 2581          movl    kernelbase, %eax;               \
2581 2582          cmpl    %eax, 4(%esp);                  \
2582 2583          jae     1f;                             \
2583 2584          lea     _flt_/**/NAME, %edx;            \
2584 2585          movl    %edx, T_LOFAULT(%ecx);          \
2585 2586          movl    4(%esp), %eax;                  \
2586 2587          movl    8(%esp), %edx;                  \
2587 2588          INSTR   (%eax), REG;                    \
2588 2589          movl    $0, T_LOFAULT(%ecx);            \
2589 2590          INSTR   REG, (%edx);                    \
2590 2591          xorl    %eax, %eax;                     \
2591 2592          ret;                                    \
2592 2593  _flt_/**/NAME:                                  \
2593 2594          movl    $0, T_LOFAULT(%ecx);            \
2594 2595  1:                                              \
2595 2596          movl    T_COPYOPS(%ecx), %eax;          \
2596 2597          cmpl    $0, %eax;                       \
2597 2598          jz      2f;                             \
2598 2599          jmp     *COPYOP(%eax);                  \
2599 2600  2:                                              \
2600 2601          movl    $-1, %eax;                      \
2601 2602          ret;                                    \
2602 2603          SET_SIZE(NAME)
2603 2604  
2604 2605          FUWORD(fuword32, movl, %eax, CP_FUWORD32)
2605 2606          FUWORD(fuword16, movw, %ax, CP_FUWORD16)
2606 2607          FUWORD(fuword8, movb, %al, CP_FUWORD8)
2607 2608  
2608 2609  #endif  /* __i386 */
2609 2610  
2610 2611  #undef  FUWORD
2611 2612  
2612 2613  #endif  /* __lint */
2613 2614  
2614 2615  /*
2615 2616   * Set user word.
2616 2617   */
2617 2618  
2618 2619  #if defined(__lint)
2619 2620  
2620 2621  #if defined(__amd64)
2621 2622  
2622 2623  /* ARGSUSED */
2623 2624  int
2624 2625  suword64(void *addr, uint64_t value)
2625 2626  { return (0); }
2626 2627  
2627 2628  #endif
2628 2629  
2629 2630  /* ARGSUSED */
2630 2631  int
2631 2632  suword32(void *addr, uint32_t value)
2632 2633  { return (0); }
2633 2634  
2634 2635  /* ARGSUSED */
2635 2636  int
2636 2637  suword16(void *addr, uint16_t value)
2637 2638  { return (0); }
2638 2639  
2639 2640  /* ARGSUSED */
2640 2641  int
2641 2642  suword8(void *addr, uint8_t value)
2642 2643  { return (0); }
2643 2644  
2644 2645  #else   /* lint */
2645 2646  
2646 2647  #if defined(__amd64)
2647 2648  
2648 2649  /*
2649 2650   * Note that we don't save and reload the arguments here
2650 2651   * because their values are not altered in the copy path.
2651 2652   */
2652 2653  
2653 2654  #define SUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2)      \
2654 2655          ENTRY(NAME)                             \
2655 2656          movq    %gs:CPU_THREAD, %r9;            \
2656 2657          cmpq    kernelbase(%rip), %rdi;         \
2657 2658          jae     1f;                             \
2658 2659          leaq    _flt_/**/NAME, %rdx;            \
2659 2660          SMAP_DISABLE_INSTR(DISNUM)              \
2660 2661          movq    %rdx, T_LOFAULT(%r9);           \
2661 2662          INSTR   REG, (%rdi);                    \
2662 2663          movq    $0, T_LOFAULT(%r9);             \
2663 2664          xorl    %eax, %eax;                     \
2664 2665          SMAP_ENABLE_INSTR(EN1)                  \
2665 2666          ret;                                    \
2666 2667  _flt_/**/NAME:                                  \
2667 2668          SMAP_ENABLE_INSTR(EN2)                  \
2668 2669          movq    $0, T_LOFAULT(%r9);             \
2669 2670  1:                                              \
2670 2671          movq    T_COPYOPS(%r9), %rax;           \
2671 2672          cmpq    $0, %rax;                       \
2672 2673          jz      3f;                             \
2673 2674          jmp     *COPYOP(%rax);                  \
2674 2675  3:                                              \
2675 2676          movl    $-1, %eax;                      \
2676 2677          ret;                                    \
2677 2678          SET_SIZE(NAME)
2678 2679  
2679 2680          SUWORD(suword64, movq, %rsi, CP_SUWORD64,12,18,19)
2680 2681          SUWORD(suword32, movl, %esi, CP_SUWORD32,13,20,21)
2681 2682          SUWORD(suword16, movw, %si, CP_SUWORD16,14,22,23)
2682 2683          SUWORD(suword8, movb, %sil, CP_SUWORD8,15,24,25)
2683 2684  
2684 2685  #elif defined(__i386)
2685 2686  
2686 2687  #define SUWORD(NAME, INSTR, REG, COPYOP)        \
2687 2688          ENTRY(NAME)                             \
2688 2689          movl    %gs:CPU_THREAD, %ecx;           \
2689 2690          movl    kernelbase, %eax;               \
2690 2691          cmpl    %eax, 4(%esp);                  \
2691 2692          jae     1f;                             \
2692 2693          lea     _flt_/**/NAME, %edx;            \
2693 2694          movl    %edx, T_LOFAULT(%ecx);          \
2694 2695          movl    4(%esp), %eax;                  \
2695 2696          movl    8(%esp), %edx;                  \
2696 2697          INSTR   REG, (%eax);                    \
2697 2698          movl    $0, T_LOFAULT(%ecx);            \
2698 2699          xorl    %eax, %eax;                     \
2699 2700          ret;                                    \
2700 2701  _flt_/**/NAME:                                  \
2701 2702          movl    $0, T_LOFAULT(%ecx);            \
2702 2703  1:                                              \
2703 2704          movl    T_COPYOPS(%ecx), %eax;          \
2704 2705          cmpl    $0, %eax;                       \
2705 2706          jz      3f;                             \
2706 2707          movl    COPYOP(%eax), %ecx;             \
2707 2708          jmp     *%ecx;                          \
2708 2709  3:                                              \
2709 2710          movl    $-1, %eax;                      \
2710 2711          ret;                                    \
2711 2712          SET_SIZE(NAME)
2712 2713  
2713 2714          SUWORD(suword32, movl, %edx, CP_SUWORD32)
2714 2715          SUWORD(suword16, movw, %dx, CP_SUWORD16)
2715 2716          SUWORD(suword8, movb, %dl, CP_SUWORD8)
2716 2717  
2717 2718  #endif  /* __i386 */
2718 2719  
2719 2720  #undef  SUWORD
2720 2721  
2721 2722  #endif  /* __lint */
2722 2723  
2723 2724  #if defined(__lint)
2724 2725  
2725 2726  #if defined(__amd64)
2726 2727  
2727 2728  /*ARGSUSED*/
2728 2729  void
2729 2730  fuword64_noerr(const void *addr, uint64_t *dst)
2730 2731  {}
2731 2732  
2732 2733  #endif
2733 2734  
2734 2735  /*ARGSUSED*/
2735 2736  void
2736 2737  fuword32_noerr(const void *addr, uint32_t *dst)
2737 2738  {}
2738 2739  
2739 2740  /*ARGSUSED*/
2740 2741  void
2741 2742  fuword8_noerr(const void *addr, uint8_t *dst)
2742 2743  {}
2743 2744  
2744 2745  /*ARGSUSED*/
2745 2746  void
2746 2747  fuword16_noerr(const void *addr, uint16_t *dst)
2747 2748  {}
2748 2749  
2749 2750  #else   /* __lint */
2750 2751  
2751 2752  #if defined(__amd64)
2752 2753  
2753 2754  #define FUWORD_NOERR(NAME, INSTR, REG)          \
2754 2755          ENTRY(NAME)                             \
2755 2756          cmpq    kernelbase(%rip), %rdi;         \
2756 2757          cmovnbq kernelbase(%rip), %rdi;         \
2757 2758          INSTR   (%rdi), REG;                    \
2758 2759          INSTR   REG, (%rsi);                    \
2759 2760          ret;                                    \
2760 2761          SET_SIZE(NAME)
2761 2762  
2762 2763          FUWORD_NOERR(fuword64_noerr, movq, %rax)
2763 2764          FUWORD_NOERR(fuword32_noerr, movl, %eax)
2764 2765          FUWORD_NOERR(fuword16_noerr, movw, %ax)
2765 2766          FUWORD_NOERR(fuword8_noerr, movb, %al)
2766 2767  
2767 2768  #elif defined(__i386)
2768 2769  
2769 2770  #define FUWORD_NOERR(NAME, INSTR, REG)          \
2770 2771          ENTRY(NAME)                             \
2771 2772          movl    4(%esp), %eax;                  \
2772 2773          cmpl    kernelbase, %eax;               \
2773 2774          jb      1f;                             \
2774 2775          movl    kernelbase, %eax;               \
2775 2776  1:      movl    8(%esp), %edx;                  \
2776 2777          INSTR   (%eax), REG;                    \
2777 2778          INSTR   REG, (%edx);                    \
2778 2779          ret;                                    \
2779 2780          SET_SIZE(NAME)
2780 2781  
2781 2782          FUWORD_NOERR(fuword32_noerr, movl, %ecx)
2782 2783          FUWORD_NOERR(fuword16_noerr, movw, %cx)
2783 2784          FUWORD_NOERR(fuword8_noerr, movb, %cl)
2784 2785  
2785 2786  #endif  /* __i386 */
2786 2787  
2787 2788  #undef  FUWORD_NOERR
2788 2789  
2789 2790  #endif  /* __lint */
2790 2791  
2791 2792  #if defined(__lint)
2792 2793  
2793 2794  #if defined(__amd64)
2794 2795  
2795 2796  /*ARGSUSED*/
2796 2797  void
2797 2798  suword64_noerr(void *addr, uint64_t value)
2798 2799  {}
2799 2800  
2800 2801  #endif
2801 2802  
2802 2803  /*ARGSUSED*/
2803 2804  void
2804 2805  suword32_noerr(void *addr, uint32_t value)
2805 2806  {}
2806 2807  
2807 2808  /*ARGSUSED*/
2808 2809  void
2809 2810  suword16_noerr(void *addr, uint16_t value)
2810 2811  {}
2811 2812  
2812 2813  /*ARGSUSED*/
2813 2814  void
2814 2815  suword8_noerr(void *addr, uint8_t value)
2815 2816  {}
2816 2817  
2817 2818  #else   /* lint */
2818 2819  
2819 2820  #if defined(__amd64)
2820 2821  
2821 2822  #define SUWORD_NOERR(NAME, INSTR, REG)          \
2822 2823          ENTRY(NAME)                             \
2823 2824          cmpq    kernelbase(%rip), %rdi;         \
2824 2825          cmovnbq kernelbase(%rip), %rdi;         \
2825 2826          INSTR   REG, (%rdi);                    \
2826 2827          ret;                                    \
2827 2828          SET_SIZE(NAME)
2828 2829  
2829 2830          SUWORD_NOERR(suword64_noerr, movq, %rsi)
2830 2831          SUWORD_NOERR(suword32_noerr, movl, %esi)
2831 2832          SUWORD_NOERR(suword16_noerr, movw, %si)
2832 2833          SUWORD_NOERR(suword8_noerr, movb, %sil)
2833 2834  
2834 2835  #elif defined(__i386)
2835 2836  
2836 2837  #define SUWORD_NOERR(NAME, INSTR, REG)          \
2837 2838          ENTRY(NAME)                             \
2838 2839          movl    4(%esp), %eax;                  \
2839 2840          cmpl    kernelbase, %eax;               \
2840 2841          jb      1f;                             \
2841 2842          movl    kernelbase, %eax;               \
2842 2843  1:                                              \
2843 2844          movl    8(%esp), %edx;                  \
2844 2845          INSTR   REG, (%eax);                    \
2845 2846          ret;                                    \
2846 2847          SET_SIZE(NAME)
2847 2848  
2848 2849          SUWORD_NOERR(suword32_noerr, movl, %edx)
2849 2850          SUWORD_NOERR(suword16_noerr, movw, %dx)
2850 2851          SUWORD_NOERR(suword8_noerr, movb, %dl)
2851 2852  
2852 2853  #endif  /* __i386 */
2853 2854  
2854 2855  #undef  SUWORD_NOERR
2855 2856  
2856 2857  #endif  /* lint */
2857 2858  
2858 2859  
2859 2860  #if defined(__lint)
2860 2861  
2861 2862  /*ARGSUSED*/
2862 2863  int
2863 2864  subyte(void *addr, uchar_t value)
2864 2865  { return (0); }
2865 2866  
2866 2867  /*ARGSUSED*/
2867 2868  void
2868 2869  subyte_noerr(void *addr, uchar_t value)
2869 2870  {}
2870 2871  
2871 2872  /*ARGSUSED*/
2872 2873  int
2873 2874  fulword(const void *addr, ulong_t *valuep)
2874 2875  { return (0); }
2875 2876  
2876 2877  /*ARGSUSED*/
2877 2878  void
2878 2879  fulword_noerr(const void *addr, ulong_t *valuep)
2879 2880  {}
2880 2881  
2881 2882  /*ARGSUSED*/
2882 2883  int
2883 2884  sulword(void *addr, ulong_t valuep)
2884 2885  { return (0); }
2885 2886  
2886 2887  /*ARGSUSED*/
2887 2888  void
2888 2889  sulword_noerr(void *addr, ulong_t valuep)
2889 2890  {}
2890 2891  
2891 2892  #else
2892 2893  
2893 2894          .weak   subyte
2894 2895          subyte=suword8
2895 2896          .weak   subyte_noerr
2896 2897          subyte_noerr=suword8_noerr
2897 2898  
2898 2899  #if defined(__amd64)
2899 2900  
2900 2901          .weak   fulword
2901 2902          fulword=fuword64
2902 2903          .weak   fulword_noerr
2903 2904          fulword_noerr=fuword64_noerr
2904 2905          .weak   sulword
2905 2906          sulword=suword64
2906 2907          .weak   sulword_noerr
2907 2908          sulword_noerr=suword64_noerr
2908 2909  
2909 2910  #elif defined(__i386)
2910 2911  
2911 2912          .weak   fulword
2912 2913          fulword=fuword32
2913 2914          .weak   fulword_noerr
2914 2915          fulword_noerr=fuword32_noerr
2915 2916          .weak   sulword
2916 2917          sulword=suword32
2917 2918          .weak   sulword_noerr
2918 2919          sulword_noerr=suword32_noerr
2919 2920  
2920 2921  #endif /* __i386 */
2921 2922  
2922 2923  #endif /* __lint */
2923 2924  
2924 2925  #if defined(__lint)
2925 2926  
2926 2927  /*
2927 2928   * Copy a block of storage - must not overlap (from + len <= to).
2928 2929   * No fault handler installed (to be called under on_fault())
2929 2930   */
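The comment above is the whole contract: these *_noerr routines install no lofault handler of their own, so the caller must already be running under on_fault(). As a hedged sketch (not part of this change), the calling pattern in C might look roughly like the following; copy_from_user_nofault() is a hypothetical helper used only for illustration:

        #include <sys/types.h>
        #include <sys/systm.h>
        #include <sys/errno.h>

        /*
         * Hedged sketch: copy_from_user_nofault() is a hypothetical wrapper,
         * shown only to illustrate the on_fault()/no_fault() protocol that
         * copyin_noerr() assumes.
         */
        static int
        copy_from_user_nofault(const void *ufrom, void *kto, size_t count)
        {
                label_t ljb;

                if (on_fault(&ljb)) {
                        /* A fault was taken while copying; clean up and fail. */
                        no_fault();
                        return (EFAULT);
                }
                copyin_noerr(ufrom, kto, count);
                no_fault();
                return (0);
        }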
2930 2931  
2931 2932  /* ARGSUSED */
2932 2933  void
2933 2934  copyout_noerr(const void *kfrom, void *uto, size_t count)
2934 2935  {}
2935 2936  
2936 2937  /* ARGSUSED */
2937 2938  void
2938 2939  copyin_noerr(const void *ufrom, void *kto, size_t count)
2939 2940  {}
2940 2941  
2941 2942  /*
2942 2943   * Zero a block of storage in user space
2943 2944   */
2944 2945  
2945 2946  /* ARGSUSED */
2946 2947  void
2947 2948  uzero(void *addr, size_t count)
2948 2949  {}
2949 2950  
2950 2951  /*
2951 2952   * copy a block of storage in user space
2952 2953   */
2953 2954  
2954 2955  /* ARGSUSED */
2955 2956  void
2956 2957  ucopy(const void *ufrom, void *uto, size_t ulength)
2957 2958  {}
2958 2959  
2959 2960  /*
2960 2961   * copy a string in user space
2961 2962   */
2962 2963  
2963 2964  /* ARGSUSED */
2964 2965  void
2965 2966  ucopystr(const char *ufrom, char *uto, size_t umaxlength, size_t *lencopied)
2966 2967  {}
2967 2968  
2968 2969  #else /* __lint */
2969 2970  
2970 2971  #if defined(__amd64)
2971 2972  
2972 2973          ENTRY(copyin_noerr)
2973 2974          movq    kernelbase(%rip), %rax
2974 2975  #ifdef DEBUG
2975 2976          cmpq    %rax, %rsi              /* %rsi = kto */
2976 2977          jae     1f
2977 2978          leaq    .cpyin_ne_pmsg(%rip), %rdi
2978 2979          jmp     call_panic              /* setup stack and call panic */
2979 2980  1:
2980 2981  #endif
2981 2982          cmpq    %rax, %rdi              /* ufrom < kernelbase */
2982 2983          jb      do_copy
2983 2984          movq    %rax, %rdi              /* force fault at kernelbase */
2984 2985          jmp     do_copy
2985 2986          SET_SIZE(copyin_noerr)
2986 2987  
2987 2988          ENTRY(copyout_noerr)
2988 2989          movq    kernelbase(%rip), %rax
2989 2990  #ifdef DEBUG
2990 2991          cmpq    %rax, %rdi              /* %rdi = kfrom */
2991 2992          jae     1f
2992 2993          leaq    .cpyout_ne_pmsg(%rip), %rdi
2993 2994          jmp     call_panic              /* setup stack and call panic */
2994 2995  1:
2995 2996  #endif
2996 2997          cmpq    %rax, %rsi              /* uto < kernelbase */
2997 2998          jb      do_copy
2998 2999          movq    %rax, %rsi              /* force fault at kernelbase */
2999 3000          jmp     do_copy
3000 3001          SET_SIZE(copyout_noerr)
3001 3002  
3002 3003          ENTRY(uzero)
3003 3004          movq    kernelbase(%rip), %rax
3004 3005          cmpq    %rax, %rdi
3005 3006          jb      do_zero
3006 3007          movq    %rax, %rdi      /* force fault at kernelbase */
3007 3008          jmp     do_zero
3008 3009          SET_SIZE(uzero)
3009 3010  
3010 3011          ENTRY(ucopy)
3011 3012          movq    kernelbase(%rip), %rax
3012 3013          cmpq    %rax, %rdi
3013 3014          cmovaeq %rax, %rdi      /* force fault at kernelbase */
3014 3015          cmpq    %rax, %rsi
3015 3016          cmovaeq %rax, %rsi      /* force fault at kernelbase */
3016 3017          jmp     do_copy
3017 3018          SET_SIZE(ucopy)
3018 3019  
3019 3020          /*
3020 3021           * Note, the frame pointer is required here because do_copystr expects
3021 3022           * to be able to pop it off!
3022 3023           */
3023 3024          ENTRY(ucopystr)
3024 3025          pushq   %rbp
3025 3026          movq    %rsp, %rbp
3026 3027          movq    kernelbase(%rip), %rax
3027 3028          cmpq    %rax, %rdi
3028 3029          cmovaeq %rax, %rdi      /* force fault at kernelbase */
3029 3030          cmpq    %rax, %rsi
3030 3031          cmovaeq %rax, %rsi      /* force fault at kernelbase */
3031 3032          /* do_copystr expects the lofault address in %r8 */
3032 3033          /* do_copystr expects a flag in %r10 indicating whether smap is needed */
3033 3034          xorl    %r10d, %r10d
3034 3035          movq    %gs:CPU_THREAD, %r8
3035 3036          movq    T_LOFAULT(%r8), %r8
3036 3037          jmp     do_copystr
3037 3038          SET_SIZE(ucopystr)
3038 3039  
3039 3040  #elif defined(__i386)
3040 3041  
3041 3042          ENTRY(copyin_noerr)
3042 3043          movl    kernelbase, %eax
3043 3044  #ifdef DEBUG
3044 3045          cmpl    %eax, 8(%esp)
3045 3046          jae     1f
3046 3047          pushl   $.cpyin_ne_pmsg
3047 3048          call    panic
3048 3049  1:
3049 3050  #endif
3050 3051          cmpl    %eax, 4(%esp)
3051 3052          jb      do_copy
3052 3053          movl    %eax, 4(%esp)   /* force fault at kernelbase */
3053 3054          jmp     do_copy
3054 3055          SET_SIZE(copyin_noerr)
3055 3056  
3056 3057          ENTRY(copyout_noerr)
3057 3058          movl    kernelbase, %eax
3058 3059  #ifdef DEBUG
3059 3060          cmpl    %eax, 4(%esp)
3060 3061          jae     1f
3061 3062          pushl   $.cpyout_ne_pmsg
3062 3063          call    panic
3063 3064  1:
3064 3065  #endif
3065 3066          cmpl    %eax, 8(%esp)
3066 3067          jb      do_copy
3067 3068          movl    %eax, 8(%esp)   /* force fault at kernelbase */
3068 3069          jmp     do_copy
3069 3070          SET_SIZE(copyout_noerr)
3070 3071  
3071 3072          ENTRY(uzero)
3072 3073          movl    kernelbase, %eax
3073 3074          cmpl    %eax, 4(%esp)
3074 3075          jb      do_zero
3075 3076          movl    %eax, 4(%esp)   /* force fault at kernelbase */
3076 3077          jmp     do_zero
3077 3078          SET_SIZE(uzero)
3078 3079  
3079 3080          ENTRY(ucopy)
3080 3081          movl    kernelbase, %eax
3081 3082          cmpl    %eax, 4(%esp)
3082 3083          jb      1f
3083 3084          movl    %eax, 4(%esp)   /* force fault at kernelbase */
3084 3085  1:
3085 3086          cmpl    %eax, 8(%esp)
3086 3087          jb      do_copy
3087 3088          movl    %eax, 8(%esp)   /* force fault at kernelbase */
3088 3089          jmp     do_copy
3089 3090          SET_SIZE(ucopy)
3090 3091  
3091 3092          ENTRY(ucopystr)
3092 3093          movl    kernelbase, %eax
3093 3094          cmpl    %eax, 4(%esp)
3094 3095          jb      1f
3095 3096          movl    %eax, 4(%esp)   /* force fault at kernelbase */
3096 3097  1:
3097 3098          cmpl    %eax, 8(%esp)
3098 3099          jb      2f
3099 3100          movl    %eax, 8(%esp)   /* force fault at kernelbase */
3100 3101  2:
3101 3102          /* do_copystr expects the lofault address in %eax */
3102 3103          movl    %gs:CPU_THREAD, %eax
3103 3104          movl    T_LOFAULT(%eax), %eax
3104 3105          jmp     do_copystr
3105 3106          SET_SIZE(ucopystr)
3106 3107  
3107 3108  #endif  /* __i386 */
3108 3109  
3109 3110  #ifdef DEBUG
3110 3111          .data
3111 3112  .kcopy_panic_msg:
3112 3113          .string "kcopy: arguments below kernelbase"
3113 3114  .bcopy_panic_msg:
3114 3115          .string "bcopy: arguments below kernelbase"
3115 3116  .kzero_panic_msg:
3116 3117          .string "kzero: arguments below kernelbase"
3117 3118  .bzero_panic_msg:
3118 3119          .string "bzero: arguments below kernelbase"
3119 3120  .copyin_panic_msg:
3120 3121          .string "copyin: kaddr argument below kernelbase"
3121 3122  .xcopyin_panic_msg:
3122 3123          .string "xcopyin: kaddr argument below kernelbase"
3123 3124  .copyout_panic_msg:
3124 3125          .string "copyout: kaddr argument below kernelbase"
3125 3126  .xcopyout_panic_msg:
3126 3127          .string "xcopyout: kaddr argument below kernelbase"
3127 3128  .copystr_panic_msg:
3128 3129          .string "copystr: arguments in user space"
3129 3130  .copyinstr_panic_msg:
3130 3131          .string "copyinstr: kaddr argument not in kernel address space"
3131 3132  .copyoutstr_panic_msg:
3132 3133          .string "copyoutstr: kaddr argument not in kernel address space"
3133 3134  .cpyin_ne_pmsg:
3134 3135          .string "copyin_noerr: argument not in kernel address space"
3135 3136  .cpyout_ne_pmsg:
3136 3137          .string "copyout_noerr: argument not in kernel address space"
3137 3138  #endif
3138 3139  
3139 3140  #endif  /* __lint */
3140 3141  
3141 3142  /*
3142 3143   * These functions are used for SMAP, supervisor mode access protection. They
3143 3144   * are hotpatched to become real instructions when the system starts up, which is
3144 3145   * done in mlsetup() as part of enabling the other CR4-related features.
3145 3146   *
3146 3147   * Generally speaking, smap_disable() is a stac instruction and smap_enable() is a
3147 3148   * clac instruction. It's safe to call these any number of times, and in fact,
3148 3149   * out of paranoia, the kernel will likely call them at several points.
3149 3150   */
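As an aside (an illustrative sketch, not code from this change), once the nop windows below are hotpatched, a caller that needs to touch user memory directly could bracket the access with these routines; store_user_byte() below is a hypothetical helper, not a real kernel function:

        #include <sys/types.h>

        extern void smap_enable(void);          /* hotpatched to clac */
        extern void smap_disable(void);         /* hotpatched to stac */

        /*
         * Hedged sketch: shown only to illustrate bracketing a direct user
         * access with the SMAP routines; real callers also need fault
         * handling (e.g. on_fault()) around the access itself.
         */
        static void
        store_user_byte(volatile uint8_t *uaddr, uint8_t val)
        {
                smap_disable();         /* stac: set PS_ACHK, permit user access */
                *uaddr = val;
                smap_enable();          /* clac: clear PS_ACHK, forbid user access */
        }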
3150 3151  
3151 3152  #if defined(__lint)
3152 3153  
3153 3154  void
3154 3155  smap_enable(void)
3155 3156  {}
3156 3157  
3157 3158  void
3158 3159  smap_disable(void)
3159 3160  {}
3160 3161  
3161 3162  #else
3162 3163  
3163 3164  #if defined (__amd64) || defined(__i386)
3164 3165          ENTRY(smap_disable)
3165 3166          nop
3166 3167          nop
3167 3168          nop
3168 3169          ret
3169 3170          SET_SIZE(smap_disable)
3170 3171  
3171 3172          ENTRY(smap_enable)
3172 3173          nop
3173 3174          nop
3174 3175          nop
3175 3176          ret
3176 3177          SET_SIZE(smap_enable)
3177 3178  
3178 3179  #endif /* __amd64 || __i386 */
3179 3180  
3180 3181  #endif /* __lint */
3181 3182  
3182 3183  #ifndef __lint
3183 3184  
3184 3185  .data
3185 3186  .align  4
3186 3187  .globl  _smap_enable_patch_count
3187 3188  .type   _smap_enable_patch_count,@object
3188 3189  .size   _smap_enable_patch_count, 4
3189 3190  _smap_enable_patch_count:
3190 3191          .long   SMAP_ENABLE_COUNT
3191 3192  
3192 3193  .globl  _smap_disable_patch_count
3193 3194  .type   _smap_disable_patch_count,@object
3194 3195  .size   _smap_disable_patch_count, 4
3195 3196  _smap_disable_patch_count:
3196 3197          .long SMAP_DISABLE_COUNT
3197 3198  
3198 3199  #endif /* __lint */
  
    | ↓ open down ↓ | 2230 lines elided | ↑ open up ↑ | 